Skip to content

Commit

Permalink
cargo vendor
Browse files Browse the repository at this point in the history
  • Loading branch information
mcandre committed Nov 9, 2024
1 parent 3a07f06 commit cfc8f4e
Show file tree
Hide file tree
Showing 712 changed files with 131,885 additions and 43,412 deletions.
5 changes: 5 additions & 0 deletions .cargo/cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[source.crates-io]
replace-with = "vendored-sources"

[source.vendored-sources]
directory = "vendor"
2 changes: 1 addition & 1 deletion vendor/aho-corasick/.cargo-checksum.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"11fd01de244f822ebfc5323bebd362d6183ab31ded726899ea117ade2de96d2e","DESIGN.md":"d336d97838a16dbc2052658c8a361434829944e3d80373572d9e75bb04c24e78","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"6c36ae81ea9af36e6f964d7045d9719b427fe36f15db99b879feb5453734941e","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/ahocorasick.rs":"66f3948ee578c0a5a38c61f795547c453bea7cac4d10457af3a23b88e380aa8a","src/automaton.rs":"22258a3e118672413119f8f543a9b912cce954e63524575c0ebfdf9011f9c2dd","src/dfa.rs":"d61f5a33a52d1ee0032782b626ccd066292b0a8827bef63ea3ac02dcc4ec4e47","src/lib.rs":"2a92d5c5e930f2d306508802e8a929135e1f41c9f5f8deda8f7eb98947179dd2","src/macros.rs":"c6c52ae05b24433cffaca7b78b3645d797862c5d5feffddf9f54909095ed6e05","src/nfa/contiguous.rs":"bed6b2f3c37c20baa18d919724984840b76602cc0d461c2684d2ded47673b366","src/nfa/mod.rs":"ee7b3109774d14bbad5239c16bb980dd6b8185ec136d94fbaf2f0dc27d5ffa15","src/nfa/noncontiguous.rs":"3fc777465842825dfca839fe354d88d77a6e94610481ad05497247ebccb0187a","src/packed/api.rs":"37a6580d3578d2244580ce2edc12105933c597f0c0b95d70e53a343e4d9f3582","src/packed/mod.rs":"b2c79103c1ed99b7d4261335909fff41a597a324325d62c3325afd656077bcb9","src/packed/pattern.rs":"dd74572178c20cf651ae272bf2c985fb0b3fadc5140cdcb1bff95a3fbcfe2ade","src/packed/rabinkarp.rs":"7bdabb91ec8a22a37a12edabf893270a04c57dea9d6714d507ba17f094285e15","src/packed/teddy/README.md":"b4b83fb5afafbbea6cb76fe70f49cc8ced888f682d98abe5ea5773e95d9ec2b0","src/packed/teddy/compile.rs":"6725dc38114953c0429652e2a4d31dcc33e54c0f5a6cee79f282c6a3d7b41683","src/packed/teddy/mod.rs":"0ce9fcba05a68301474fe30a71824650d05dcb8f04405fe9fc6b9326229f7db0","src/packed/teddy/runtime.rs":"2dc68cc08e24169eafcebbab35187d6aab38ef994267d2735b019b43bad0e6f2","src/packed/tests.rs":"f28307860843c36c9998657e3518c250ec2f4eac800cf912cad6d7aaa81bab7c","src/packed/vector.rs":"48909869f0ecf8832d338ed54c48a77d972bbcda57476d3b863078a18e59f709","src/tests.rs":"c68192ab97b6161d0d6ee96fefd80cc7d14e4486ddcd8d1f82b5c92432c24ed5","src/transducer.rs":"02daa33a5d6dac41dcfd67f51df7c0d4a91c5131c781fb54c4de3520c585a6e1","src/util/alphabet.rs":"6dc22658a38deddc0279892035b18870d4585069e35ba7c7e649a24509acfbcc","src/util/buffer.rs":"f9e37f662c46c6ecd734458dedbe76c3bb0e84a93b6b0117c0d4ad3042413891","src/util/byte_frequencies.rs":"2fb85b381c038c1e44ce94294531cdcd339dca48b1e61f41455666e802cbbc9e","src/util/debug.rs":"ab301ad59aa912529cb97233a54a05914dd3cb2ec43e6fec7334170b97ac5998","src/util/error.rs":"ecccd60e7406305023efcc6adcc826eeeb083ab8f7fbfe3d97469438cd4c4e5c","src/util/int.rs":"b735f3ae8d398849fe0ab1575d634df15803b21167945b894205fdde4a1a9e58","src/util/mod.rs":"7ab28d11323ecdbd982087f32eb8bceeee84f1a2583f3aae27039c36d58cf12c","src/util/prefilter.rs":"ef36a945b8f564771a031d98e0dbf85eea93d1fc66a1c60b7baa98fecf66bb69","src/util/primitives.rs":"f89f3fa1d8db4e37de9ca767c6d05e346404837cade6d063bba68972fafa610b","src/util/remapper.rs":"9f12d911583a325c11806eeceb46d0dfec863cfcfa241aed84d31af73da746e5","src/util/search.rs":"6af803e08b8b8c8a33db100623f1621b0d741616524ce40893d8316897f27ffe","src/util/special.rs":"7d2f9cb9dd9771f59816e829b2d96b1239996f32939ba98764e121696c52b146"},"package":"67fc08ce920c31afb70f013dcce1bfc3a3195de6a228474e45e1f145b36f8d04"}
{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"747d0fcb1257c9b8b013104da3c5a67f5d6cf8a95a2163b13703c01cab2c010a","DESIGN.md":"59c960e1b73b1d7fb41e4df6c0c1b1fcf44dd2ebc8a349597a7d0595f8cb5130","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"afc4d559a98cf190029af0bf320fc0022725e349cd2a303aac860254e28f3c53","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/ahocorasick.rs":"c699c07df70be45c666e128509ad571a7649d2073e4ae16ac1efd6793c9c6890","src/automaton.rs":"22258a3e118672413119f8f543a9b912cce954e63524575c0ebfdf9011f9c2dd","src/dfa.rs":"bfef1a94c5e7410584b1beb4e857b40d1ae2031b881cbc06fb1300409bbd555f","src/lib.rs":"2a92d5c5e930f2d306508802e8a929135e1f41c9f5f8deda8f7eb98947179dd2","src/macros.rs":"c6c52ae05b24433cffaca7b78b3645d797862c5d5feffddf9f54909095ed6e05","src/nfa/contiguous.rs":"aeb6ee5fd80eea04decbc4b46aa27d1ab270b78d416a644da25b7934f009ee66","src/nfa/mod.rs":"ee7b3109774d14bbad5239c16bb980dd6b8185ec136d94fbaf2f0dc27d5ffa15","src/nfa/noncontiguous.rs":"de94f02b04efd8744fb096759a8897c22012b0e0ca3ace161fd87c71befefe04","src/packed/api.rs":"160d3b10823316f7b0924e13c3afd222c8a7db5c0a00432401f311ef27d6a1b7","src/packed/ext.rs":"66be06fde8558429da23a290584d4b9fae665bf64c2578db4fe5f5f3ee864869","src/packed/mod.rs":"0020cd6f07ba5c8955923a9516d7f758864260eda53a6b6f629131c45ddeec62","src/packed/pattern.rs":"1e3a289a730c141fc30b295811e372d046c6619c7fd670308299b889a06c7673","src/packed/rabinkarp.rs":"403146eb1d838a84601d171393542340513cd1ee7ff750f2372161dd47746586","src/packed/teddy/README.md":"3a43194b64e221543d885176aba3beb1224a927385a20eca842daf6b0ea2f342","src/packed/teddy/builder.rs":"720735ea6c7ff92b081426513e6e82feed24a922849297bb538d28f7b8129f81","src/packed/teddy/generic.rs":"ea252ab05b32cea7dd9d71e332071d243db7dd0362e049252a27e5881ba2bf39","src/packed/teddy/mod.rs":"17d741f7e2fb9dbac5ba7d1bd4542cf1e35e9f146ace728e23fe6bbed20028b2","src/packed/tests.rs":"8e2f56eb3890ed3876ecb47d3121996e416563127b6430110d7b516df3f83b4b","src/packed/vector.rs":"840065521cbd4701fa5b8b506d1537843d858c903f7cadf3c68749ea1780874b","src/tests.rs":"c68192ab97b6161d0d6ee96fefd80cc7d14e4486ddcd8d1f82b5c92432c24ed5","src/transducer.rs":"02daa33a5d6dac41dcfd67f51df7c0d4a91c5131c781fb54c4de3520c585a6e1","src/util/alphabet.rs":"6dc22658a38deddc0279892035b18870d4585069e35ba7c7e649a24509acfbcc","src/util/buffer.rs":"f9e37f662c46c6ecd734458dedbe76c3bb0e84a93b6b0117c0d4ad3042413891","src/util/byte_frequencies.rs":"2fb85b381c038c1e44ce94294531cdcd339dca48b1e61f41455666e802cbbc9e","src/util/debug.rs":"ab301ad59aa912529cb97233a54a05914dd3cb2ec43e6fec7334170b97ac5998","src/util/error.rs":"ecccd60e7406305023efcc6adcc826eeeb083ab8f7fbfe3d97469438cd4c4e5c","src/util/int.rs":"4ab6dbdba10027ddec2af63a9b28ce4eee30ded0daa5d8eb068b2b55542b6039","src/util/mod.rs":"7ab28d11323ecdbd982087f32eb8bceeee84f1a2583f3aae27039c36d58cf12c","src/util/prefilter.rs":"9fa4498f18bf70478b1996c1a013698b626d15f119aa81dbc536673c9f045718","src/util/primitives.rs":"f89f3fa1d8db4e37de9ca767c6d05e346404837cade6d063bba68972fafa610b","src/util/remapper.rs":"9f12d911583a325c11806eeceb46d0dfec863cfcfa241aed84d31af73da746e5","src/util/search.rs":"6af803e08b8b8c8a33db100623f1621b0d741616524ce40893d8316897f27ffe","src/util/special.rs":"7d2f9cb9dd9771f59816e829b2d96b1239996f32939ba98764e121696c52b146"},"package":"b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0"}
13 changes: 9 additions & 4 deletions vendor/aho-corasick/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,13 @@
edition = "2021"
rust-version = "1.60.0"
name = "aho-corasick"
version = "1.0.1"
version = "1.1.2"
authors = ["Andrew Gallant <jamslam@gmail.com>"]
exclude = ["/aho-corasick-debug"]
exclude = [
"/aho-corasick-debug",
"/benchmarks",
"/tmp",
]
autotests = false
description = "Fast multiple substring searching."
homepage = "https://github.com/BurntSushi/aho-corasick"
Expand All @@ -36,13 +40,14 @@ all-features = true
rustdoc-args = [
"--cfg",
"docsrs",
"--generate-link-to-definition",
]

[profile.bench]
debug = true
debug = 2

[profile.release]
debug = true
debug = 2

[lib]
name = "aho_corasick"
Expand Down
12 changes: 6 additions & 6 deletions vendor/aho-corasick/DESIGN.md
Original file line number Diff line number Diff line change
Expand Up @@ -470,12 +470,12 @@ If all of that fails, then a packed multiple substring algorithm will be
attempted. Currently, the only algorithm available for this is Teddy, but more
may be added in the future. Teddy is unlike the above prefilters in that it
confirms its own matches, so when Teddy is active, it might not be necessary
for Aho-Corasick to run at all. However, the current Teddy implementation only
works in `x86_64` and when SSSE3 or AVX2 are available, and moreover, only
works _well_ when there are a small number of patterns (say, less than 100).
Teddy also requires the haystack to be of a certain length (more than 16-34
bytes). When the haystack is shorter than that, Rabin-Karp is used instead.
(See `src/packed/rabinkarp.rs`.)
for Aho-Corasick to run at all. However, the current Teddy implementation
only works in `x86_64` when SSSE3 or AVX2 are available or in `aarch64`
(using NEON), and moreover, only works _well_ when there are a small number
of patterns (say, less than 100). Teddy also requires the haystack to be of a
certain length (more than 16-34 bytes). When the haystack is shorter than that,
Rabin-Karp is used instead. (See `src/packed/rabinkarp.rs`.)

There is a more thorough description of Teddy at
[`src/packed/teddy/README.md`](src/packed/teddy/README.md).
2 changes: 2 additions & 0 deletions vendor/aho-corasick/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -170,3 +170,5 @@ supported version of Rust.

* [G-Research/ahocorasick_rs](https://github.com/G-Research/ahocorasick_rs/)
is a Python wrapper for this library.
* [tmikus/ahocorasick_rs](https://github.com/tmikus/ahocorasick_rs) is a Go
wrapper for this library.
4 changes: 3 additions & 1 deletion vendor/aho-corasick/src/ahocorasick.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1975,6 +1975,7 @@ impl AhoCorasick {
/// configurations:
///
/// ```
/// # if !cfg!(target_pointer_width = "64") { return; }
/// use aho_corasick::{AhoCorasick, AhoCorasickKind, MatchKind};
///
/// let ac = AhoCorasick::builder()
Expand All @@ -1995,7 +1996,7 @@ impl AhoCorasick {
/// .ascii_case_insensitive(true)
/// .build(&["foobar", "bruce", "triskaidekaphobia", "springsteen"])
/// .unwrap();
/// assert_eq!(9_128, ac.memory_usage());
/// assert_eq!(10_879, ac.memory_usage());
///
/// let ac = AhoCorasick::builder()
/// .kind(Some(AhoCorasickKind::ContiguousNFA))
Expand Down Expand Up @@ -2578,6 +2579,7 @@ impl AhoCorasickBuilder {
/// More to the point, the memory usage increases superlinearly as this
/// number increases.
pub fn dense_depth(&mut self, depth: usize) -> &mut AhoCorasickBuilder {
self.nfa_noncontiguous.dense_depth(depth);
self.nfa_contiguous.dense_depth(depth);
self
}
Expand Down
107 changes: 72 additions & 35 deletions vendor/aho-corasick/src/dfa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,15 +93,9 @@ pub struct DFA {
/// instead of the IDs being 0, 1, 2, 3, ..., they are 0*stride, 1*stride,
/// 2*stride, 3*stride, ...
trans: Vec<StateID>,
/// The matches for every match state in this DFA. This is indexed by order
/// of match states in the DFA. Namely, as constructed, match states are
/// always laid out sequentially and contiguously in memory. Thus, after
/// converting a match state ID to a match state index, the indices are
/// all adjacent.
///
/// More concretely, when a search enters a match state with id 'sid', then
/// the matching patterns are at 'matches[(sid >> stride2) - 2]'. The '- 2'
/// is to offset the first two states of a DFA: the dead and fail states.
/// The matches for every match state in this DFA. This is first indexed by
/// state index (so that's `sid >> stride2`) and then by order in which the
/// matches are meant to occur.
matches: Vec<Vec<PatternID>>,
/// The amount of heap memory used, in bytes, by the inner Vecs of
/// 'matches'.
Expand Down Expand Up @@ -174,13 +168,19 @@ impl DFA {

/// Adds the given pattern IDs as matches to the given state and also
/// records the added memory usage.
fn set_matches(&mut self, sid: StateID, pids: &[PatternID]) {
use core::mem::size_of;

assert!(!pids.is_empty(), "match state must have non-empty pids");
fn set_matches(
&mut self,
sid: StateID,
pids: impl Iterator<Item = PatternID>,
) {
let index = (sid.as_usize() >> self.stride2).checked_sub(2).unwrap();
self.matches[index].extend_from_slice(pids);
self.matches_memory_usage += size_of::<PatternID>() * pids.len();
let mut at_least_one = false;
for pid in pids {
self.matches[index].push(pid);
self.matches_memory_usage += PatternID::SIZE;
at_least_one = true;
}
assert!(at_least_one, "match state must have non-empty pids");
}
}

Expand Down Expand Up @@ -524,6 +524,18 @@ impl Builder {
dfa.byte_classes.alphabet_len(),
dfa.byte_classes.stride(),
);
// The vectors can grow ~twice as big during construction because a
// Vec amortizes growth. But here, let's shrink things back down to
// what we actually need since we're never going to add more to it.
dfa.trans.shrink_to_fit();
dfa.pattern_lens.shrink_to_fit();
dfa.matches.shrink_to_fit();
// TODO: We might also want to shrink each Vec inside of `dfa.matches`,
// or even better, convert it to one contiguous allocation. But I think
// I went with nested allocs for good reason (can't remember), so this
// may be tricky to do. I decided not to shrink them here because it
// might require a fair bit of work to do. It's unclear whether it's
// worth it.
Ok(dfa)
}

Expand All @@ -543,20 +555,33 @@ impl Builder {
};
for (oldsid, state) in nnfa.states().iter().with_state_ids() {
let newsid = old2new(oldsid);
if !state.matches.is_empty() {
dfa.set_matches(newsid, &state.matches);
if state.is_match() {
dfa.set_matches(newsid, nnfa.iter_matches(oldsid));
}
sparse_iter(
state,
nnfa,
oldsid,
&dfa.byte_classes,
|byte, class, mut oldnextsid| {
if oldnextsid == noncontiguous::NFA::FAIL {
if anchored.is_anchored() {
oldnextsid = noncontiguous::NFA::DEAD;
} else if state.fail() == noncontiguous::NFA::DEAD {
// This is a special case that avoids following
// DEAD transitions in a non-contiguous NFA.
// Following these transitions is pretty slow
// because the non-contiguous NFA will always use
// a sparse representation for it (because the
// DEAD state is usually treated as a sentinel).
// The *vast* majority of failure states are DEAD
// states, so this winds up being pretty slow if
// we go through the non-contiguous NFA state
// transition logic. Instead, just do it ourselves.
oldnextsid = noncontiguous::NFA::DEAD;
} else {
oldnextsid = nnfa.next_state(
Anchored::No,
state.fail,
state.fail(),
byte,
);
}
Expand All @@ -569,7 +594,7 @@ impl Builder {
// Now that we've remapped all the IDs in our states, all that's left
// is remapping the special state IDs.
let old = nnfa.special();
let mut new = &mut dfa.special;
let new = &mut dfa.special;
new.max_special_id = old2new(old.max_special_id);
new.max_match_id = old2new(old.max_match_id);
if anchored.is_anchored() {
Expand Down Expand Up @@ -620,11 +645,12 @@ impl Builder {
remap_anchored[oldsid] = newsid;
is_anchored[newsid.as_usize() >> stride2] = true;
}
if !state.matches.is_empty() {
dfa.set_matches(newsid, &state.matches);
if state.is_match() {
dfa.set_matches(newsid, nnfa.iter_matches(oldsid));
}
sparse_iter(
state,
nnfa,
oldsid,
&dfa.byte_classes,
|_, class, oldnextsid| {
let class = usize::from(class);
Expand All @@ -645,18 +671,28 @@ impl Builder {
remap_unanchored[oldsid] = unewsid;
remap_anchored[oldsid] = anewsid;
is_anchored[anewsid.as_usize() >> stride2] = true;
if !state.matches.is_empty() {
dfa.set_matches(unewsid, &state.matches);
dfa.set_matches(anewsid, &state.matches);
if state.is_match() {
dfa.set_matches(unewsid, nnfa.iter_matches(oldsid));
dfa.set_matches(anewsid, nnfa.iter_matches(oldsid));
}
sparse_iter(
state,
nnfa,
oldsid,
&dfa.byte_classes,
|byte, class, oldnextsid| {
let class = usize::from(class);
if oldnextsid == noncontiguous::NFA::FAIL {
dfa.trans[unewsid.as_usize() + class] = nnfa
.next_state(Anchored::No, state.fail, byte);
let oldnextsid =
if state.fail() == noncontiguous::NFA::DEAD {
noncontiguous::NFA::DEAD
} else {
nnfa.next_state(
Anchored::No,
state.fail(),
byte,
)
};
dfa.trans[unewsid.as_usize() + class] = oldnextsid;
} else {
dfa.trans[unewsid.as_usize() + class] = oldnextsid;
dfa.trans[anewsid.as_usize() + class] = oldnextsid;
Expand All @@ -680,7 +716,7 @@ impl Builder {
// Now that we've remapped all the IDs in our states, all that's left
// is remapping the special state IDs.
let old = nnfa.special();
let mut new = &mut dfa.special;
let new = &mut dfa.special;
new.max_special_id = remap_anchored[old.max_special_id];
new.max_match_id = remap_anchored[old.max_match_id];
new.start_unanchored_id = remap_unanchored[old.start_unanchored_id];
Expand Down Expand Up @@ -763,14 +799,15 @@ impl Builder {
/// `byte_classes.alphabet_len()` times, once for every possible class in
/// ascending order.
fn sparse_iter<F: FnMut(u8, u8, StateID)>(
state: &noncontiguous::State,
nnfa: &noncontiguous::NFA,
oldsid: StateID,
classes: &ByteClasses,
mut f: F,
) {
let mut prev_class = None;
let mut byte = 0usize;
for &(b, id) in state.trans.iter() {
while byte < usize::from(b) {
for t in nnfa.iter_trans(oldsid) {
while byte < usize::from(t.byte()) {
let rep = byte.as_u8();
let class = classes.get(rep);
byte += 1;
Expand All @@ -779,11 +816,11 @@ fn sparse_iter<F: FnMut(u8, u8, StateID)>(
prev_class = Some(class);
}
}
let rep = b;
let rep = t.byte();
let class = classes.get(rep);
byte += 1;
if prev_class != Some(class) {
f(rep, class, id);
f(rep, class, t.next());
prev_class = Some(class);
}
}
Expand Down
Loading

0 comments on commit cfc8f4e

Please sign in to comment.