Skip to content

Commit 967b75c

Browse files
committed
Split fields without regular expressions
1 parent 71167f3 commit 967b75c

File tree

3 files changed

+53
-40
lines changed

3 files changed

+53
-40
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,14 +13,14 @@ edition = "2018"
1313
anyhow = "1"
1414
err-derive = "0.2.4"
1515
memmap = "0.7"
16-
regex = "1"
1716
structopt = "0.3"
1817
ahash = "0.3"
1918
rayon = "1.3"
2019
num_cpus = "1.13"
2120

2221
[dev-dependencies]
2322
quickcheck = "0.9"
23+
regex = "1.3.9"
2424

2525
[profile.release]
2626
codegen-units = 1

src/bin/tf.rs

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
use std::str::FromStr;
22

33
use anyhow::{anyhow, Error};
4-
use regex::Regex;
54
use structopt::StructOpt;
65

76
use topfew::{top_few_from_stream, KeyFinder};
@@ -14,8 +13,7 @@ fn main() -> Result<(), Error> {
1413
options.num
1514
));
1615
}
17-
let sep = Regex::new(&options.regex)?;
18-
let kf = KeyFinder::new(Some(options.fields.indices), sep)?;
16+
let kf = KeyFinder::new(Some(options.fields.indices));
1917
let top_list = top_few_from_stream(options.file.into(), &kf, options.num)?;
2018
for kc in top_list {
2119
println!("{} {}", kc.count, kc.key);
@@ -32,9 +30,6 @@ struct Options {
3230
/// Top number of matches to show
3331
#[structopt(long, short = "n", default_value = "10")]
3432
num: usize,
35-
/// Regular expression used to split lines into fields
36-
#[structopt(long, short = "e", default_value = "[ \\t]")]
37-
regex: String,
3833
/// File to search
3934
file: String,
4035
}

src/key_finder.rs

Lines changed: 51 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -1,58 +1,76 @@
11
use anyhow::{anyhow, Error};
2-
use regex::Regex;
32

43
pub struct KeyFinder {
5-
keys: Option<(usize, usize, Vec<bool>)>,
6-
sep: Regex,
4+
keys: Option<(Vec<usize>, usize)>,
75
}
86

97
impl KeyFinder {
10-
pub fn new(keys: Option<Vec<usize>>, sep: Regex) -> Result<Self, Error> {
8+
pub fn new(keys: Option<Vec<usize>>) -> Self {
119
let keys = keys.map(|mut keys| {
1210
keys.sort();
1311

1412
let last = *keys.last().unwrap();
15-
(
16-
keys.len(),
17-
last,
18-
(0..=last)
19-
.map(|i| keys.contains(&(i + 1)))
20-
.collect::<Vec<_>>(),
21-
)
13+
let keep = (0..=last)
14+
.map(|i| keys.contains(&(i + 1)))
15+
.collect::<Vec<_>>();
16+
17+
let mut offsets = Vec::new();
18+
let mut last = usize::MAX;
19+
for (idx, &k) in keep.iter().enumerate() {
20+
if k {
21+
offsets.push(idx.wrapping_sub(last).wrapping_sub(1));
22+
last = idx;
23+
}
24+
}
25+
(offsets, last)
2226
});
23-
Ok(KeyFinder { keys, sep })
27+
KeyFinder { keys }
2428
}
2529

2630
pub fn key<'a>(&self, record: &'a str, s: &'a mut String) -> Result<&'a str, Error> {
27-
let (num, last, keep) = match &self.keys {
31+
let (keep, last) = match &self.keys {
2832
None => return Ok(record),
29-
Some((num, _, _)) if *num == 0 => return Ok(record),
30-
Some((num, last, keep)) => (num, last, keep),
33+
Some(keep) if keep.0.len() == 0 => return Ok(record),
34+
Some(keep) => keep,
3135
};
3236

33-
let mut fields = keep
34-
.iter()
35-
.zip(self.sep.splitn(record, last + 2))
36-
.filter_map(|(keep, field)| if *keep { Some(field) } else { None });
37-
38-
if *num == 1 {
39-
return match fields.next() {
40-
Some(f) => Ok(f),
41-
None => Err(anyhow!("not enough fields to make key")),
42-
};
37+
let mut current = 0;
38+
let mut iter = record.splitn(last + 2, |c| (c == ' ' || c == '\t'));
39+
for &offset in keep {
40+
match iter.nth(offset) {
41+
None => break,
42+
Some(field) => {
43+
if current > 0 {
44+
s.push(' ');
45+
}
46+
s.push_str(field);
47+
current += 1;
48+
}
49+
}
4350
}
4451

45-
let mut found = 0;
46-
for f in fields {
47-
s.push(' ');
48-
s.push_str(f);
49-
found += 1;
50-
}
51-
52-
if found == *num {
52+
if current >= keep.len() {
5353
Ok(s)
5454
} else {
5555
Err(anyhow!("not enough fields to make key"))
5656
}
5757
}
5858
}
59+
60+
#[cfg(test)]
61+
mod tests {
62+
use super::*;
63+
64+
#[test]
65+
fn key() {
66+
let kf = KeyFinder::new(Some(vec![1]));
67+
let mut s = String::new();
68+
assert_eq!(kf.key(TEST, &mut s).unwrap(), "92.109.155.34");
69+
70+
s.clear();
71+
let kf = KeyFinder::new(Some(vec![7]));
72+
assert_eq!(kf.key(TEST, &mut s).unwrap(), "/");
73+
}
74+
75+
const TEST: &str = "92.109.155.34 - - [09/Aug/2018:11:53:26 +0200] \"GET / HTTP/2.0\" 200 3219 \"https://www.facebook.com/\" \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36\"";
76+
}

0 commit comments

Comments
 (0)