Skip to content

Commit 9ff7ee6

Browse files
committed
Split fields without regular expressions
1 parent 71167f3 commit 9ff7ee6

File tree

4 files changed: 38 additions and 41 deletions.

Cargo.lock

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ edition = "2018"
1313
anyhow = "1"
1414
err-derive = "0.2.4"
1515
memmap = "0.7"
16-
regex = "1"
1716
structopt = "0.3"
1817
ahash = "0.3"
1918
rayon = "1.3"

src/bin/tf.rs

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
use std::str::FromStr;
22

33
use anyhow::{anyhow, Error};
4-
use regex::Regex;
54
use structopt::StructOpt;
65

76
use topfew::{top_few_from_stream, KeyFinder};
@@ -14,8 +13,7 @@ fn main() -> Result<(), Error> {
1413
options.num
1514
));
1615
}
17-
let sep = Regex::new(&options.regex)?;
18-
let kf = KeyFinder::new(Some(options.fields.indices), sep)?;
16+
let kf = KeyFinder::new(Some(options.fields.indices));
1917
let top_list = top_few_from_stream(options.file.into(), &kf, options.num)?;
2018
for kc in top_list {
2119
println!("{} {}", kc.count, kc.key);
@@ -32,9 +30,6 @@ struct Options {
3230
/// Top number of matches to show
3331
#[structopt(long, short = "n", default_value = "10")]
3432
num: usize,
35-
/// Regular expression used to split lines into fields
36-
#[structopt(long, short = "e", default_value = "[ \\t]")]
37-
regex: String,
3833
/// File to search
3934
file: String,
4035
}

src/key_finder.rs

Lines changed: 37 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,58 +1,62 @@
11
use anyhow::{anyhow, Error};
2-
use regex::Regex;
32

43
pub struct KeyFinder {
5-
keys: Option<(usize, usize, Vec<bool>)>,
6-
sep: Regex,
4+
keys: Option<Vec<bool>>,
75
}
86

97
impl KeyFinder {
10-
pub fn new(keys: Option<Vec<usize>>, sep: Regex) -> Result<Self, Error> {
8+
pub fn new(keys: Option<Vec<usize>>) -> Self {
119
let keys = keys.map(|mut keys| {
1210
keys.sort();
1311

1412
let last = *keys.last().unwrap();
15-
(
16-
keys.len(),
17-
last,
18-
(0..=last)
19-
.map(|i| keys.contains(&(i + 1)))
20-
.collect::<Vec<_>>(),
21-
)
13+
(0..=last)
14+
.map(|i| keys.contains(&(i + 1)))
15+
.collect::<Vec<_>>()
2216
});
23-
Ok(KeyFinder { keys, sep })
17+
KeyFinder { keys }
2418
}
2519

2620
pub fn key<'a>(&self, record: &'a str, s: &'a mut String) -> Result<&'a str, Error> {
27-
let (num, last, keep) = match &self.keys {
21+
let keep = match &self.keys {
2822
None => return Ok(record),
29-
Some((num, _, _)) if *num == 0 => return Ok(record),
30-
Some((num, last, keep)) => (num, last, keep),
23+
Some(keep) if keep.len() == 0 => return Ok(record),
24+
Some(keep) => keep,
3125
};
3226

33-
let mut fields = keep
34-
.iter()
35-
.zip(self.sep.splitn(record, last + 2))
36-
.filter_map(|(keep, field)| if *keep { Some(field) } else { None });
37-
38-
if *num == 1 {
39-
return match fields.next() {
40-
Some(f) => Ok(f),
41-
None => Err(anyhow!("not enough fields to make key")),
42-
};
43-
}
44-
45-
let mut found = 0;
46-
for f in fields {
47-
s.push(' ');
48-
s.push_str(f);
49-
found += 1;
27+
let mut current = 0;
28+
for c in record.chars() {
29+
if c == ' ' || c == '\t' {
30+
current += 1;
31+
} else if current < keep.len() && keep[current] {
32+
s.push(c);
33+
} else if current >= keep.len() {
34+
break;
35+
}
5036
}
5137

52-
if found == *num {
38+
if current >= keep.len() {
5339
Ok(s)
5440
} else {
5541
Err(anyhow!("not enough fields to make key"))
5642
}
5743
}
5844
}
45+
46+
#[cfg(test)]
47+
mod tests {
48+
use super::*;
49+
50+
#[test]
51+
fn key() {
52+
let kf = KeyFinder::new(Some(vec![1]));
53+
let mut s = String::new();
54+
assert_eq!(kf.key(TEST, &mut s).unwrap(), "92.109.155.34");
55+
56+
s.clear();
57+
let kf = KeyFinder::new(Some(vec![7]));
58+
assert_eq!(kf.key(TEST, &mut s).unwrap(), "/");
59+
}
60+
61+
const TEST: &str = "92.109.155.34 - - [09/Aug/2018:11:53:26 +0200] \"GET / HTTP/2.0\" 200 3219 \"https://www.facebook.com/\" \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36\"";
62+
}

0 commit comments

Comments
 (0)