Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@ edition = "2018"
anyhow = "1"
err-derive = "0.2.4"
memmap = "0.7"
regex = "1"
structopt = "0.3"
ahash = "0.3"
rayon = "1.3"
num_cpus = "1.13"

[dev-dependencies]
quickcheck = "0.9"
regex = "1.3.9"

[profile.release]
codegen-units = 1
Expand Down
7 changes: 1 addition & 6 deletions src/bin/tf.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use std::str::FromStr;

use anyhow::{anyhow, Error};
use regex::Regex;
use structopt::StructOpt;

use topfew::{top_few_from_stream, KeyFinder};
Expand All @@ -14,8 +13,7 @@ fn main() -> Result<(), Error> {
options.num
));
}
let sep = Regex::new(&options.regex)?;
let kf = KeyFinder::new(Some(options.fields.indices), sep)?;
let kf = KeyFinder::new(Some(options.fields.indices));
let top_list = top_few_from_stream(options.file.into(), &kf, options.num)?;
for kc in top_list {
println!("{} {}", kc.count, kc.key);
Expand All @@ -32,9 +30,6 @@ struct Options {
/// Top number of matches to show
#[structopt(long, short = "n", default_value = "10")]
num: usize,
/// Regular expression used to split lines into fields
#[structopt(long, short = "e", default_value = "[ \\t]")]
regex: String,
/// File to search
file: String,
}
Expand Down
84 changes: 51 additions & 33 deletions src/key_finder.rs
Original file line number Diff line number Diff line change
@@ -1,58 +1,76 @@
use anyhow::{anyhow, Error};
use regex::Regex;

pub struct KeyFinder {
keys: Option<(usize, usize, Vec<bool>)>,
sep: Regex,
keys: Option<(Vec<usize>, usize)>,
}

impl KeyFinder {
pub fn new(keys: Option<Vec<usize>>, sep: Regex) -> Result<Self, Error> {
pub fn new(keys: Option<Vec<usize>>) -> Self {
let keys = keys.map(|mut keys| {
keys.sort();

let last = *keys.last().unwrap();
(
keys.len(),
last,
(0..=last)
.map(|i| keys.contains(&(i + 1)))
.collect::<Vec<_>>(),
)
let keep = (0..=last)
.map(|i| keys.contains(&(i + 1)))
.collect::<Vec<_>>();

let mut offsets = Vec::new();
let mut last = usize::MAX;
for (idx, &k) in keep.iter().enumerate() {
if k {
offsets.push(idx.wrapping_sub(last).wrapping_sub(1));
last = idx;
}
}
(offsets, last)
});
Ok(KeyFinder { keys, sep })
KeyFinder { keys }
}

pub fn key<'a>(&self, record: &'a str, s: &'a mut String) -> Result<&'a str, Error> {
let (num, last, keep) = match &self.keys {
let (keep, last) = match &self.keys {
None => return Ok(record),
Some((num, _, _)) if *num == 0 => return Ok(record),
Some((num, last, keep)) => (num, last, keep),
Some(keep) if keep.0.len() == 0 => return Ok(record),
Some(keep) => keep,
};

let mut fields = keep
.iter()
.zip(self.sep.splitn(record, last + 2))
.filter_map(|(keep, field)| if *keep { Some(field) } else { None });

if *num == 1 {
return match fields.next() {
Some(f) => Ok(f),
None => Err(anyhow!("not enough fields to make key")),
};
let mut current = 0;
let mut iter = record.splitn(last + 2, |c| (c == ' ' || c == '\t'));
for &offset in keep {
match iter.nth(offset) {
None => break,
Some(field) => {
if current > 0 {
s.push(' ');
}
s.push_str(field);
current += 1;
}
}
}

let mut found = 0;
for f in fields {
s.push(' ');
s.push_str(f);
found += 1;
}

if found == *num {
if current >= keep.len() {
Ok(s)
} else {
Err(anyhow!("not enough fields to make key"))
}
}
}

#[cfg(test)]
mod tests {
    use super::*;

    /// One access-log line used as the fixture record for the tests below.
    const TEST: &str = "92.109.155.34 - - [09/Aug/2018:11:53:26 +0200] \"GET / HTTP/2.0\" 200 3219 \"https://www.facebook.com/\" \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36\"";

    /// Selecting a single 1-based field yields exactly that field.
    #[test]
    fn key() {
        let cases = [(1, "92.109.155.34"), (7, "/")];

        for &(field, expected) in cases.iter() {
            let finder = KeyFinder::new(Some(vec![field]));
            // A fresh buffer per case, since `key` appends to what it is given.
            let mut buffer = String::new();
            assert_eq!(finder.key(TEST, &mut buffer).unwrap(), expected);
        }
    }
}