Skip to content

Commit

Permalink
feat(node::Find): also change Node::find to return node::Find
Browse files Browse the repository at this point in the history
Using `Node::find` to fetch only a few nodes should be _much_ faster,
because descendant nodes are now checked lazily as they are requested,
instead of all being checked upfront as in the old implementation, which
returned a `Selection`.

Merely counting the nodes matching a predicate should also be faster than
before, because neither `Node::find` nor `node::Find` performs any dynamic
memory allocation.

Fixes #19.
  • Loading branch information
utkarshkukreti committed Jul 20, 2016
1 parent 1cc1f66 commit c75dba9
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 26 deletions.
35 changes: 25 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,26 +29,41 @@ pub fn main() {

println!("# Top 5 Questions");
for node in document.find(Class("question-summary")).iter().take(5) {
let question = node.find(Class("question-hyperlink")).first().unwrap();
let votes = node.find(Class("vote-count-post")).first().unwrap().text();
let answers = node.find(Class("status")).find(Name("strong")).first().unwrap().text();
let tags = node.find(Class("post-tag")).iter().map(|tag| tag.text()).collect::<Vec<_>>();
let asked_on = node.find(Class("relativetime")).first().unwrap().text();
let asker = node.find(Class("user-details")).find(Name("a")).first().unwrap().text();
let question = node.find(Class("question-hyperlink")).next().unwrap();
let votes = node.find(Class("vote-count-post")).next().unwrap().text();
let answers = node.find(Class("status"))
.into_selection()
.find(Name("strong"))
.first()
.unwrap()
.text();
let tags = node.find(Class("post-tag")).map(|tag| tag.text()).collect::<Vec<_>>();
let asked_on = node.find(Class("relativetime")).next().unwrap().text();
let asker = node.find(Class("user-details"))
.into_selection()
.find(Name("a"))
.first()
.unwrap()
.text();
println!(" Question: {}", question.text());
println!(" Answers: {}", answers);
println!(" Votes: {}", votes);
println!(" Tagged: {}", tags.join(", "));
println!(" Asked on: {}", asked_on);
println!(" Asker: {}", asker);
println!("Permalink: http://stackoverflow.com{}", question.attr("href").unwrap());
println!("Permalink: http://stackoverflow.com{}",
question.attr("href").unwrap());
println!("");
}

println!("# Top 10 Related Tags");
for node in document.find(Attr("id", "h-related-tags")).parent().find(Name("div")).iter().take(10) {
let tag = node.find(Name("a")).first().unwrap().text();
let count = node.find(Class("item-multiplier-count")).first().unwrap().text();
for node in document.find(Attr("id", "h-related-tags"))
.parent()
.find(Name("div"))
.iter()
.take(10) {
let tag = node.find(Name("a")).next().unwrap().text();
let count = node.find(Class("item-multiplier-count")).next().unwrap().text();
println!("{} ({})", tag, count);
}
}
Expand Down
35 changes: 25 additions & 10 deletions examples/stackoverflow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,26 +16,41 @@ pub fn main() {

println!("# Top 5 Questions");
for node in document.find(Class("question-summary")).iter().take(5) {
let question = node.find(Class("question-hyperlink")).first().unwrap();
let votes = node.find(Class("vote-count-post")).first().unwrap().text();
let answers = node.find(Class("status")).find(Name("strong")).first().unwrap().text();
let tags = node.find(Class("post-tag")).iter().map(|tag| tag.text()).collect::<Vec<_>>();
let asked_on = node.find(Class("relativetime")).first().unwrap().text();
let asker = node.find(Class("user-details")).find(Name("a")).first().unwrap().text();
let question = node.find(Class("question-hyperlink")).next().unwrap();
let votes = node.find(Class("vote-count-post")).next().unwrap().text();
let answers = node.find(Class("status"))
.into_selection()
.find(Name("strong"))
.first()
.unwrap()
.text();
let tags = node.find(Class("post-tag")).map(|tag| tag.text()).collect::<Vec<_>>();
let asked_on = node.find(Class("relativetime")).next().unwrap().text();
let asker = node.find(Class("user-details"))
.into_selection()
.find(Name("a"))
.first()
.unwrap()
.text();
println!(" Question: {}", question.text());
println!(" Answers: {}", answers);
println!(" Votes: {}", votes);
println!(" Tagged: {}", tags.join(", "));
println!(" Asked on: {}", asked_on);
println!(" Asker: {}", asker);
println!("Permalink: http://stackoverflow.com{}", question.attr("href").unwrap());
println!("Permalink: http://stackoverflow.com{}",
question.attr("href").unwrap());
println!("");
}

println!("# Top 10 Related Tags");
for node in document.find(Attr("id", "h-related-tags")).parent().find(Name("div")).iter().take(10) {
let tag = node.find(Name("a")).first().unwrap().text();
let count = node.find(Class("item-multiplier-count")).first().unwrap().text();
for node in document.find(Attr("id", "h-related-tags"))
.parent()
.find(Name("div"))
.iter()
.take(10) {
let tag = node.find(Name("a")).next().unwrap().text();
let count = node.find(Class("item-multiplier-count")).next().unwrap().text();
println!("{} ({})", tag, count);
}
}
33 changes: 31 additions & 2 deletions src/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,12 @@ impl<'a> Node<'a> {
String::from_utf8(buf).unwrap()
}

pub fn find<P: Predicate>(&self, p: P) -> Selection<'a> {
Selection::new(self.document, [self.index].iter().cloned().collect()).find(p)
pub fn find<P: Predicate>(&self, predicate: P) -> Find<P> {
Find {
document: self.document,
descendants: self.descendants(),
predicate: predicate,
}
}

pub fn is<P: Predicate>(&self, p: P) -> bool {
Expand Down Expand Up @@ -267,3 +271,28 @@ impl<'a> Iterator for Descendants<'a> {
Some(self.current)
}
}

/// Lazy iterator over the nodes produced by `descendants` that satisfy
/// `predicate`.
///
/// Each call to `next` pulls nodes from `descendants` until one matches, so
/// nodes are only tested as the iterator is advanced.
pub struct Find<'a, P: Predicate> {
// Document the nodes belong to; handed to `Selection::new` by `into_selection`.
document: &'a Document,
// Source of candidate nodes, consumed incrementally by `next`.
descendants: Descendants<'a>,
// Filter applied to every candidate node.
predicate: P,
}

impl<'a, P: Predicate> Find<'a, P> {
    /// Consumes the iterator and gathers the index of every remaining
    /// matching node into a `Selection` over the same document.
    pub fn into_selection(self) -> Selection<'a> {
        // Copy the document reference out first: `map` takes `self` by value.
        let document = self.document;
        let indices = self.map(|matched| matched.index()).collect();
        Selection::new(document, indices)
    }
}

impl<'a, P: Predicate> Iterator for Find<'a, P> {
    type Item = Node<'a>;

    /// Advances the underlying descendants iterator until it yields a node
    /// that satisfies the predicate, returning that node, or `None` once the
    /// descendants are exhausted.
    fn next(&mut self) -> Option<Node<'a>> {
        // Borrow the predicate separately so the closure does not capture
        // `self` while `self.descendants` is mutably borrowed.
        let predicate = &self.predicate;
        self.descendants.find(|candidate| predicate.matches(candidate))
    }
}
7 changes: 3 additions & 4 deletions tests/node_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,8 @@ speculate! {
let main = document.find(Attr("id", "main"));
let main = main.iter().next().unwrap();

assert_eq!(main.find(Name("span")).len(), 1785);
assert_eq!(main.find(Name("div")).len(), 204);
assert_eq!(main.find(Name("span")).count(), 1785);
assert_eq!(main.find(Name("div")).count(), 204);
};
}

Expand Down Expand Up @@ -175,8 +175,7 @@ speculate! {
for i in 0..document.nodes.len() {
let node = document.nth(i).unwrap();
let actual = node.descendants().map(|node| node.index()).collect::<Vec<_>>();
let expected = node.find(Any);
let expected = expected.iter().map(|node| node.index()).collect::<Vec<_>>();
let expected = node.find(Any).map(|node| node.index()).collect::<Vec<_>>();
assert_eq!(actual, expected);
}
}
Expand Down

0 comments on commit c75dba9

Please sign in to comment.