From c75dba944de4c01185711e0ca94a806ce46704bb Mon Sep 17 00:00:00 2001 From: Utkarsh Kukreti Date: Wed, 20 Jul 2016 15:48:09 +0530 Subject: [PATCH] feat(node::Find): also change `Node::find` to return `node::Find` Using `Node::find` to fetch only a few nodes should be _much_ faster as the descendant nodes are checked when required, instead of being checked upfront in the old implementation which returned a `Selection`. Only counting the nodes matching a predicate should also be faster than before due to no dynamic memory allocation in `Node::find` and `node::Find`. Fixes #19. --- README.md | 35 +++++++++++++++++++++++++---------- examples/stackoverflow.rs | 35 +++++++++++++++++++++++++---------- src/node.rs | 33 +++++++++++++++++++++++++++++++-- tests/node_tests.rs | 7 +++---- 4 files changed, 84 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index ddd39f7..79748c9 100644 --- a/README.md +++ b/README.md @@ -29,26 +29,41 @@ pub fn main() { println!("# Top 5 Questions"); for node in document.find(Class("question-summary")).iter().take(5) { - let question = node.find(Class("question-hyperlink")).first().unwrap(); - let votes = node.find(Class("vote-count-post")).first().unwrap().text(); - let answers = node.find(Class("status")).find(Name("strong")).first().unwrap().text(); - let tags = node.find(Class("post-tag")).iter().map(|tag| tag.text()).collect::<Vec<_>>(); - let asked_on = node.find(Class("relativetime")).first().unwrap().text(); - let asker = node.find(Class("user-details")).find(Name("a")).first().unwrap().text(); + let question = node.find(Class("question-hyperlink")).next().unwrap(); + let votes = node.find(Class("vote-count-post")).next().unwrap().text(); + let answers = node.find(Class("status")) + .into_selection() + .find(Name("strong")) + .first() + .unwrap() + .text(); + let tags = node.find(Class("post-tag")).map(|tag| tag.text()).collect::<Vec<_>>(); + let asked_on = node.find(Class("relativetime")).next().unwrap().text(); + let asker = 
node.find(Class("user-details")) + .into_selection() + .find(Name("a")) + .first() + .unwrap() + .text(); println!(" Question: {}", question.text()); println!(" Answers: {}", answers); println!(" Votes: {}", votes); println!(" Tagged: {}", tags.join(", ")); println!(" Asked on: {}", asked_on); println!(" Asker: {}", asker); - println!("Permalink: http://stackoverflow.com{}", question.attr("href").unwrap()); + println!("Permalink: http://stackoverflow.com{}", + question.attr("href").unwrap()); println!(""); } println!("# Top 10 Related Tags"); - for node in document.find(Attr("id", "h-related-tags")).parent().find(Name("div")).iter().take(10) { - let tag = node.find(Name("a")).first().unwrap().text(); - let count = node.find(Class("item-multiplier-count")).first().unwrap().text(); + for node in document.find(Attr("id", "h-related-tags")) + .parent() + .find(Name("div")) + .iter() + .take(10) { + let tag = node.find(Name("a")).next().unwrap().text(); + let count = node.find(Class("item-multiplier-count")).next().unwrap().text(); println!("{} ({})", tag, count); } } diff --git a/examples/stackoverflow.rs b/examples/stackoverflow.rs index a7cc688..ebeb003 100644 --- a/examples/stackoverflow.rs +++ b/examples/stackoverflow.rs @@ -16,26 +16,41 @@ pub fn main() { println!("# Top 5 Questions"); for node in document.find(Class("question-summary")).iter().take(5) { - let question = node.find(Class("question-hyperlink")).first().unwrap(); - let votes = node.find(Class("vote-count-post")).first().unwrap().text(); - let answers = node.find(Class("status")).find(Name("strong")).first().unwrap().text(); - let tags = node.find(Class("post-tag")).iter().map(|tag| tag.text()).collect::<Vec<_>>(); - let asked_on = node.find(Class("relativetime")).first().unwrap().text(); - let asker = node.find(Class("user-details")).find(Name("a")).first().unwrap().text(); + let question = node.find(Class("question-hyperlink")).next().unwrap(); + let votes = 
node.find(Class("vote-count-post")).next().unwrap().text(); + let answers = node.find(Class("status")) + .into_selection() + .find(Name("strong")) + .first() + .unwrap() + .text(); + let tags = node.find(Class("post-tag")).map(|tag| tag.text()).collect::<Vec<_>>(); + let asked_on = node.find(Class("relativetime")).next().unwrap().text(); + let asker = node.find(Class("user-details")) + .into_selection() + .find(Name("a")) + .first() + .unwrap() + .text(); println!(" Question: {}", question.text()); println!(" Answers: {}", answers); println!(" Votes: {}", votes); println!(" Tagged: {}", tags.join(", ")); println!(" Asked on: {}", asked_on); println!(" Asker: {}", asker); - println!("Permalink: http://stackoverflow.com{}", question.attr("href").unwrap()); + println!("Permalink: http://stackoverflow.com{}", + question.attr("href").unwrap()); println!(""); } println!("# Top 10 Related Tags"); - for node in document.find(Attr("id", "h-related-tags")).parent().find(Name("div")).iter().take(10) { - let tag = node.find(Name("a")).first().unwrap().text(); - let count = node.find(Class("item-multiplier-count")).first().unwrap().text(); + for node in document.find(Attr("id", "h-related-tags")) + .parent() + .find(Name("div")) + .iter() + .take(10) { + let tag = node.find(Name("a")).next().unwrap().text(); + let count = node.find(Class("item-multiplier-count")).next().unwrap().text(); println!("{} ({})", tag, count); } } diff --git a/src/node.rs b/src/node.rs index 70d7889..f3a73df 100644 --- a/src/node.rs +++ b/src/node.rs @@ -144,8 +144,12 @@ impl<'a> Node<'a> { String::from_utf8(buf).unwrap() } - pub fn find<P: Predicate>(&self, p: P) -> Selection<'a> { - Selection::new(self.document, [self.index].iter().cloned().collect()).find(p) + pub fn find<P: Predicate>(&self, predicate: P) -> Find<P> { + Find { + document: self.document, + descendants: self.descendants(), + predicate: predicate, + } } pub fn is<P: Predicate>(&self, p: P) -> bool { @@ -267,3 +271,28 @@ impl<'a> Iterator for Descendants<'a> { Some(self.current) } } + +pub struct Find<'a, P: Predicate> { + document: &'a Document, + descendants: Descendants<'a>, + predicate: P, +} + +impl<'a, P: Predicate> Find<'a, P> { + pub fn into_selection(self) -> Selection<'a> { + Selection::new(self.document, self.map(|node| node.index()).collect()) + } +} + +impl<'a, P: Predicate> Iterator for Find<'a, P> { + type Item = Node<'a>; + + fn next(&mut self) -> Option<Node<'a>> { + for node in &mut self.descendants { + if self.predicate.matches(&node) { + return Some(node); + } + } + None + } +} diff --git a/tests/node_tests.rs b/tests/node_tests.rs index 4c9fc26..dfa38a9 100644 --- a/tests/node_tests.rs +++ b/tests/node_tests.rs @@ -104,8 +104,8 @@ speculate! { let main = document.find(Attr("id", "main")); let main = main.iter().next().unwrap(); - assert_eq!(main.find(Name("span")).len(), 1785); - assert_eq!(main.find(Name("div")).len(), 204); + assert_eq!(main.find(Name("span")).count(), 1785); + assert_eq!(main.find(Name("div")).count(), 204); }; } @@ -175,8 +175,7 @@ speculate! { for i in 0..document.nodes.len() { let node = document.nth(i).unwrap(); let actual = node.descendants().map(|node| node.index()).collect::<Vec<_>>(); - let expected = node.find(Any); - let expected = expected.iter().map(|node| node.index()).collect::<Vec<_>>(); + let expected = node.find(Any).map(|node| node.index()).collect::<Vec<_>>(); assert_eq!(actual, expected); } }