From 49660d7225006f4d73d4ad114bbf280457f47216 Mon Sep 17 00:00:00 2001 From: root Date: Wed, 8 Oct 2014 21:40:57 -0400 Subject: [PATCH] MODERNIZE. Almost all deprecated features fixed. --- src/classifier.rs | 20 ++++++++++++-------- src/ngram.rs | 2 +- src/phonetics.rs | 4 ++-- src/tokenize.rs | 2 +- 4 files changed, 16 insertions(+), 12 deletions(-) diff --git a/src/classifier.rs b/src/classifier.rs index b5c67f3..59bb44b 100644 --- a/src/classifier.rs +++ b/src/classifier.rs @@ -3,6 +3,7 @@ extern crate stem; use tokenize::tokenize; use stem::get; use std::collections::HashMap; +use std::collections::hashmap::{Occupied, Vacant}; pub struct NaiveBayesClassifier { documents: HashMap>, @@ -15,18 +16,21 @@ impl NaiveBayesClassifier { } pub fn train(&mut self, text: String, classification: String) { - let classification_map = self.documents.find_or_insert(classification, HashMap::new()); + let classification_map = match self.documents.entry(classification) { + Vacant(entry) => entry.set(HashMap::new()), + Occupied(entry) => entry.into_mut() + }; + let stemmed_and_tokenized = get_tokenized_and_stemmed(text); - for stemmed_word in stemmed_and_tokenized.move_iter() { - classification_map.insert_or_update_with(stemmed_word, 1, |_key, val| *val += 1); + for stemmed_word in stemmed_and_tokenized.into_iter() { + match classification_map.entry(stemmed_word) { + Vacant(entry) => { entry.set(1); }, + Occupied(mut entry) => *entry.get_mut() += 1 + } } self.total_document_count += 1; } - pub fn train_ngram(&mut self, text: String, classification: String, n: uint) { - - } - pub fn guess(&self, text: String) -> String { let stemmed_and_tokenized = get_tokenized_and_stemmed(text); @@ -50,7 +54,7 @@ impl NaiveBayesClassifier { let mut answer_label: String = String::from_str(""); let mut answer_probability = 0.0; - for (k,v) in label_probabilities.move_iter() { + for (k,v) in label_probabilities.into_iter() { if answer_probability <= v { answer_label = k.clone(); answer_probability = v; diff --git a/src/ngram.rs b/src/ngram.rs index e8c87a7..00c5d11 100644 --- a/src/ngram.rs +++ b/src/ngram.rs @@ -32,7 +32,7 @@ impl<'a> NGram<'a> { //Fill the rest of the ngram for i in range(0, count) { let a = tokenized_sequence.slice(i,i+self.n); - let sl = Vec::from_slice(a); + let sl = a.to_vec(); ngram_result.push(sl); } diff --git a/src/phonetics.rs b/src/phonetics.rs index 17b0553..df45fbd 100644 --- a/src/phonetics.rs +++ b/src/phonetics.rs @@ -47,13 +47,13 @@ fn strip_similar_chars(chars: Vec) -> Vec { } let mut chars_no_hw = Vec::new(); let mut chars_no_vowels = Vec::new(); - for c in enc_chars.move_iter() { + for c in enc_chars.into_iter() { if c != '9' { chars_no_hw.push(c); } } chars_no_hw.dedup(); - for c in chars_no_hw.move_iter() { + for c in chars_no_hw.into_iter() { if c != '0' { chars_no_vowels.push(c); } diff --git a/src/tokenize.rs b/src/tokenize.rs index 6a72ce7..3d216bb 100644 --- a/src/tokenize.rs +++ b/src/tokenize.rs @@ -1,7 +1,7 @@ pub fn tokenize<'a>(text: &'a str) -> Vec<&str> { let vec_with_empty: Vec<&str> = text.split(|c: char| char_is_token(c)).collect(); let mut ret_vec = Vec::new(); - for s in vec_with_empty.move_iter() { + for s in vec_with_empty.into_iter() { if s.len() > 0 { ret_vec.push(s); }