MODERNIZE. Almost all deprecated features fixed.
root committed Oct 9, 2014
1 parent c5ac7be commit 49660d7
Showing 4 changed files with 16 additions and 12 deletions.
src/classifier.rs: 20 changes (12 additions, 8 deletions)
@@ -3,6 +3,7 @@ extern crate stem;
 use tokenize::tokenize;
 use stem::get;
 use std::collections::HashMap;
+use std::collections::hashmap::{Occupied, Vacant};

 pub struct NaiveBayesClassifier {
     documents: HashMap<String, HashMap<String, uint>>,
@@ -15,18 +16,21 @@ impl NaiveBayesClassifier {
     }

     pub fn train(&mut self, text: String, classification: String) {
-        let classification_map = self.documents.find_or_insert(classification, HashMap::new());
+        let classification_map = match self.documents.entry(classification) {
+            Vacant(entry) => entry.set(HashMap::new()),
+            Occupied(entry) => entry.into_mut()
+        };

         let stemmed_and_tokenized = get_tokenized_and_stemmed(text);
-        for stemmed_word in stemmed_and_tokenized.move_iter() {
-            classification_map.insert_or_update_with(stemmed_word, 1, |_key, val| *val += 1);
+        for stemmed_word in stemmed_and_tokenized.into_iter() {
+            match classification_map.entry(stemmed_word) {
+                Vacant(entry) => { entry.set(1); },
+                Occupied(mut entry) => *entry.get_mut() += 1
+            }
         }
         self.total_document_count += 1;
     }

     pub fn train_ngram(&mut self, text: String, classification: String, n: uint) {

     }

     pub fn guess(&self, text: String) -> String {
         let stemmed_and_tokenized = get_tokenized_and_stemmed(text);

@@ -50,7 +54,7 @@ impl NaiveBayesClassifier {

         let mut answer_label: String = String::from_str("");
         let mut answer_probability = 0.0;
-        for (k,v) in label_probabilities.move_iter() {
+        for (k,v) in label_probabilities.into_iter() {
             if answer_probability <= v {
                 answer_label = k.clone();
                 answer_probability = v;
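
For context: find_or_insert and insert_or_update_with were replaced above by the then-new entry API from the collections reform, which resolves the key once and hands back either a Vacant or an Occupied slot. A minimal sketch of the counting idiom in the pre-1.0 Rust of this commit (uint and the std::collections::hashmap paths are long gone; the function name here is illustrative):

use std::collections::HashMap;
use std::collections::hashmap::{Occupied, Vacant};

fn count_words(words: Vec<String>) -> HashMap<String, uint> {
    let mut counts: HashMap<String, uint> = HashMap::new();
    for word in words.into_iter() {
        // entry() hashes the key once and reports whether a slot already exists
        match counts.entry(word) {
            Vacant(entry) => { entry.set(1u); },          // first sighting: insert 1
            Occupied(mut entry) => *entry.get_mut() += 1  // seen before: bump the count
        }
    }
    counts
}
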
src/ngram.rs: 2 changes (1 addition, 1 deletion)
@@ -32,7 +32,7 @@ impl<'a> NGram<'a> {
         //Fill the rest of the ngram
         for i in range(0, count) {
             let a = tokenized_sequence.slice(i,i+self.n);
-            let sl = Vec::from_slice(a);
+            let sl = a.to_vec();
             ngram_result.push(sl);
         }
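
Vec::from_slice(a) and a.to_vec() are equivalent: both clone every element of the slice into a newly allocated Vec; only the spelling was deprecated. A tiny sketch using the same slice method seen in the loop above (variable names illustrative):

let base = vec![1u, 2, 3, 4];
let a = base.slice(1, 3);  // a is &[uint], like the slice in the ngram loop
let sl = a.to_vec();       // clones the two elements; replaces Vec::from_slice(a)
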
src/phonetics.rs: 4 changes (2 additions, 2 deletions)
@@ -47,13 +47,13 @@ fn strip_similar_chars(chars: Vec<char>) -> Vec<char> {
     }
     let mut chars_no_hw = Vec::new();
     let mut chars_no_vowels = Vec::new();
-    for c in enc_chars.move_iter() {
+    for c in enc_chars.into_iter() {
         if c != '9' {
             chars_no_hw.push(c);
         }
     }
     chars_no_hw.dedup();
-    for c in chars_no_hw.move_iter() {
+    for c in chars_no_hw.into_iter() {
         if c != '0' {
             chars_no_vowels.push(c);
         }
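
The move_iter-to-into_iter rename repeated across these files is also part of the collections reform, and the semantics are unchanged: into_iter() consumes the vector and yields owned elements, unlike iter(), which yields references. A minimal sketch (variable names illustrative):

let enc = vec!['a', 'b', 'c'];
for c in enc.into_iter() {
    // c is an owned char; `enc` has been moved and cannot be used after the loop
    println!("{}", c);
}
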
src/tokenize.rs: 2 changes (1 addition, 1 deletion)
@@ -1,7 +1,7 @@
 pub fn tokenize<'a>(text: &'a str) -> Vec<&str> {
     let vec_with_empty: Vec<&str> = text.split(|c: char| char_is_token(c)).collect();
     let mut ret_vec = Vec::new();
-    for s in vec_with_empty.move_iter() {
+    for s in vec_with_empty.into_iter() {
         if s.len() > 0 {
             ret_vec.push(s);
         }

1 comment on commit 49660d7

@lexi-sh (Owner) commented on 49660d7, Oct 9, 2014


The only thing still deprecated: vec.append(slice). There's a lot of discomfort with that change; see here:

rust-lang/rust#17029
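
For anyone reading along, a sketch of one way to splice a slice onto a Vec without the deprecated vec.append(slice), assuming push_all was still available and undeprecated in the std of this date (variable names illustrative):

let mut v = vec![1u, 2];
let tail = vec![3u, 4];
v.push_all(tail.as_slice()); // extends v in place, rather than consuming it as append did
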
