Skip to content

Commit

Permalink
Add more tests for custom tokenizer and filters
Browse files Browse the repository at this point in the history
  • Loading branch information
piroor committed Mar 5, 2018
1 parent 7bceef7 commit 81824f5
Showing 1 changed file with 44 additions and 6 deletions.
50 changes: 44 additions & 6 deletions test/extensions/hasher_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,31 @@ def test_custom_tokenizer_module
assert_equal hash, Hasher.word_hash("term", tokenizer: BigramTokenizer, token_filters: [])
end

# Tokenizer fixture that is callable both as a class and as an instance.
# Either form simply forwards the string to BigramTokenizer.
class BigramTokenizerClass
  class << self
    # Class-level entry point: lets the class itself be passed as a tokenizer.
    def call(str)
      BigramTokenizer.call(str)
    end
  end

  # Instance-level entry point: lets an instance be passed as a tokenizer.
  def call(str)
    BigramTokenizer.call(str)
  end
end

# Passes the BigramTokenizerClass class object itself as the tokenizer and
# checks the resulting word hash for "term".
def test_custom_tokenizer_class
  expected = { te: 1, er: 1, rm: 1 }
  actual = Hasher.word_hash("term", tokenizer: BigramTokenizerClass, token_filters: [])
  assert_equal expected, actual
end

# Passes a BigramTokenizerClass instance as the tokenizer and checks the
# resulting word hash for "term".
def test_custom_tokenizer_instance
  expected = { te: 1, er: 1, rm: 1 }
  tokenizer_instance = BigramTokenizerClass.new
  actual = Hasher.word_hash("term", tokenizer: tokenizer_instance, token_filters: [])
  assert_equal expected, actual
end

# Passes a lambda as the tokenizer and checks the resulting word hash for "term".
def test_custom_tokenizer_lambda
  hash = { te: 1, er: 1, rm: 1 }
  # The lambda's value is whatever BigramTokenizer.call returns; no other
  # expression is evaluated inside it, so nothing is computed and discarded.
  bigram_tokenizer = lambda do |str|
    BigramTokenizer.call(str)
  end
  assert_equal hash, Hasher.word_hash("term", tokenizer: bigram_tokenizer, token_filters: [])
end
Expand All @@ -49,12 +68,31 @@ def test_custom_token_filters_module
assert_equal hash, Hasher.word_hash("cat dog", token_filters: [CatFilter])
end

# Token-filter fixture that is callable both as a class and as an instance.
# Either form simply forwards the token list to CatFilter.
class CatFilterClass
  class << self
    # Class-level entry point: lets the class itself be listed in token_filters.
    def call(tokens)
      CatFilter.call(tokens)
    end
  end

  # Instance-level entry point: lets an instance be listed in token_filters.
  def call(tokens)
    CatFilter.call(tokens)
  end
end

# Lists the CatFilterClass class object itself as the only token filter and
# checks the resulting word hash for "cat dog".
def test_custom_token_filters_class
  expected = { dog: 1 }
  actual = Hasher.word_hash("cat dog", token_filters: [CatFilterClass])
  assert_equal expected, actual
end

# Lists a CatFilterClass instance as the only token filter and checks the
# resulting word hash for "cat dog".
def test_custom_token_filters_instance
  expected = { dog: 1 }
  filter_instance = CatFilterClass.new
  actual = Hasher.word_hash("cat dog", token_filters: [filter_instance])
  assert_equal expected, actual
end

# Lists a lambda as the only token filter and checks the resulting word hash
# for "cat dog".
def test_custom_token_filters_lambda
  hash = { dog: 1 }
  # The lambda's value is whatever CatFilter.call returns; no other
  # expression is evaluated inside it, so nothing is computed and discarded.
  cat_filter = lambda do |tokens|
    CatFilter.call(tokens)
  end
  assert_equal hash, Hasher.word_hash("cat dog", token_filters: [cat_filter])
end
Expand Down

0 comments on commit 81824f5

Please sign in to comment.