Commit 01528ca

Describe new modules and classes
1 parent f08768d commit 01528ca

File tree

4 files changed: 10 additions, 0 deletions


lib/classifier-reborn/extensions/token_filter/stemmer.rb

Lines changed: 1 addition & 0 deletions
@@ -5,6 +5,7 @@
 
 module ClassifierReborn
   module TokenFilter
+    # This filter converts given tokens to their stemmed versions in the language.
     module Stemmer
       module_function
 

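The Stemmer body itself is elided from this diff, so the following self-contained sketch only illustrates the `module_function` filter shape the file uses. The `SketchTokenFilter` name, the `call` method, and the toy suffix-stripping rule are all assumptions standing in for the gem's actual stemming dependency.

```ruby
# Hypothetical sketch of a token filter in the same module_function style.
# The real Stemmer delegates to a stemming library; a toy rule that strips
# a trailing "s" keeps this example self-contained.
module SketchTokenFilter
  module Stemmer
    module_function

    # Receives an array of token strings and returns their "stemmed" forms.
    def call(tokens)
      tokens.map { |t| t.sub(/s\z/, '') }
    end
  end
end

puts SketchTokenFilter::Stemmer.call(%w[tokens filters word]).inspect
# => ["token", "filter", "word"]
```

`module_function` makes `call` invokable directly on the module, which is why the filter needs no instances.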
lib/classifier-reborn/extensions/token_filter/stopword.rb

Lines changed: 1 addition & 0 deletions
@@ -5,6 +5,7 @@
 
 module ClassifierReborn
   module TokenFilter
+    # This filter removes stopwords in the language from the given tokens.
     module Stopword
       STOPWORDS_PATH = [File.expand_path(File.dirname(__FILE__) + '/../../../../data/stopwords')]
 

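The real Stopword module loads its word lists from the `STOPWORDS_PATH` directory shown above; the sketch below is a hypothetical stand-in that uses a tiny inline set instead, just to show the filter's shape.

```ruby
# Hypothetical sketch of a stopword filter; the real module reads its
# word lists from files under STOPWORDS_PATH, while this stand-in uses
# a small hard-coded set so the example stays self-contained.
module SketchTokenFilter
  module Stopword
    STOPWORDS = %w[a an the of in].freeze

    module_function

    # Receives an array of token strings and drops any that are stopwords.
    def call(tokens)
      tokens.reject { |t| STOPWORDS.include?(t.downcase) }
    end
  end
end

puts SketchTokenFilter::Stopword.call(%w[the stem of a word]).inspect
# => ["stem", "word"]
```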
lib/classifier-reborn/extensions/tokenizer/token.rb

Lines changed: 6 additions & 0 deletions
@@ -6,6 +6,12 @@
 module ClassifierReborn
   module Tokenizer
     class Token < String
+      # The class can be created with one token string and extra attributes. E.g.,
+      #   t = ClassifierReborn::Tokenizer::Token.new 'Tokenize', stemmable: true, maybe_stopword: false
+      #
+      # Attributes available are:
+      #   stemmable:      true  Whether the token can be stemmed. This must be false for un-stemmable terms; otherwise it should be true.
+      #   maybe_stopword: true  Whether the token may be a stopword. This must be false for terms which can never be stopwords; otherwise it should be true.
       def initialize(string, stemmable: true, maybe_stopword: true)
         super(string)
         @stemmable = stemmable

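The constructor documented above can be exercised with a self-contained copy of the class. Only the initializer appears in this commit, so the `stemmable?`/`maybe_stopword?` readers below are assumptions added for illustration.

```ruby
# Self-contained sketch of the Token class shown in the diff: a String
# subclass that carries two extra flags. The predicate readers are
# hypothetical; only the constructor is part of the commit.
module Sketch
  class Token < String
    def initialize(string, stemmable: true, maybe_stopword: true)
      super(string)
      @stemmable = stemmable
      @maybe_stopword = maybe_stopword
    end

    # Assumed reader: whether the token can be stemmed.
    def stemmable?
      @stemmable
    end

    # Assumed reader: whether the token may be a stopword.
    def maybe_stopword?
      @maybe_stopword
    end
  end
end

t = Sketch::Token.new 'Tokenize', stemmable: true, maybe_stopword: false
puts t            # behaves like the plain string "Tokenize"
puts t.stemmable? # => true
```

Because `Token` inherits from `String`, it compares equal to the plain string it wraps, so existing string-based code keeps working while the extra flags ride along.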
lib/classifier-reborn/extensions/tokenizer/whitespace.rb

Lines changed: 2 additions & 0 deletions
@@ -7,6 +7,8 @@
 
 module ClassifierReborn
   module Tokenizer
+    # This tokenizes the given input as whitespace-separated terms.
+    # It mainly aims to tokenize sentences written with a space between words, as in English, French, and other languages.
     module Whitespace
       module_function
 

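The tokenizer body is elided from this diff, so the sketch below is a minimal stand-in for the documented behaviour: Ruby's `String#split` with no argument already splits on runs of whitespace. The `SketchTokenizer` name and `call` method are assumptions.

```ruby
# Hypothetical sketch of a whitespace tokenizer in the same
# module_function style. String#split with no argument splits on runs
# of whitespace, matching the behaviour the comment describes.
module SketchTokenizer
  module Whitespace
    module_function

    # Receives a sentence and returns its whitespace-separated terms.
    def call(str)
      str.split
    end
  end
end

puts SketchTokenizer::Whitespace.call("tokenize this  sentence").inspect
# => ["tokenize", "this", "sentence"]
```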