Correct unicode equality issue.

gunthercox · Aug 13, 2016 · 2bf3faa · 2bf3faa
1 parent 2f7ea84
commit 2bf3faa
Show file tree

Hide file tree

Showing 7 changed files with 51 additions and 35 deletions.
diff --git a/chatterbot/adapters/logic/base_match.py b/chatterbot/adapters/logic/base_match.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 from .logic_adapter import LogicAdapter
 from .mixins import TieBreaking
 

diff --git a/chatterbot/adapters/logic/closest_match.py b/chatterbot/adapters/logic/closest_match.py
@@ -1,5 +1,7 @@
+# -*- coding: utf-8 -*-
 from .base_match import BaseMatchAdapter
-from fuzzywuzzy import process
+from fuzzywuzzy import fuzz
+import sys
 
 
 class ClosestMatchAdapter(BaseMatchAdapter):
@@ -26,25 +28,26 @@ def get(self, input_statement):
  else:
  raise self.EmptyDatasetException()
 
- # Get the text of each statement
- text_of_all_statements = []
+ confidence = -1
+ closest_match = input_statement
+
+ # Find the closest matching known statement
  for statement in statement_list:
- text_of_all_statements.append(statement.text)
+ ratio = fuzz.ratio(input_statement.text, statement.text)
 
- # Check if an exact match exists
- if input_statement.text in text_of_all_statements:
- return 1, input_statement
+  if ratio > confidence:
+  confidence = ratio
+  closest_match = statement
 
- # Get the closest matching statement from the database
- closest_match, confidence = process.extract(
+ '''
+ closest_match, confidence = process.extractOne(
  input_statement.text,
- text_of_all_statements,
-  limit=1
- )[0]
+ text_of_all_statements
+ )
+ '''
 
  # Convert the confidence integer to a percent
  confidence /= 100.0
 
- return confidence, next(
- (s for s in statement_list if s.text == closest_match), None
- )
+ return confidence, closest_match
+
diff --git a/chatterbot/chatterbot.py b/chatterbot/chatterbot.py
@@ -132,14 +132,14 @@ def get_response(self, input_item):
  """
  input_statement = self.input.process_input(input_item)
 
- # Select a response to the input statement
- confidence, response = self.logic.process(input_statement)
-
  existing_statement = self.storage.find(input_statement.text)
 
  if existing_statement:
  input_statement = existing_statement
 
+ # Select a response to the input statement
+ confidence, response = self.logic.process(input_statement)
+
  previous_statement = self.get_last_response_statement()
 
  if previous_statement:

diff --git a/chatterbot/conversation/statement.py b/chatterbot/conversation/statement.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 from .response import Response
 
 

diff --git a/chatterbot/corpus/corpus.py b/chatterbot/corpus/corpus.py
@@ -38,15 +38,14 @@ def load_corpus(self, dotted_path):
  """
  Return the data contained within a specified corpus.
  """
-
  corpus_path = self.get_file_path(dotted_path)
 
  corpora = []
 
  if os.path.isdir(corpus_path):
  for dirname, dirnames, filenames in os.walk(corpus_path):
  for datafile in filenames:
- if datafile.endswith(".json"):
+ if datafile.endswith('.json'):
 
  corpus = self.read_corpus(
  os.path.join(dirname, datafile)

diff --git a/tests/conversation_tests/test_statements.py b/tests/conversation_tests/test_statements.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 from unittest import TestCase
 from chatterbot.conversation import Statement, Response
 
@@ -7,7 +8,7 @@ class StatementTests(TestCase):
  def setUp(self):
  self.statement = Statement("A test statement.")
 
- def test_equality(self):
+ def test_list_equality(self):
  """
  It should be possible to check if a statement
  exists in the list of statements that another
@@ -17,6 +18,16 @@ def test_equality(self):
  self.assertEqual(len(self.statement.in_response_to), 1)
  self.assertIn(Response("Yo"), self.statement.in_response_to)
 
+ def test_list_equality_unicode(self):
+ """
+ Test that it is possible to check if a statement
+ is in a list of other statements when the
+ statements' text is unicode.
+ """
+ statements = [Statement("Hello"), Statement("我很好太感谢")]
+ statement = Statement("我很好太感谢")
+ self.assertIn(statement, statements)
+
  def test_update_response_list_new(self):
  self.statement.add_response(Response("Hello"))
  self.assertTrue(len(self.statement.in_response_to), 1)

diff --git a/tests/training_tests/test_list_training.py b/tests/training_tests/test_list_training.py
@@ -76,7 +76,7 @@ def test_database_has_correct_format(self):
  # There should be a total of 9 statements in the database after training
  self.assertEqual(self.chatbot.storage.count(), 9)
 
- # The first statement should be in responce to another statement yet
+ # The first statement should not be in response to any other statement
  self.assertEqual(
  len(self.chatbot.storage.find(conversation[0]).in_response_to),
  0
@@ -100,13 +100,13 @@ def test_training_with_unicode_characters(self):
  to the database.
  """
  conversation = [
- u"¶ ∑ ∞ ∫ π ∈ ℝ² ∖ ⩆ ⩇ ⩈ ⩉ ⩊ ⩋ ⪽ ⪾ ⪿ ⫀ ⫁ ⫂ ⋒ ⋓",
- u"⊂ ⊃ ⊆ ⊇ ⊈ ⊉ ⊊ ⊋ ⊄ ⊅ ⫅ ⫆ ⫋ ⫌ ⫃ ⫄ ⫇ ⫈ ⫉ ⫊ ⟃ ⟄",
- u"∠ ∡ ⦛ ⦞ ⦟ ⦢ ⦣ ⦤ ⦥ ⦦ ⦧ ⦨ ⦩ ⦪ ⦫ ⦬ ⦭ ⦮ ⦯ ⦓ ⦔ ⦕ ⦖ ⟀",
- u"∫ ∬ ∭ ∮ ∯ ∰ ∱ ∲ ∳ ⨋ ⨌ ⨍ ⨎ ⨏ ⨐ ⨑ ⨒ ⨓ ⨔ ⨕ ⨖ ⨗ ⨘ ⨙ ⨚ ⨛ ⨜",
- u"≁ ≂ ≃ ≄ ⋍ ≅ ≆ ≇ ≈ ≉ ≊ ≋ ≌ ⩯ ⩰ ⫏ ⫐ ⫑ ⫒ ⫓ ⫔ ⫕ ⫖",
- u"¬ ⫬ ⫭ ⊨ ⊭ ∀ ∁ ∃ ∄ ∴ ∵ ⊦ ⊬ ⊧ ⊩ ⊮ ⊫ ⊯ ⊪ ⊰ ⊱ ⫗ ⫘",
- u"∧ ∨ ⊻ ⊼ ⊽ ⋎ ⋏ ⟑ ⟇ ⩑ ⩒ ⩓ ⩔ ⩕ ⩖ ⩗ ⩘ ⩙ ⩚ ⩛ ⩜ ⩝ ⩞ ⩟ ⩠ ⩢",
+ u'¶ ∑ ∞ ∫ π ∈ ℝ² ∖ ⩆ ⩇ ⩈ ⩉ ⩊ ⩋ ⪽ ⪾ ⪿ ⫀ ⫁ ⫂ ⋒ ⋓',
+ u'⊂ ⊃ ⊆ ⊇ ⊈ ⊉ ⊊ ⊋ ⊄ ⊅ ⫅ ⫆ ⫋ ⫌ ⫃ ⫄ ⫇ ⫈ ⫉ ⫊ ⟃ ⟄',
+ u'∠ ∡ ⦛ ⦞ ⦟ ⦢ ⦣ ⦤ ⦥ ⦦ ⦧ ⦨ ⦩ ⦪ ⦫ ⦬ ⦭ ⦮ ⦯ ⦓ ⦔ ⦕ ⦖ ⟀',
+ u'∫ ∬ ∭ ∮ ∯ ∰ ∱ ∲ ∳ ⨋ ⨌ ⨍ ⨎ ⨏ ⨐ ⨑ ⨒ ⨓ ⨔ ⨕ ⨖ ⨗ ⨘ ⨙ ⨚ ⨛ ⨜',
+ u'≁ ≂ ≃ ≄ ⋍ ≅ ≆ ≇ ≈ ≉ ≊ ≋ ≌ ⩯ ⩰ ⫏ ⫐ ⫑ ⫒ ⫓ ⫔ ⫕ ⫖',
+ u'¬ ⫬ ⫭ ⊨ ⊭ ∀ ∁ ∃ ∄ ∴ ∵ ⊦ ⊬ ⊧ ⊩ ⊮ ⊫ ⊯ ⊪ ⊰ ⊱ ⫗ ⫘',
+ u'∧ ∨ ⊻ ⊼ ⊽ ⋎ ⋏ ⟑ ⟇ ⩑ ⩒ ⩓ ⩔ ⩕ ⩖ ⩗ ⩘ ⩙ ⩚ ⩛ ⩜ ⩝ ⩞ ⩟ ⩠ ⩢',
  ]
 
  self.chatbot.train(conversation)
@@ -117,8 +117,9 @@ def test_training_with_unicode_characters(self):
 
  def test_similar_sentence_gets_same_response_multiple_times(self):
  """
- Tests if the bot returns the same response for the same question (which
- is similar to the one present in the training set) when asked repeatedly.
+ Tests if the bot returns the same response for the same
+ question (which is similar to the one present in the training set)
+ when asked repeatedly.
  """
  training = [
  'how do you login to gmail?',
@@ -130,11 +131,11 @@ def test_similar_sentence_gets_same_response_multiple_times(self):
  self.chatbot.train(training)
 
  response_to_trained_set = self.chatbot.get_response('how do you login to gmail?')
- response_to_similar_question_1 = self.chatbot.get_response(similar_question)
- response_to_similar_question_2 = self.chatbot.get_response(similar_question)
+ response1 = self.chatbot.get_response(similar_question)
+ response2 = self.chatbot.get_response(similar_question)
 
- self.assertEqual(response_to_trained_set, response_to_similar_question_1)
- self.assertEqual(response_to_similar_question_1, response_to_similar_question_2)
+ self.assertEqual(response_to_trained_set, response1)
+ self.assertEqual(response1, response2)
 
 
 class ChatterBotResponseTests(ChatBotTestCase):