Skip to content

Commit

Permalink
Merge pull request #273 from gunthercox/data
Browse files Browse the repository at this point in the history
Consolidate data files into corpus
  • Loading branch information
gunthercox authored Sep 3, 2016
2 parents a3af11f + c39adc3 commit e122c1e
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 58 deletions.
2 changes: 1 addition & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ include readme.md
include requirements.txt

recursive-include chatterbot/corpus/* *.json
recursive-include chatterbot/adapters/* *.json
recursive-include chatterbot/corpus/* *.data

recursive-exclude * *.pyc
recursive-exclude * *.py~
63 changes: 43 additions & 20 deletions chatterbot/adapters/logic/mathematical_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,35 @@ class MathematicalEvaluation(LogicAdapter):
5) Solve the equation & return result
"""

def __init__(self, **kwargs):
    """
    Initialize the adapter and load its math word data.

    The optional ``math_words_language`` keyword argument selects which
    language's data is loaded; it falls back to ``'english'`` when absent.
    The loaded data is stored on ``self.math_words``.
    """
    super(MathematicalEvaluation, self).__init__(**kwargs)

    # Resolve the language and load its data in one step.
    self.math_words = self.get_language_data(
        kwargs.get('math_words_language', 'english')
    )

def get_language_data(self, language):
    """
    Return the parsed math_words data for ``language``.

    The data file is located through the corpus using the dotted path
    ``chatterbot.corpus.<language>.math_words`` with a ``data`` extension,
    then decoded as JSON.

    Raises ``UnrecognizedLanguageException`` if reading the file fails
    with an ``IOError``.
    """
    from chatterbot.corpus import Corpus

    corpus_reader = Corpus()

    dotted_path = 'chatterbot.corpus.{}.math_words'.format(language)
    data_file_path = corpus_reader.get_file_path(
        dotted_path,
        extension='data'
    )

    try:
        with open(data_file_path) as data_file:
            math_words = json.load(data_file)
    except IOError:
        # Report both the requested language and the path that was tried.
        message = 'A math_words data file was not found for `{}` at `{}`.'.format(
            language, data_file_path
        )
        raise self.UnrecognizedLanguageException(message)

    return math_words

def can_process(self, statement):
"""
Determines whether it is appropriate for this
Expand Down Expand Up @@ -129,44 +158,30 @@ def normalize(self, string):
# Returning normalized text
return string

def load_data(self, language):
    """
    Load the math word data for ``language`` into ``self.data``.

    Only ``"english"`` is handled; for any other value nothing is read
    and ``self.data`` is left untouched.
    """
    if language != "english":
        return

    # The data file sits in a ``data`` directory next to this module.
    module_directory = os.path.dirname(__file__)
    json_path = os.path.join(module_directory, 'data', 'math_words_EN.json')

    with open(json_path) as json_file:
        self.data = json.load(json_file)

def substitute_words(self, string):
"""
Substitutes numbers for words.
"""
self.load_data("english")

condensed_string = '_'.join(string.split())

for word in self.data["words"]:
for word in self.math_words["words"]:
condensed_string = re.sub(
'_'.join(word.split(' ')),
self.data["words"][word],
self.math_words["words"][word],
condensed_string
)

for number in self.data["numbers"]:
for number in self.math_words["numbers"]:
condensed_string = re.sub(
number,
str(self.data["numbers"][number]),
str(self.math_words["numbers"][number]),
condensed_string
)

for scale in self.data["scales"]:
for scale in self.math_words["scales"]:
condensed_string = re.sub(
"_" + scale,
" " + self.data["scales"][scale],
" " + self.math_words["scales"][scale],
condensed_string
)

Expand Down Expand Up @@ -197,3 +212,11 @@ def substitute_words(self, string):
condensed_string[end_index] += " )"

return ' '.join(condensed_string)

class UnrecognizedLanguageException(Exception):
    """
    Raised when math word data cannot be loaded for a requested language.

    The message is kept on ``self.value`` and is also passed to the base
    ``Exception`` so that ``args`` always carries it.
    """

    def __init__(self, value='The specified language was not recognized'):
        # Without this call ``args`` stays empty whenever the default
        # message or the ``value=`` keyword is used, so the message is
        # lost when the exception is pickled or inspected through ``args``.
        # The base class is named explicitly instead of using
        # ``super(UnrecognizedLanguageException, self)`` because callers
        # reach this class as an attribute of MathematicalEvaluation, so
        # its bare name may not be resolvable from inside this method.
        Exception.__init__(self, value)
        self.value = value

    def __str__(self):
        # Kept as the repr of the message (i.e. quoted) so existing output
        # is unchanged.
        return repr(self.value)
6 changes: 3 additions & 3 deletions chatterbot/corpus/corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ def __init__(self):
current_directory = os.path.dirname(__file__)
self.data_directory = os.path.join(current_directory, 'data')

def get_file_path(self, dotted_path):
def get_file_path(self, dotted_path, extension='json'):
"""
Reads a dotted file path and returns the file path.
"""
Expand All @@ -18,8 +18,8 @@ def get_file_path(self, dotted_path):

corpus_path = os.path.join(*parts)

if os.path.exists(corpus_path + '.json'):
corpus_path += '.json'
if os.path.exists(corpus_path + '.{}'.format(extension)):
corpus_path += '.{}'.format(extension)

return corpus_path

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ def setUp(self):
self.adapter = MathematicalEvaluation()

def test_can_process(self):
statement = Statement("What is 10 + 10 + 10?")
statement = Statement('What is 10 + 10 + 10?')
self.assertTrue(self.adapter.can_process(statement))

def test_can_not_process(self):
statement = Statement("What is your favorite song?")
statement = Statement('What is your favorite song?')
self.assertFalse(self.adapter.can_process(statement))

def test_is_integer(self):
Expand All @@ -32,19 +32,23 @@ def test_normalize_empty_string(self):
"""
If a string is empty, the string should be returned.
"""
self.assertEqual(self.adapter.normalize(""), "")
self.assertEqual(self.adapter.normalize(''), '')

def test_normalize_text_to_lowercase(self):
normalized = self.adapter.normalize("HELLO")
normalized = self.adapter.normalize('HELLO')
self.assertTrue(normalized.islower())

def test_normalize_punctuation(self):
normalized = self.adapter.normalize("the end.")
self.assertEqual(normalized, "the end")
normalized = self.adapter.normalize('the end.')
self.assertEqual(normalized, 'the end')

def test_load_data(self):
self.adapter.load_data("english")
self.assertIn("numbers", self.adapter.data)
def test_load_english_data(self):
self.adapter.get_language_data('english')
self.assertIn('numbers', self.adapter.math_words)

def test_load_nonexistent_data(self):
with self.assertRaises(MathematicalEvaluation.UnrecognizedLanguageException):
self.adapter.get_language_data('0101010')


class MathematicalEvaluationOperationTests(TestCase):
Expand All @@ -58,68 +62,68 @@ def setUp(self):
self.python_version = sys.version_info[0]

def test_addition_operator(self):
statement = Statement("What is 100 + 54?")
statement = Statement('What is 100 + 54?')
confidence, response = self.adapter.process(statement)
self.assertEqual(response.text, "( 100 + 54 ) = 154")
self.assertEqual(response.text, '( 100 + 54 ) = 154')

def test_subtraction_operator(self):
statement = Statement("What is 100 - 58?")
statement = Statement('What is 100 - 58?')
confidence, response = self.adapter.process(statement)
self.assertEqual(response.text, "( 100 - 58 ) = 42")
self.assertEqual(response.text, '( 100 - 58 ) = 42')

def test_multiplication_operator(self):
statement = Statement("What is 100 * 20")
statement = Statement('What is 100 * 20')
confidence, response = self.adapter.process(statement)
self.assertEqual(response.text, "( 100 * 20 ) = 2000")
self.assertEqual(response.text, '( 100 * 20 ) = 2000')

def test_division_operator(self):
statement = Statement("What is 100 / 20")
statement = Statement('What is 100 / 20')
confidence, response = self.adapter.process(statement)

if self.python_version <= 2:
self.assertEqual(response.text, "( 100 / 20 ) = 5")
self.assertEqual(response.text, '( 100 / 20 ) = 5')
else:
self.assertEqual(response.text, "( 100 / 20 ) = 5.0")
self.assertEqual(response.text, '( 100 / 20 ) = 5.0')

def test_parenthesized_multiplication_and_addition(self):
statement = Statement("What is 100 + ( 1000 * 2 )?")
statement = Statement('What is 100 + ( 1000 * 2 )?')
confidence, response = self.adapter.process(statement)
self.assertEqual(response.text, "( 100 + ( ( 1000 * ( 2 ) ) ) ) = 2100")
self.assertEqual(response.text, '( 100 + ( ( 1000 * ( 2 ) ) ) ) = 2100')

def test_parenthesized_with_words(self):
statement = Statement("What is four plus 100 + ( 100 * 2 )?")
statement = Statement('What is four plus 100 + ( 100 * 2 )?')
confidence, response = self.adapter.process(statement)
self.assertEqual(response.text, "( 4 + ( 100 + ( ( 100 * ( 2 ) ) ) ) ) = 304")
self.assertEqual(response.text, '( 4 + ( 100 + ( ( 100 * ( 2 ) ) ) ) ) = 304')

def test_word_numbers_addition(self):
statement = Statement("What is one hundred + four hundred?")
statement = Statement('What is one hundred + four hundred?')
confidence, response = self.adapter.process(statement)
self.assertEqual(response.text, "( 100 + 400 ) = 500")
self.assertEqual(response.text, '( 100 + 400 ) = 500')

def test_word_division_operator(self):
statement = Statement("What is 100 divided by 100?")
statement = Statement('What is 100 divided by 100?')
confidence, response = self.adapter.process(statement)

if self.python_version <= 2:
self.assertEqual(response.text, "( 100 / 100 ) = 1")
self.assertEqual(response.text, '( 100 / 100 ) = 1')
else:
self.assertEqual(response.text, "( 100 / 100 ) = 1.0")
self.assertEqual(response.text, '( 100 / 100 ) = 1.0')

def test_large_word_division_operator(self):
statement = Statement("What is one thousand two hundred four divided by one hundred?")
statement = Statement('What is one thousand two hundred four divided by one hundred?')
confidence, response = self.adapter.process(statement)

if self.python_version <= 2:
self.assertEqual(response.text, "( 1000 + 200 + 4 ) / ( 100 ) = 12")
self.assertEqual(response.text, '( 1000 + 200 + 4 ) / ( 100 ) = 12')
else:
self.assertEqual(response.text, "( 1000 + 200 + 4 ) / ( 100 ) = 12.04")
self.assertEqual(response.text, '( 1000 + 200 + 4 ) / ( 100 ) = 12.04')

def test_negative_multiplication(self):
statement = Statement("What is -105 * 5")
statement = Statement('What is -105 * 5')
confidence, response = self.adapter.process(statement)
self.assertEqual(response.text, "( -105 * 5 ) = -525")
self.assertEqual(response.text, '( -105 * 5 ) = -525')

def test_negative_decimal_multiplication(self):
statement = Statement("What is -100.5 * 20?")
statement = Statement('What is -100.5 * 20?')
confidence, response = self.adapter.process(statement)
self.assertEqual(response.text, "( -100.5 * 20 ) = -2010.0")
self.assertEqual(response.text, '( -100.5 * 20 ) = -2010.0')

0 comments on commit e122c1e

Please sign in to comment.