From 8c6bd8f56723963f4ea833d54ba90037df1e997b Mon Sep 17 00:00:00 2001
From: Dhananjay <dhananjay2002pai@gmail.com>
Date: Fri, 7 Jan 2022 03:25:40 +0530
Subject: [PATCH 1/5] Added ignore feature to parse function

---
 number_parser/parser.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/number_parser/parser.py b/number_parser/parser.py
index e0d67c2..13739d7 100644
--- a/number_parser/parser.py
+++ b/number_parser/parser.py
@@ -298,7 +298,7 @@ def parse_fraction(input_string, language=None):
     return None
 
 
-def parse(input_string, language=None):
+def parse(input_string, language=None, ignore=None):
     """
     Converts all the numbers in a sentence written in natural language to their numeric type while keeping
     the other words unchanged. Returns the transformed string.
@@ -326,6 +326,13 @@ def _build_and_add_number(pop_last_space=False):
                 current_sentence.pop()
 
     for token in tokens:
+        if ignore and token in ignore:
+            # An ignored word is kept verbatim: flush any number collected so
+            # far, then copy the token through untouched.
+            _build_and_add_number()
+            current_sentence.append(token)
+            continue
+
         compare_token = _strip_accents(token.lower())
         ordinal_number = _is_ordinal_token(compare_token, lang_data)
 

From f79ff0622b4cfb39fcec8b7838be8d360902f920 Mon Sep 17 00:00:00 2001
From: Dhananjay <dhananjay2002pai@gmail.com>
Date: Fri, 7 Jan 2022 03:28:10 +0530
Subject: [PATCH 2/5] Added few tests for number parsing including ignore case

---
 tests/test_number_parsing.py | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/tests/test_number_parsing.py b/tests/test_number_parsing.py
index 2de6d09..e824c86 100644
--- a/tests/test_number_parsing.py
+++ b/tests/test_number_parsing.py
@@ -121,6 +121,31 @@ def test_parse_sentences_ordinal(expected, test_input, lang):
     assert parse(test_input, lang) == expected
 
 
+@pytest.mark.parametrize(
+    "test_input,expected,lang,ignore",
+    [
+        ('fifty fifth sixty seventh', 'fifty 5 67', 'en', ['fifty','seven']),
+        # en
+        ('Two thousand sentences', '2 thousand sentences', 'en', ['thousand']),
+        ('twenty one', '20 one', 'en', ['one']),
+        ('I have three apples and one pear.', 'I have three apples and 1 pear.', 'en', ['three']),
+        # numeric
+        ('eleven', 'eleven', 'en', ['eleven']),
+        ('one hundred and forty two', 'one 140 two', 'en', ['one','two']),
+        ('one hundred and one', 'one 100 one', 'en', ['one']),
+        ('ignore this sentence', 'ignore this sentence', 'en', ['ignore']),
+        ('five hundred sixty seven thousand twenty four', 'five 167020 four', 'en', ['fifty','five','four']),
+        ('one million four hundred twenty-three thousand nine hundred twenty-two', '1000400 twenty-3900 twenty-two', 'en', ['two','twenty']),
+        ('nine hundred ninety-nine thousand nine hundred ninety-nine', 'nine 190 nine 1000 nine 190 nine', 'en', ['nine']),
+        ('one million fifty thousand', '1000000 fifty 1000', 'en', ['fifty']),
+        ('two billion one hundred forty seven million four hundred eighty three thousand six hundred forty seven', 
+         'two 1000000000 one 140 seven 1000483 thousand 640 seven', 'en', ['two','thousand','seven','one']),
+
+    ]
+)
+def test_parse_including_ignore(expected, test_input, lang, ignore):
+    assert parse(test_input, lang, ignore) == expected
+
 
 @pytest.mark.parametrize(
     "test_input,expected,lang",

From 6e5b7aed46076333f85be2d45ddefdb1f42896f9 Mon Sep 17 00:00:00 2001
From: Dhananjay <dhananjay2002pai@gmail.com>
Date: Fri, 7 Jan 2022 03:59:41 +0530
Subject: [PATCH 3/5] Add ordinal test cases for ignore; use double quotes for expected strings

---
 tests/test_number_parsing.py | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/tests/test_number_parsing.py b/tests/test_number_parsing.py
index e824c86..c2ea7fa 100644
--- a/tests/test_number_parsing.py
+++ b/tests/test_number_parsing.py
@@ -124,22 +124,25 @@ def test_parse_sentences_ordinal(expected, test_input, lang):
 @pytest.mark.parametrize(
     "test_input,expected,lang,ignore",
     [
-        ('fifty fifth sixty seventh', 'fifty 5 67', 'en', ['fifty','seven']),
+        ('fifty fifth sixty seventh', "fifty 5 67", 'en', ['fifty','seven']),
+        ('hundredth and one', "100 and 1", 'en',[]),
+        ('one hundred and forty second', "140 second", 'en', ['second']),
+        ('five thousandth and one', "5000 and one", 'en', ['one']),
         # en
-        ('Two thousand sentences', '2 thousand sentences', 'en', ['thousand']),
-        ('twenty one', '20 one', 'en', ['one']),
-        ('I have three apples and one pear.', 'I have three apples and 1 pear.', 'en', ['three']),
+        ('Two thousand sentences', "2 thousand sentences", 'en', ['thousand']),
+        ('twenty one', "20 one", 'en', ['one']),
+        ('I have three apples and one pear.', "I have three apples and 1 pear.", 'en', ['three']),
         # numeric
-        ('eleven', 'eleven', 'en', ['eleven']),
-        ('one hundred and forty two', 'one 140 two', 'en', ['one','two']),
+        ('eleven', "eleven", 'en', ['eleven']),
+        ('one hundred and forty two', "one 140 two", 'en', ['one','two']),
         ('one hundred and one', 'one 100 one', 'en', ['one']),
-        ('ignore this sentence', 'ignore this sentence', 'en', ['ignore']),
-        ('five hundred sixty seven thousand twenty four', 'five 167020 four', 'en', ['fifty','five','four']),
-        ('one million four hundred twenty-three thousand nine hundred twenty-two', '1000400 twenty-3900 twenty-two', 'en', ['two','twenty']),
-        ('nine hundred ninety-nine thousand nine hundred ninety-nine', 'nine 190 nine 1000 nine 190 nine', 'en', ['nine']),
-        ('one million fifty thousand', '1000000 fifty 1000', 'en', ['fifty']),
+        ('ignore this sentence', "ignore this sentence", 'en', ['ignore']),
+        ('five hundred sixty seven thousand twenty four', "five 167020 four", 'en', ['fifty','five','four']),
+        ('one million four hundred twenty-three thousand nine hundred twenty-two', "1000400 twenty-3900 twenty-two", 'en', ['two','twenty']),
+        ('nine hundred ninety-nine thousand nine hundred ninety-nine', "nine 190 nine 1000 nine 190 nine", 'en', ['nine']),
+        ('one million fifty thousand', "1000000 fifty 1000", 'en', ['fifty']),
         ('two billion one hundred forty seven million four hundred eighty three thousand six hundred forty seven', 
-         'two 1000000000 one 140 seven 1000483 thousand 640 seven', 'en', ['two','thousand','seven','one']),
+         "two 1000000000 one 140 seven 1000483 thousand 640 seven", 'en', ['two','thousand','seven','one']),
 
     ]
 )

From 237d9df448af462059faed64fa8ba8184756ef84 Mon Sep 17 00:00:00 2001
From: Dhananjay <dhananjay2002pai@gmail.com>
Date: Sun, 9 Jan 2022 02:27:18 +0530
Subject: [PATCH 4/5] skip token test case coverage

---
 tests/test_number_parsing.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_number_parsing.py b/tests/test_number_parsing.py
index c2ea7fa..8013888 100644
--- a/tests/test_number_parsing.py
+++ b/tests/test_number_parsing.py
@@ -135,8 +135,8 @@ def test_parse_sentences_ordinal(expected, test_input, lang):
         # numeric
         ('eleven', "eleven", 'en', ['eleven']),
         ('one hundred and forty two', "one 140 two", 'en', ['one','two']),
-        ('one hundred and one', 'one 100 one', 'en', ['one']),
-        ('ignore this sentence', "ignore this sentence", 'en', ['ignore']),
+        ('one hundred and one', "one 100 one", 'en', ['one']),
+        ('seven thousand and nothing else',"seven 1000 and nothing else", 'en', ['seven']),
         ('five hundred sixty seven thousand twenty four', "five 167020 four", 'en', ['fifty','five','four']),
         ('one million four hundred twenty-three thousand nine hundred twenty-two', "1000400 twenty-3900 twenty-two", 'en', ['two','twenty']),
         ('nine hundred ninety-nine thousand nine hundred ninety-nine', "nine 190 nine 1000 nine 190 nine", 'en', ['nine']),

From 4fd5e95dced7d4bcbf9418c4712a5c5eac2f82fd Mon Sep 17 00:00:00 2001
From: Dhananjay <dhananjay2002pai@gmail.com>
Date: Sun, 9 Jan 2022 02:55:33 +0530
Subject: [PATCH 5/5] Direct numbers and tens test coverage

---
 tests/test_number_parsing.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_number_parsing.py b/tests/test_number_parsing.py
index 8013888..965a3e7 100644
--- a/tests/test_number_parsing.py
+++ b/tests/test_number_parsing.py
@@ -134,6 +134,7 @@ def test_parse_sentences_ordinal(expected, test_input, lang):
         ('I have three apples and one pear.', "I have three apples and 1 pear.", 'en', ['three']),
         # numeric
         ('eleven', "eleven", 'en', ['eleven']),
+        ('ninety thirteen forty', "90 13 forty", 'en', ['forty']),
         ('one hundred and forty two', "one 140 two", 'en', ['one','two']),
         ('one hundred and one', "one 100 one", 'en', ['one']),
         ('seven thousand and nothing else',"seven 1000 and nothing else", 'en', ['seven']),