diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..2921037
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,3 @@
+include requirements.txt
+include LICENSE
+recursive-include ovos_utterance_normalizer *
\ No newline at end of file
diff --git a/ovos_utterance_normalizer/__init__.py b/ovos_utterance_normalizer/__init__.py
new file mode 100644
index 0000000..aabb1cf
--- /dev/null
+++ b/ovos_utterance_normalizer/__init__.py
@@ -0,0 +1,61 @@
+import string
+from typing import Optional, List
+from ovos_utterance_normalizer.normalizer import Normalizer, CatalanNormalizer, CzechNormalizer, \
+    PortugueseNormalizer, AzerbaijaniNormalizer, RussianNormalizer, EnglishNormalizer, UkrainianNormalizer, \
+    GermanNormalizer
+from ovos_plugin_manager.templates.transformers import UtteranceTransformer
+
+
+class UtteranceNormalizerPlugin(UtteranceTransformer):
+    """Plugin to normalize utterances: numbers, punctuation and contractions.
+    Language-specific pre-processing is handled here too; this helps
+    intent parsers."""
+
+    def __init__(self, name="ovos-utterance-normalizer", priority=1):
+        super().__init__(name, priority)
+
+    @staticmethod
+    def get_normalizer(lang: str):
+        if lang.startswith("en"):
+            return EnglishNormalizer()
+        elif lang.startswith("pt"):
+            return PortugueseNormalizer()
+        elif lang.startswith("uk"):
+            return UkrainianNormalizer()
+        elif lang.startswith("ca"):
+            return CatalanNormalizer()
+        elif lang.startswith("cs"):  # Czech (BCP-47 language code "cs")
+            return CzechNormalizer()
+        elif lang.startswith("az"):
+            return AzerbaijaniNormalizer()
+        elif lang.startswith("ru"):
+            return RussianNormalizer()
+        elif lang.startswith("de"):
+            return GermanNormalizer()
+        return Normalizer()
+
+    @staticmethod
+    def strip_punctuation(utterance: str):
+        return utterance.strip(string.punctuation).strip()
+
+    def transform(self, utterances: List[str],
+                  context: Optional[dict] = None) -> (list, dict):
+        context = context or {}
+        lang = context.get("lang") or self.config.get("lang", "en-us")
+        normalizer = self.get_normalizer(lang)
+
+        norm = []
+        # 1 - utterance with contractions expanded
+        # 2 - original utterance
+        # 3 - normalized utterance
+        for u in utterances:
+            norm.append(normalizer.expand_contractions(u))
+            norm.append(u)
+            norm.append(normalizer.normalize(u))
+
+        if self.config.get("strip_punctuation", True):
+            norm = [self.strip_punctuation(u) for u in norm]
+
+        # this deduplicates the list while keeping order
+        return list(dict.fromkeys(norm)), context
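A minimal usage sketch of the plugin above, assuming ovos-plugin-manager is installed and the English resources ship with the package; the exact strings depend on res/en/normalize.json, so the printed values are indicative only:

    from ovos_utterance_normalizer import UtteranceNormalizerPlugin

    plugin = UtteranceNormalizerPlugin()
    # transform() returns (utterances, context); the list keeps the
    # contraction-expanded, original and fully normalized variants,
    # deduplicated while preserving order
    utterances, context = plugin.transform(["what's two plus two"],
                                           {"lang": "en-us"})
    print(utterances)
    # indicatively: ["what is two plus two", "what's two plus two",
    #                "what is 2 plus 2"]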
diff --git a/ovos_utterance_normalizer/normalizer.py b/ovos_utterance_normalizer/normalizer.py
new file mode 100644
index 0000000..751a5ca
--- /dev/null
+++ b/ovos_utterance_normalizer/normalizer.py
@@ -0,0 +1,235 @@
+import json
+import re
+from os.path import dirname
+from typing import List, Dict
+
+from ovos_utterance_normalizer.tokenization import word_tokenize
+from ovos_utterance_normalizer.numeric import EnglishNumberParser, AzerbaijaniNumberParser, GermanNumberParser
+
+
+class Normalizer:
+    # taken from lingua_franca
+    """
+    individual languages may subclass this if needed
+
+    normalize_XX should pass a valid config read from json
+    """
+    _default_config = {}
+
+    def __init__(self, config=None):
+        self.config = config or self._default_config
+
+    @staticmethod
+    def tokenize(utterance) -> List[str]:
+        return word_tokenize(utterance)
+
+    @property
+    def should_lowercase(self) -> bool:
+        return self.config.get("lowercase", False)
+
+    @property
+    def should_numbers_to_digits(self) -> bool:
+        return self.config.get("numbers_to_digits", True)
+
+    @property
+    def should_expand_contractions(self) -> bool:
+        return self.config.get("expand_contractions", True)
+
+    @property
+    def should_remove_symbols(self) -> bool:
+        return self.config.get("remove_symbols", False)
+
+    @property
+    def should_remove_accents(self) -> bool:
+        return self.config.get("remove_accents", False)
+
+    @property
+    def should_remove_articles(self) -> bool:
+        return self.config.get("remove_articles", False)
+
+    @property
+    def should_remove_stopwords(self) -> bool:
+        return self.config.get("remove_stopwords", False)
+
+    @property
+    def contractions(self) -> Dict[str, str]:
+        return self.config.get("contractions", {})
+
+    @property
+    def word_replacements(self) -> Dict[str, str]:
+        return self.config.get("word_replacements", {})
+
+    @property
+    def number_replacements(self) -> Dict[str, str]:
+        return self.config.get("number_replacements", {})
+
+    @property
+    def accents(self) -> Dict[str, str]:
+        return self.config.get("accents",
+                               {"á": "a", "à": "a", "ã": "a", "â": "a",
+                                "é": "e", "è": "e", "ê": "e", "ẽ": "e",
+                                "í": "i", "ì": "i", "î": "i", "ĩ": "i",
+                                "ò": "o", "ó": "o", "ô": "o", "õ": "o",
+                                "ú": "u", "ù": "u", "û": "u", "ũ": "u",
+                                "Á": "A", "À": "A", "Ã": "A", "Â": "A",
+                                "É": "E", "È": "E", "Ê": "E", "Ẽ": "E",
+                                "Í": "I", "Ì": "I", "Î": "I", "Ĩ": "I",
+                                "Ò": "O", "Ó": "O", "Ô": "O", "Õ": "O",
+                                "Ú": "U", "Ù": "U", "Û": "U", "Ũ": "U"
+                                })
+
+    @property
+    def stopwords(self) -> List[str]:
+        return self.config.get("stopwords", [])
+
+    @property
+    def articles(self) -> List[str]:
+        return self.config.get("articles", [])
+
+    @property
+    def symbols(self) -> List[str]:
+        return self.config.get("symbols",
+                               [";", "_", "!", "?", "<", ">", "|",
+                                "(", ")", "=", "[", "]", "{", "}",
+                                "»", "«", "*", "~", "^", "`", "\""])
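All flags and lookup tables above are read from the config dict, so callers can hand-roll a config instead of relying on the per-language JSON files. A small sketch, with keys exactly as defined by the getters above:

    config = {
        "lowercase": True,
        "remove_symbols": True,
        "contractions": {"can't": "can not"},
        "number_replacements": {"one": "1", "two": "2"},
    }
    norm = Normalizer(config)  # base class defined above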
"isn't" -> "is not" """ + words = self.tokenize(utterance) + for idx, w in enumerate(words): + if w in self.contractions: + words[idx] = self.contractions[w] + utterance = " ".join(words) + return utterance + + def numbers_to_digits(self, utterance: str) -> str: + words = self.tokenize(utterance) + for idx, w in enumerate(words): + if w in self.number_replacements: + words[idx] = self.number_replacements[w] + utterance = " ".join(words) + return utterance + + def remove_articles(self, utterance: str) -> str: + words = self.tokenize(utterance) + for idx, w in enumerate(words): + if w in self.articles: + words[idx] = "" + utterance = " ".join(words) + return utterance + + def remove_stopwords(self, utterance: str) -> str: + words = self.tokenize(utterance) + for idx, w in enumerate(words): + if w in self.stopwords: + words[idx] = "" + # if words[-1] == '-': + # words = words[:-1] + utterance = " ".join(words) + # Remove trailing whitespaces from utterance along with orphaned + # hyphens, more characters may be added later + utterance = re.sub(r'- *$', '', utterance) + return utterance + + def remove_symbols(self, utterance: str) -> str: + mapping = str.maketrans('', '', "".join(self.symbols)) + return utterance.translate(mapping) + + def remove_accents(self, utterance : str) -> str: + for s in self.accents: + utterance = utterance.replace(s, self.accents[s]) + return utterance + + def replace_words(self, utterance: str) -> str: + words = self.tokenize(utterance) + for idx, w in enumerate(words): + if w in self.word_replacements: + words[idx] = self.word_replacements[w] + utterance = " ".join(words) + return utterance + + def normalize(self, utterance: str = ""): + # mutations + if self.should_lowercase: + utterance = utterance.lower() + if self.should_expand_contractions: + utterance = self.expand_contractions(utterance) + if self.should_numbers_to_digits: + utterance = self.numbers_to_digits(utterance) + utterance = self.replace_words(utterance) + + # removals + if self.should_remove_symbols: + utterance = self.remove_symbols(utterance) + if self.should_remove_accents: + utterance = self.remove_accents(utterance) + if self.should_remove_articles: + utterance = self.remove_articles(utterance) + if self.should_remove_stopwords: + utterance = self.remove_stopwords(utterance) + # remove extra spaces + utterance = " ".join([w for w in utterance.split(" ") if w]) + return utterance + + +class CatalanNormalizer(Normalizer): + with open(f"{dirname(dirname(__file__))}/res/ca/normalize.json") as f: + _default_config = json.load(f) + + @staticmethod + def tokenize(utterance : str) -> List[str]: + return word_tokenize(utterance, lang="ca") + + +class CzechNormalizer(Normalizer): + with open(f"{dirname(dirname(__file__))}/res/cz/normalize.json", encoding='utf8') as f: + _default_config = json.load(f) + + +class PortugueseNormalizer(Normalizer): + with open(f"{dirname(dirname(__file__))}/res/pt/normalize.json") as f: + _default_config = json.load(f) + + @staticmethod + def tokenize(utterance: str) -> List[str]: + return word_tokenize(utterance, lang="pt") + + +class RussianNormalizer(Normalizer): + with open(f"{dirname(dirname(__file__))}/res/ru/normalize.json", encoding='utf8') as f: + _default_config = json.load(f) + + +class UkrainianNormalizer(Normalizer): + with open(f"{dirname(dirname(__file__))}/res/uk/normalize.json", encoding='utf8') as f: + _default_config = json.load(f) + + +class EnglishNormalizer(Normalizer): + with open(f"{dirname(dirname(__file__))}/res/en/normalize.json") as f: + 
+
+
+class CatalanNormalizer(Normalizer):
+    with open(f"{dirname(dirname(__file__))}/res/ca/normalize.json") as f:
+        _default_config = json.load(f)
+
+    @staticmethod
+    def tokenize(utterance: str) -> List[str]:
+        return word_tokenize(utterance, lang="ca")
+
+
+class CzechNormalizer(Normalizer):
+    with open(f"{dirname(dirname(__file__))}/res/cz/normalize.json", encoding='utf8') as f:
+        _default_config = json.load(f)
+
+
+class PortugueseNormalizer(Normalizer):
+    with open(f"{dirname(dirname(__file__))}/res/pt/normalize.json") as f:
+        _default_config = json.load(f)
+
+    @staticmethod
+    def tokenize(utterance: str) -> List[str]:
+        return word_tokenize(utterance, lang="pt")
+
+
+class RussianNormalizer(Normalizer):
+    with open(f"{dirname(dirname(__file__))}/res/ru/normalize.json", encoding='utf8') as f:
+        _default_config = json.load(f)
+
+
+class UkrainianNormalizer(Normalizer):
+    with open(f"{dirname(dirname(__file__))}/res/uk/normalize.json", encoding='utf8') as f:
+        _default_config = json.load(f)
+
+
+class EnglishNormalizer(Normalizer):
+    with open(f"{dirname(dirname(__file__))}/res/en/normalize.json") as f:
+        _default_config = json.load(f)
+
+    def numbers_to_digits(self, utterance: str) -> str:
+        return EnglishNumberParser().convert_words_to_numbers(utterance)
+
+
+class AzerbaijaniNormalizer(Normalizer):
+    with open(f"{dirname(dirname(__file__))}/res/az/normalize.json") as f:
+        _default_config = json.load(f)
+
+    def numbers_to_digits(self, utterance: str) -> str:
+        return AzerbaijaniNumberParser().convert_words_to_numbers(utterance)
+
+
+class GermanNormalizer(Normalizer):
+    with open(f"{dirname(dirname(__file__))}/res/de/normalize.json") as f:
+        _default_config = json.load(f)
+
+    def numbers_to_digits(self, utterance: str) -> str:
+        return GermanNumberParser().convert_words_to_numbers(utterance)
+
+    def remove_symbols(self, utterance: str) -> str:
+        # special rule for hyphenated words in German, as some STT engines
+        # falsely return them pretty regularly
+        utterance = re.sub(r'\b(\w*)-(\w*)\b', r'\1 \2', utterance)
+        return super().remove_symbols(utterance)
diff --git a/ovos_utterance_normalizer/numeric.py b/ovos_utterance_normalizer/numeric.py
new file mode 100644
index 0000000..2a40ed4
--- /dev/null
+++ b/ovos_utterance_normalizer/numeric.py
@@ -0,0 +1,2105 @@
+from collections import OrderedDict
+from typing import List
+
+from ovos_utils.json_helper import invert_dict
+from ovos_utterance_normalizer.tokenization import word_tokenize, partition_list, \
+    Token, ReplaceableNumber
+
+
+def is_numeric(word):
+    """
+    Takes in a string and tests to see if it is a number.
+    Args:
+        word (str): string to test if a number
+    Returns:
+        (bool): True if a number, else False
+
+    """
+    try:
+        float(word)
+        return True
+    except ValueError:
+        return False
+
+
+def look_for_fractions(split_list):
+    """
+    Takes a list created by splitting on '/' and determines whether it
+    represents a fraction.
+
+    Args:
+        split_list (list): list created by splitting on '/'
+    Returns:
+        (bool): False if not a fraction, otherwise True
+
+    """
+
+    if len(split_list) == 2:
+        if is_numeric(split_list[0]) and is_numeric(split_list[1]):
+            return True
+
+    return False
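Both helpers above are used throughout the parsers below; their contract in a nutshell:

    print(is_numeric("3.14"))   # True -- anything float() accepts
    print(is_numeric("3,14"))   # False
    print(look_for_fractions("2/3".split('/')))  # True
    print(look_for_fractions("x/y".split('/')))  # False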
+
+
+class GermanNumberParser:
+    # taken from lingua_franca
+    _ARTICLES_DE = {'der', 'das', 'die', 'dem', 'den'}
+
+    # _SPOKEN_NUMBER
+    _NUM_STRING_DE = {
+        0: 'null',
+        1: 'eins',
+        2: 'zwei',
+        3: 'drei',
+        4: 'vier',
+        5: 'fünf',
+        6: 'sechs',
+        7: 'sieben',
+        8: 'acht',
+        9: 'neun',
+        10: 'zehn',
+        11: 'elf',
+        12: 'zwölf',
+        13: 'dreizehn',
+        14: 'vierzehn',
+        15: 'fünfzehn',
+        16: 'sechzehn',
+        17: 'siebzehn',
+        18: 'achtzehn',
+        19: 'neunzehn',
+        20: 'zwanzig',
+        30: 'dreißig',
+        40: 'vierzig',
+        50: 'fünfzig',
+        60: 'sechzig',
+        70: 'siebzig',
+        80: 'achtzig',
+        90: 'neunzig',
+        100: 'hundert',
+        200: 'zweihundert',
+        300: 'dreihundert',
+        400: 'vierhundert',
+        500: 'fünfhundert',
+        600: 'sechshundert',
+        700: 'siebenhundert',
+        800: 'achthundert',
+        900: 'neunhundert',
+        1000: 'tausend',
+        1000000: 'million'
+    }
+
+    _STRING_NUM_DE = invert_dict(_NUM_STRING_DE)
+    _STRING_NUM_DE.update({
+        'ein': 1,
+        'eine': 1,
+        'einer': 1,
+        'einem': 1,
+        'einen': 1
+    })
+
+    _MONTHS_DE = ['januar', 'februar', 'märz', 'april', 'mai', 'juni',
+                  'juli', 'august', 'september', 'oktober', 'november',
+                  'dezember']
+
+    # German uses "long scale" https://en.wikipedia.org/wiki/Long_and_short_scales
+    # Currently, numbers are limited to 1000000000000000000000000,
+    # but _NUM_POWERS_OF_TEN can be extended to include additional number words
+
+    _NUM_POWERS_OF_TEN_DE = [
+        '', 'tausend', 'Million', 'Milliarde', 'Billion', 'Billiarde', 'Trillion',
+        'Trilliarde'
+    ]
+
+    _FRACTION_STRING_DE = {
+        2: 'halb',
+        3: 'drittel',
+        4: 'viertel',
+        5: 'fünftel',
+        6: 'sechstel',
+        7: 'siebtel',
+        8: 'achtel',
+        9: 'neuntel',
+        10: 'zehntel',
+        11: 'elftel',
+        12: 'zwölftel',
+        13: 'dreizehntel',
+        14: 'vierzehntel',
+        15: 'fünfzehntel',
+        16: 'sechzehntel',
+        17: 'siebzehntel',
+        18: 'achtzehntel',
+        19: 'neunzehntel',
+        20: 'zwanzigstel'
+    }
+
+    _STRING_FRACTION_DE = invert_dict(_FRACTION_STRING_DE)
+    _STRING_FRACTION_DE.update({
+        'halb': 2,
+        'halbe': 2,
+        'halben': 2,
+        'halbes': 2,
+        'halber': 2,
+        'halbem': 2
+    })
+
+    # Numbers below 1 million are written in one word in German, yielding very
+    # long words
+    # In some circumstances it may be better to separate individual words
+    # Set _EXTRA_SPACE_DE = " " for separating numbers below 1 million
+    # (orthographically incorrect)
+    _EXTRA_SPACE_DE = ""
+
+    _ORDINAL_BASE_DE = {
+        "1.": "erst",
+        "2.": "zweit",
+        "3.": "dritt",
+        "4.": "viert",
+        "5.": "fünft",
+        "6.": "sechst",
+        "7.": "siebt",
+        "8.": "acht",
+        "9.": "neunt",
+        "10.": "zehnt",
+        "11.": "elft",
+        "12.": "zwölft",
+        "13.": "dreizehnt",
+        "14.": "vierzehnt",
+        "15.": "fünfzehnt",
+        "16.": "sechzehnt",
+        "17.": "siebzehnt",
+        "18.": "achtzehnt",
+        "19.": "neunzehnt",
+        "20.": "zwanzigst",
+        "21.": "einundzwanzigst",
+        "22.": "zweiundzwanzigst",
+        "23.": "dreiundzwanzigst",
+        "24.": "vierundzwanzigst",
+        "25.": "fünfundzwanzigst",
+        "26.": "sechsundzwanzigst",
+        "27.": "siebenundzwanzigst",
+        "28.": "achtundzwanzigst",
+        "29.": "neunundzwanzigst",
+        "30.": "dreißigst",
+        "31.": "einunddreißigst",
+        "32.": "zweiunddreißigst",
+        "33.": "dreiunddreißigst",
+        "34.": "vierunddreißigst",
+        "35.": "fünfunddreißigst",
+        "36.": "sechsunddreißigst",
+        "37.": "siebenunddreißigst",
+        "38.": "achtunddreißigst",
+        "39.": "neununddreißigst",
+        "40.": "vierzigst",
"41.": "einundvierzigst", + "42.": "zweiundvierzigst", + "43.": "dreiundvierzigst", + "44.": "vierundvierzigst", + "45.": "fünfundvierzigst", + "46.": "sechsundvierzigst", + "47.": "siebenundvierzigst", + "48.": "achtundvierzigst", + "49.": "neunundvierzigst", + "50.": "fünfzigst", + "51.": "einundfünfzigst", + "52.": "zweiundfünfzigst", + "53.": "dreiundfünfzigst", + "60.": "sechzigst", + "70.": "siebzigst", + "80.": "achtzigst", + "90.": "neunzigst", + "100.": "einhundertst", + "1000.": "eintausendst", + "1000000.": "millionst" + } + + _LONG_SCALE_DE = OrderedDict([ + (100, 'hundert'), + (1000, 'tausend'), + (1000000, 'million'), + (1e9, "milliarde"), + (1e12, 'billion'), + (1e15, "billiarde"), + (1e18, "trillion"), + (1e21, "trilliarde"), + (1e24, "quadrillion"), + (1e27, "quadrilliarde") + ]) + + _MULTIPLIER_DE = set(_LONG_SCALE_DE.values()) + + _STRING_LONG_SCALE_DE = invert_dict(_LONG_SCALE_DE) + + # ending manipulation + for number, item in _LONG_SCALE_DE.items(): + if int(number) > 1000: + if item.endswith('e'): + name = item + 'n' + _MULTIPLIER_DE.add(name) + _STRING_LONG_SCALE_DE[name] = number + else: + name = item + 'en' + _MULTIPLIER_DE.add(name) + _STRING_LONG_SCALE_DE[name] = number + + _LONG_ORDINAL_DE = { + 1e6: "millionst", + 1e9: "milliardst", + 1e12: "billionst", + 1e15: "billiardst", + 1e18: "trillionst", + 1e21: "trilliardst", + 1e24: "quadrillionst", + 1e27: "quadrilliardst" + } + + _LONG_ORDINAL_DE.update(_ORDINAL_BASE_DE) + + # dict für erste, drittem, millionstes ... + _STRING_LONG_ORDINAL_DE = {ord+ending: num for ord, num in invert_dict(_LONG_ORDINAL_DE).items() + for ending in ("en", "em", "es", "er", "e")} + _FRACTION_MARKER_DE = set() + _NEGATIVES_DE = {"minus"} + _NUMBER_CONNECTORS_DE = {"und"} + _COMMA_DE = {"komma", "comma", "punkt"} + + + def is_ordinal_de(self, input_str): + """ + This function takes the given text and checks if it is an ordinal number. + Args: + input_str (str): the string to check if ordinal + Returns: + (bool) or (float): False if not an ordinal, otherwise the number + corresponding to the ordinal + ordinals for 1, 3, 7 and 8 are irregular + only works for ordinals corresponding to the numbers in _STRING_NUM + """ + val = self._STRING_LONG_ORDINAL_DE.get(input_str.lower(), False) + # account for numbered ordinals + if not val and input_str.endswith('.') and is_numeric(input_str[:-1]): + val = input_str + return val + + def is_fractional_de(self, input_str, short_scale=False): + """ + This function takes the given text and checks if it is a fraction. + Args: + input_str (str): the string to check if fractional + short_scale (bool): use short scale if True, long scale if False + Returns: + (bool) or (float): False if not a fraction, otherwise the fraction + """ + # account for different numerators, e.g. 
zweidrittel
+
+        input_str = input_str.lower()
+        numerator = 1
+        prev_number = 0
+        denominator = False
+        remainder = ""
+
+        # first check if is a fraction containing a char (eg "2/3")
+        _bucket = input_str.split('/')
+        if look_for_fractions(_bucket):
+            numerator = float(_bucket[0])
+            denominator = float(_bucket[1])
+
+        if not denominator:
+            for fraction in sorted(self._STRING_FRACTION_DE.keys(),
+                                   key=lambda x: len(x),
+                                   reverse=True):
+                if fraction in input_str and not denominator:
+                    denominator = self._STRING_FRACTION_DE.get(fraction)
+                    remainder = input_str.replace(fraction, "")
+                    break
+
+            if remainder:
+                if not self._STRING_NUM_DE.get(remainder, False):
+                    # account for e.g. "eineindrittel"
+                    for numstring, number in self._STRING_NUM_DE.items():
+                        if remainder.endswith(numstring):
+                            prev_number = self._STRING_NUM_DE.get(
+                                remainder.replace(numstring, "", 1), 0)
+                            numerator = number
+                            break
+                    else:
+                        return False
+                else:
+                    numerator = self._STRING_NUM_DE.get(remainder)
+
+        if denominator:
+            return prev_number + (numerator / denominator)
+        else:
+            return False
+
+    def is_number_de(self, word: str):
+        if self.is_ordinal_de(word):
+            return None
+
+        if is_numeric(word):
+            if word.isdigit():
+                return int(word)
+            else:
+                return float(word)
+        elif word in self._STRING_NUM_DE:
+            return self._STRING_NUM_DE.get(word)
+        elif word in self._STRING_LONG_SCALE_DE:
+            return self._STRING_LONG_SCALE_DE.get(word)
+
+        return None
+
+    def convert_words_to_numbers(self, utterance, short_scale=False,
+                                 ordinals=False, fractions=True):
+        """
+        Convert words in a string into their equivalent numbers.
+        Args:
+            utterance str:
+            short_scale boolean: True if short scale numbers should be used.
+            ordinals boolean: True if ordinals (e.g. first, second, third) should
+                              be parsed to their number values (1, 2, 3...)
+        Returns:
+            str
+            The original text, with numbers subbed in where appropriate.
+        """
+        tokens = [Token(word, index) for index, word in enumerate(word_tokenize(utterance))]
+        numbers_to_replace = self.extract_numbers(tokens, short_scale, ordinals, fractions)
+
+        results = []
+        for token in tokens:
+            if not numbers_to_replace or \
+                    token.index < numbers_to_replace[0].start_index:
+                results.append(token.word)
+            else:
+                if numbers_to_replace and \
+                        token.index == numbers_to_replace[0].start_index:
+                    results.append(str(numbers_to_replace[0].value))
+                if numbers_to_replace and \
+                        token.index == numbers_to_replace[0].end_index:
+                    numbers_to_replace.pop(0)
+
+        return ' '.join(results)
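An indicative sketch of the entry point above (German number words default to the long scale, hence short_scale=False; outputs depend on the tokenizer, so treat the values as approximate):

    parser = GermanNumberParser()
    print(parser.convert_words_to_numbers("drei Minuten"))
    # -> "3 Minuten"
    print(parser.convert_words_to_numbers("eine halbe Stunde"))
    # -> "0.5 Stunde" -- fractions are resolved to floats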
+
+    def extract_numbers(self, tokens: list,
+                        short_scale: bool = False,
+                        ordinals: bool = False,
+                        fractions: bool = True) -> List:
+        """
+        Extract numeric values from a list of tokens.
+        Args:
+            tokens (list): list of tokens (str)
+            short_scale boolean: True if short scale numbers should be used.
+            ordinals boolean: True if ordinals (e.g. first, second, third) should
+                              be parsed to their number values (1, 2, 3...)
+        Returns:
+            list of extracted numbers (ReplaceableNumber)
+
+        """
+        if not isinstance(tokens[0], Token):  # list of string tokens
+            tokens = [Token(word, index) for index, word in enumerate(tokens)]
+        numbers_to_replace = self._extract_numbers_with_text_de(tokens, short_scale, ordinals, fractions)
+        numbers_to_replace.sort(key=lambda number: number.start_index)
+        return numbers_to_replace
+
+    def _extract_numbers_with_text_de(self, tokens, short_scale=True,
+                                      ordinals=False, fractions=True):
+        """
+        Extract all numbers from a list of Tokens, with the words that
+        represent them.
+
+        Args:
+            [Token]: The tokens to parse.
+            short_scale bool: True if short scale numbers should be used, False for
+                              long scale. True by default.
+            ordinals bool: True if ordinal words (first, second, third, etc) should
+                           be parsed.
+            fractions bool: True if we should look for fractions and decimals.
+
+        Returns:
+            [ReplaceableNumber]: A list of tuples, each containing a number and a
+                                 string.
+
+        """
+        placeholder = ""  # inserted to maintain correct indices
+        results = []
+        while True:
+            to_replace = \
+                self._extract_number_with_text_de(tokens, short_scale,
+                                                  ordinals)
+
+            if not to_replace:
+                break
+
+            if isinstance(to_replace.value, float) and not fractions:
+                pass
+            else:
+                results.append(to_replace)
+
+            tokens = [
+                t if not
+                to_replace.start_index <= t.index <= to_replace.end_index
+                else
+                Token(placeholder, t.index) for t in tokens
+            ]
+        results.sort(key=lambda n: n.start_index)
+        return results
+
+    def _extract_number_with_text_de(self, tokens, short_scale=True,
+                                     ordinals=False):
+        """
+        This function extracts a number from a list of Tokens.
+
+        Args:
+            tokens [Token]: the tokens to parse
+            short_scale (bool): use short scale if True, long scale if False
+            ordinals (bool): consider ordinal numbers
+        Returns:
+            ReplaceableNumber
+
+        """
+        number, tokens = \
+            self._extract_number_with_text_de_helper(tokens, short_scale,
+                                                     ordinals)
+        return ReplaceableNumber(number, tokens)
+
+    def _extract_number_with_text_de_helper(self, tokens,
+                                            short_scale, ordinals):
+        """
+        Helper for _extract_number_with_text_de.
+
+        Args:
+            tokens [Token]:
+            short_scale boolean:
+            ordinals boolean:
+        Returns:
+            int or float, [Tokens]
+        """
+        if ordinals:
+            for token in tokens:
+                ordinal = self.is_ordinal_de(token.word)
+                if ordinal:
+                    return ordinal, [token]
+
+        return self._extract_real_number_with_text_de(tokens, short_scale)
+
+    def _extract_real_number_with_text_de(self, tokens, short_scale):
+        """
+        This is handling real numbers.
+
+        Args:
+            tokens [Token]:
+            short_scale boolean:
+        Returns:
+            int or float, [Tokens]
+            The value parsed, and tokens that it corresponds to.
+ """ + number_words = [] + val = _val = _current_val = None + _comma = False + to_sum = [] + + for idx, token in enumerate(tokens): + + _prev_val = _current_val + _current_val = None + + word = token.word + + if word in self._NUMBER_CONNECTORS_DE and not number_words: + continue + if word in (self._NEGATIVES_DE | + self._NUMBER_CONNECTORS_DE | + self._COMMA_DE): + number_words.append(token) + if word in self._COMMA_DE: + _comma = token + _current_val = _val or _prev_val + continue + + prev_word = tokens[idx - 1].word if idx > 0 else "" + next_word = tokens[idx + 1].word if idx + 1 < len(tokens) else "" + + if word not in self._STRING_LONG_SCALE_DE and \ + word not in self._STRING_NUM_DE and \ + word not in self._MULTIPLIER_DE and \ + not is_numeric(word) and \ + not self.is_fractional_de(word): + words_only = [token.word for token in number_words] + if _val is not None: + to_sum.append(_val) + if to_sum: + val = sum(to_sum) + + if number_words and (not all([w in self._ARTICLES_DE | + self._NEGATIVES_DE| + self._NUMBER_CONNECTORS_DE + for w in words_only]) + or str(val) == number_words[-1].word): + break + else: + number_words.clear() + to_sum.clear() + val = _val = _prev_val = None + continue + elif word not in self._MULTIPLIER_DE \ + and prev_word not in self._MULTIPLIER_DE \ + and prev_word not in self._NUMBER_CONNECTORS_DE \ + and prev_word not in self._NEGATIVES_DE \ + and prev_word not in self._COMMA_DE \ + and prev_word not in self._STRING_LONG_SCALE_DE \ + and prev_word not in self._STRING_NUM_DE \ + and not self.is_ordinal_de(word) \ + and not is_numeric(prev_word) \ + and not self.is_fractional_de(prev_word): + number_words = [token] + else: + number_words.append(token) + + # is this word already a number or a word of a number? + _val = _current_val = self.is_number_de(word) + + # is this a negative number? + if _current_val is not None and prev_word in self._NEGATIVES_DE: + _val = 0 - _current_val + + # is the prev word a number and should we multiply it? + if _prev_val is not None and ( word in self._MULTIPLIER_DE or \ + word in ("einer", "eines", "einem")): + to_sum.append(_prev_val * _current_val or _current_val) + _val = _current_val = None + + # fraction handling + _fraction_val = self.is_fractional_de(word, short_scale=short_scale) + if _fraction_val: + if _prev_val is not None and prev_word != "eine" and \ + word not in self._STRING_FRACTION_DE: # zusammengesetzter Bruch + _val = _prev_val + _fraction_val + if prev_word not in self._NUMBER_CONNECTORS_DE \ + and tokens[idx -1] not in number_words: + number_words.append(tokens[idx - 1]) + elif _prev_val is not None: + _val = _prev_val * _fraction_val + if tokens[idx -1] not in number_words: + number_words.append(tokens[idx - 1]) + else: + _val = _fraction_val + _current_val = _val + + # directly following numbers without relation + if (is_numeric(prev_word) or prev_word in self._STRING_NUM_DE) \ + and not _fraction_val \ + and not self.is_fractional_de(next_word) \ + and not to_sum: + val = _prev_val + number_words.pop(-1) + break + + # is this a spoken time ("drei viertel acht") + if isinstance(_prev_val, float) and self.is_number_de(word) and not to_sum: + if idx+1 < len(tokens): + _, number = self._extract_real_number_with_text_de([tokens[idx + 1]], + short_scale=short_scale) + if not next_word or not number: + val = f"{_val-1}:{int(60*_prev_val)}" + break + + # spoken decimals + if _current_val is not None and _comma: + # to_sum = [ 1, 0.2, 0.04,...] 
+ to_sum.append(_current_val if _current_val >= 10 else ( + _current_val) / (10 ** (token.index - _comma.index))) + _val = _current_val = None + + if _current_val is not None and \ + next_word in (self._NUMBER_CONNECTORS_DE | self._COMMA_DE | {""}): + to_sum.append(_val or _current_val) + _val = _current_val = None + + if not next_word and number_words: + val = sum(to_sum) or _val + + return val, number_words + + +# TODO - finish adding public user facing methods +class EnglishNumberParser: + # taken from lingua_franca + + # TODO - from json file + _ARTICLES_EN = {'a', 'an', 'the'} + _NUM_STRING_EN = { + 0: 'zero', + 1: 'one', + 2: 'two', + 3: 'three', + 4: 'four', + 5: 'five', + 6: 'six', + 7: 'seven', + 8: 'eight', + 9: 'nine', + 10: 'ten', + 11: 'eleven', + 12: 'twelve', + 13: 'thirteen', + 14: 'fourteen', + 15: 'fifteen', + 16: 'sixteen', + 17: 'seventeen', + 18: 'eighteen', + 19: 'nineteen', + 20: 'twenty', + 30: 'thirty', + 40: 'forty', + 50: 'fifty', + 60: 'sixty', + 70: 'seventy', + 80: 'eighty', + 90: 'ninety' + } + _FRACTION_STRING_EN = { + 2: 'half', + 3: 'third', + 4: 'forth', + 5: 'fifth', + 6: 'sixth', + 7: 'seventh', + 8: 'eigth', + 9: 'ninth', + 10: 'tenth', + 11: 'eleventh', + 12: 'twelveth', + 13: 'thirteenth', + 14: 'fourteenth', + 15: 'fifteenth', + 16: 'sixteenth', + 17: 'seventeenth', + 18: 'eighteenth', + 19: 'nineteenth', + 20: 'twentyith' + } + _LONG_SCALE_EN = OrderedDict([ + (100, 'hundred'), + (1000, 'thousand'), + (1000000, 'million'), + (1e12, "billion"), + (1e18, 'trillion'), + (1e24, "quadrillion"), + (1e30, "quintillion"), + (1e36, "sextillion"), + (1e42, "septillion"), + (1e48, "octillion"), + (1e54, "nonillion"), + (1e60, "decillion"), + (1e66, "undecillion"), + (1e72, "duodecillion"), + (1e78, "tredecillion"), + (1e84, "quattuordecillion"), + (1e90, "quinquadecillion"), + (1e96, "sedecillion"), + (1e102, "septendecillion"), + (1e108, "octodecillion"), + (1e114, "novendecillion"), + (1e120, "vigintillion"), + (1e306, "unquinquagintillion"), + (1e312, "duoquinquagintillion"), + (1e336, "sesquinquagintillion"), + (1e366, "unsexagintillion") + ]) + _SHORT_SCALE_EN = OrderedDict([ + (100, 'hundred'), + (1000, 'thousand'), + (1000000, 'million'), + (1e9, "billion"), + (1e12, 'trillion'), + (1e15, "quadrillion"), + (1e18, "quintillion"), + (1e21, "sextillion"), + (1e24, "septillion"), + (1e27, "octillion"), + (1e30, "nonillion"), + (1e33, "decillion"), + (1e36, "undecillion"), + (1e39, "duodecillion"), + (1e42, "tredecillion"), + (1e45, "quattuordecillion"), + (1e48, "quinquadecillion"), + (1e51, "sedecillion"), + (1e54, "septendecillion"), + (1e57, "octodecillion"), + (1e60, "novendecillion"), + (1e63, "vigintillion"), + (1e66, "unvigintillion"), + (1e69, "uuovigintillion"), + (1e72, "tresvigintillion"), + (1e75, "quattuorvigintillion"), + (1e78, "quinquavigintillion"), + (1e81, "qesvigintillion"), + (1e84, "septemvigintillion"), + (1e87, "octovigintillion"), + (1e90, "novemvigintillion"), + (1e93, "trigintillion"), + (1e96, "untrigintillion"), + (1e99, "duotrigintillion"), + (1e102, "trestrigintillion"), + (1e105, "quattuortrigintillion"), + (1e108, "quinquatrigintillion"), + (1e111, "sestrigintillion"), + (1e114, "septentrigintillion"), + (1e117, "octotrigintillion"), + (1e120, "noventrigintillion"), + (1e123, "quadragintillion"), + (1e153, "quinquagintillion"), + (1e183, "sexagintillion"), + (1e213, "septuagintillion"), + (1e243, "octogintillion"), + (1e273, "nonagintillion"), + (1e303, "centillion"), + (1e306, "uncentillion"), + (1e309, "duocentillion"), + 
(1e312, "trescentillion"), + (1e333, "decicentillion"), + (1e336, "undecicentillion"), + (1e363, "viginticentillion"), + (1e366, "unviginticentillion"), + (1e393, "trigintacentillion"), + (1e423, "quadragintacentillion"), + (1e453, "quinquagintacentillion"), + (1e483, "sexagintacentillion"), + (1e513, "septuagintacentillion"), + (1e543, "ctogintacentillion"), + (1e573, "nonagintacentillion"), + (1e603, "ducentillion"), + (1e903, "trecentillion"), + (1e1203, "quadringentillion"), + (1e1503, "quingentillion"), + (1e1803, "sescentillion"), + (1e2103, "septingentillion"), + (1e2403, "octingentillion"), + (1e2703, "nongentillion"), + (1e3003, "millinillion") + ]) + _ORDINAL_BASE_EN = { + 1: 'first', + 2: 'second', + 3: 'third', + 4: 'fourth', + 5: 'fifth', + 6: 'sixth', + 7: 'seventh', + 8: 'eighth', + 9: 'ninth', + 10: 'tenth', + 11: 'eleventh', + 12: 'twelfth', + 13: 'thirteenth', + 14: 'fourteenth', + 15: 'fifteenth', + 16: 'sixteenth', + 17: 'seventeenth', + 18: 'eighteenth', + 19: 'nineteenth', + 20: 'twentieth', + 30: 'thirtieth', + 40: "fortieth", + 50: "fiftieth", + 60: "sixtieth", + 70: "seventieth", + 80: "eightieth", + 90: "ninetieth", + 1e2: "hundredth", + 1e3: "thousandth" + } + _SHORT_ORDINAL_EN = { + 1e6: "millionth", + 1e9: "billionth", + 1e12: "trillionth", + 1e15: "quadrillionth", + 1e18: "quintillionth", + 1e21: "sextillionth", + 1e24: "septillionth", + 1e27: "octillionth", + 1e30: "nonillionth", + 1e33: "decillionth" + # TODO > 1e-33 + } + _SHORT_ORDINAL_EN.update(_ORDINAL_BASE_EN) + _LONG_ORDINAL_EN = { + 1e6: "millionth", + 1e12: "billionth", + 1e18: "trillionth", + 1e24: "quadrillionth", + 1e30: "quintillionth", + 1e36: "sextillionth", + 1e42: "septillionth", + 1e48: "octillionth", + 1e54: "nonillionth", + 1e60: "decillionth" + # TODO > 1e60 + } + _LONG_ORDINAL_EN.update(_ORDINAL_BASE_EN) + # negate next number (-2 = 0 - 2) + _NEGATIVES_EN = {"negative", "minus"} + # sum the next number (twenty two = 20 + 2) + _SUMS_EN = {'twenty', '20', 'thirty', '30', 'forty', '40', 'fifty', '50', + 'sixty', '60', 'seventy', '70', 'eighty', '80', 'ninety', '90'} + _MULTIPLIES_LONG_SCALE_EN = set(_LONG_SCALE_EN.values()) | \ + {value + "s" for value in _LONG_SCALE_EN.values()} + _MULTIPLIES_SHORT_SCALE_EN = set(_SHORT_SCALE_EN.values()) | \ + {value + "s" for value in _SHORT_SCALE_EN.values()} + # split sentence parse separately and sum ( 2 and a half = 2 + 0.5 ) + _FRACTION_MARKER_EN = {"and"} + # decimal marker ( 1 point 5 = 1 + 0.5) + _DECIMAL_MARKER_EN = {"point", "dot"} + _STRING_NUM_EN = {v: k for k, v in _NUM_STRING_EN.items()} + _STRING_NUM_EN.update({key + 's': value for key, value in _STRING_NUM_EN.items()}) + _SPOKEN_EXTRA_NUM_EN = { + "half": 0.5, + "halves": 0.5, + "couple": 2 + } + _STRING_SHORT_ORDINAL_EN = {v: k for k, v in _SHORT_ORDINAL_EN.items()} + _STRING_LONG_ORDINAL_EN = {v: k for k, v in _LONG_ORDINAL_EN.items()} + + def is_fractional(self, input_str, short_scale=True, spoken=True): + """ + This function takes the given text and checks if it is a fraction. + + Args: + input_str (str): the string to check if fractional + short_scale (bool): use short scale if True, long scale if False + spoken (bool): consider "half", "quarter", "whole" a fraction + Returns: + (bool) or (float): False if not a fraction, otherwise the fraction + + """ + if input_str.endswith('s', -1): + input_str = input_str[:len(input_str) - 1] # e.g. 
"fifths" + + fracts = {"whole": 1, "half": 2, "halve": 2, "quarter": 4} + if short_scale: + for num in self._SHORT_ORDINAL_EN: + if num > 2: + fracts[self._SHORT_ORDINAL_EN[num]] = num + else: + for num in self._LONG_ORDINAL_EN: + if num > 2: + fracts[self._LONG_ORDINAL_EN[num]] = num + + if input_str.lower() in fracts and spoken: + return 1.0 / fracts[input_str.lower()] + return False + + def convert_words_to_numbers(self, utterance, short_scale=True, ordinals=False): + """ + Convert words in a string into their equivalent numbers. + Args: + text str: + short_scale boolean: True if short scale numbers should be used. + ordinals boolean: True if ordinals (e.g. first, second, third) should + be parsed to their number values (1, 2, 3...) + + Returns: + str + The original text, with numbers subbed in where appropriate. + + """ + tokens = [Token(word, index) for index, word in enumerate(word_tokenize(utterance))] + numbers_to_replace = self.extract_numbers(tokens, short_scale, ordinals) + + results = [] + for token in tokens: + if not numbers_to_replace or \ + token.index < numbers_to_replace[0].start_index: + results.append(token.word) + else: + if numbers_to_replace and \ + token.index == numbers_to_replace[0].start_index: + results.append(str(numbers_to_replace[0].value)) + if numbers_to_replace and \ + token.index == numbers_to_replace[0].end_index: + numbers_to_replace.pop(0) + + return ' '.join(results) + + def extract_numbers(self, tokens: list, short_scale: bool=True, ordinals: bool=False) -> List: + """ + extract numeric values from a list of tokens. + Args: + tokens (list): list of tokens (str) + short_scale boolean: True if short scale numbers should be used. + ordinals boolean: True if ordinals (e.g. first, second, third) should + be parsed to their number values (1, 2, 3...) + Returns: + list of extraced numbers (ReplaceableNumber) + + """ + if not isinstance(tokens[0], Token): # list of string tokens + tokens = [Token(word, index) for index, word in enumerate(tokens)] + numbers_to_replace = self._extract_numbers_with_text_en(tokens, short_scale, ordinals) + numbers_to_replace.sort(key=lambda number: number.start_index) + return numbers_to_replace + + # helper methods + def _initialize_number_data_en(self, short_scale, speech=True): + """ + Generate dictionaries of words to numbers, based on scale. + + This is a helper function for _extract_whole_number. + + Args: + short_scale (bool): + speech (bool): consider extra words (_SPOKEN_EXTRA_NUM_EN) to be numbers + + Returns: + (set(str), dict(str, number), dict(str, number)) + multiplies, string_num_ordinal, string_num_scale + + """ + multiplies = self._MULTIPLIES_SHORT_SCALE_EN if short_scale \ + else self._MULTIPLIES_LONG_SCALE_EN + + string_num_ordinal_en = self._STRING_SHORT_ORDINAL_EN if short_scale \ + else self._STRING_LONG_ORDINAL_EN + + string_num_scale_en = self._SHORT_SCALE_EN if short_scale else self._LONG_SCALE_EN + string_num_scale_en = {v: k for k, v in string_num_scale_en.items()} + string_num_scale_en.update({key + 's': value for key, value in string_num_scale_en.items()}) + + if speech: + string_num_scale_en.update(self._SPOKEN_EXTRA_NUM_EN) + return multiplies, string_num_ordinal_en, string_num_scale_en + + def _extract_fraction_with_text_en(self, tokens, short_scale, ordinals): + """ + Extract fraction numbers from a string. + + This function handles text such as '2 and 3/4'. Note that "one half" or + similar will be parsed by the whole number function. 
+ + Args: + tokens [Token]: words and their indexes in the original string. + short_scale boolean: + ordinals boolean: + + Returns: + (int or float, [Token]) + The value found, and the list of relevant tokens. + (None, None) if no fraction value is found. + + """ + for c in self._FRACTION_MARKER_EN: + partitions = partition_list(tokens, lambda t: t.word == c) + + if len(partitions) == 3: + numbers1 = \ + self._extract_numbers_with_text_en(partitions[0], short_scale, + ordinals, fractional_numbers=False) + numbers2 = \ + self._extract_numbers_with_text_en(partitions[2], short_scale, + ordinals, fractional_numbers=True) + + if not numbers1 or not numbers2: + return None, None + + # ensure first is not a fraction and second is a fraction + num1 = numbers1[-1] + num2 = numbers2[0] + if num1.value >= 1 and 0 < num2.value < 1: + return num1.value + num2.value, \ + num1.tokens + partitions[1] + num2.tokens + + return None, None + + def _extract_decimal_with_text_en(self, tokens, short_scale, ordinals): + """ + Extract decimal numbers from a string. + + This function handles text such as '2 point 5'. + + Notes: + While this is a helper for extractnumber_en, it also depends on + extractnumber_en, to parse out the components of the decimal. + + This does not currently handle things like: + number dot number number number + + Args: + tokens [Token]: The text to parse. + short_scale boolean: + ordinals boolean: + + Returns: + (float, [Token]) + The value found and relevant tokens. + (None, None) if no decimal value is found. + + """ + for c in self._DECIMAL_MARKER_EN: + partitions = partition_list(tokens, lambda t: t.word == c) + + if len(partitions) == 3: + numbers1 = \ + self._extract_numbers_with_text_en(partitions[0], short_scale, + ordinals, fractional_numbers=False) + numbers2 = \ + self._extract_numbers_with_text_en(partitions[2], short_scale, + ordinals, fractional_numbers=False) + + if not numbers1 or not numbers2: + return None, None + + number = numbers1[-1] + decimal = numbers2[0] + + # TODO handle number dot number number number + if "." not in str(decimal.text): + return number.value + float('0.' + str(decimal.value)), \ + number.tokens + partitions[1] + decimal.tokens + return None, None + + def _extract_whole_number_with_text_en(self, tokens, short_scale, ordinals): + """ + Handle numbers not handled by the decimal or fraction functions. This is + generally whole numbers. Note that phrases such as "one half" will be + handled by this function, while "one and a half" are handled by the + fraction function. + + Args: + tokens [Token]: + short_scale boolean: + ordinals boolean: + + Returns: + int or float, [Tokens] + The value parsed, and tokens that it corresponds to. + + """ + multiplies, string_num_ordinal, string_num_scale = \ + self._initialize_number_data_en(short_scale, speech=ordinals is not None) + + number_words = [] # type: List[Token] + val = False + prev_val = None + next_val = None + to_sum = [] + for idx, token in enumerate(tokens): + current_val = None + if next_val: + next_val = None + continue + + word = token.word.lower() + if word in self._ARTICLES_EN or word in self._NEGATIVES_EN: + number_words.append(token) + continue + + prev_word = tokens[idx - 1].word.lower() if idx > 0 else "" + next_word = tokens[idx + 1].word.lower() if idx + 1 < len(tokens) else "" + + if is_numeric(word[:-2]) and \ + (word.endswith("st") or word.endswith("nd") or + word.endswith("rd") or word.endswith("th")): + + # explicit ordinals, 1st, 2nd, 3rd, 4th.... 
Nth + word = word[:-2] + + # handle nth one + if next_word == "one": + # would return 1 instead otherwise + tokens[idx + 1] = Token("", idx) + next_word = "" + + # TODO replaces the wall of "and" and "or" with all() or any() as + # appropriate, the whole codebase should be checked for this pattern + if word not in string_num_scale and \ + word not in self._STRING_NUM_EN and \ + word not in self._SUMS_EN and \ + word not in multiplies and \ + not (ordinals and word in string_num_ordinal) and \ + not is_numeric(word) and \ + not self.is_fractional(word, short_scale=short_scale) and \ + not look_for_fractions(word.split('/')): + words_only = [token.word for token in number_words] + + if number_words and not all([w.lower() in self._ARTICLES_EN | + self._NEGATIVES_EN for w in words_only]): + break + else: + number_words = [] + continue + elif word not in multiplies \ + and prev_word not in multiplies \ + and prev_word not in self._SUMS_EN \ + and not (ordinals and prev_word in string_num_ordinal) \ + and prev_word not in self._NEGATIVES_EN \ + and prev_word not in self._ARTICLES_EN: + number_words = [token] + + elif prev_word in self._SUMS_EN and word in self._SUMS_EN: + number_words = [token] + elif ordinals is None and \ + (word in string_num_ordinal or word in self._SPOKEN_EXTRA_NUM_EN): + # flagged to ignore this token + continue + else: + number_words.append(token) + + # is this word already a number ? + if is_numeric(word): + if word.isdigit(): # doesn't work with decimals + val = int(word) + else: + val = float(word) + current_val = val + + # is this word the name of a number ? + if word in self._STRING_NUM_EN: + val = self._STRING_NUM_EN.get(word) + current_val = val + elif word in string_num_scale: + val = string_num_scale.get(word) + current_val = val + elif ordinals and word in string_num_ordinal: + val = string_num_ordinal[word] + current_val = val + + # is the prev word an ordinal number and current word is one? + # second one, third one + if ordinals and prev_word in string_num_ordinal and val == 1: + val = prev_val + + # is the prev word a number and should we sum it? + # twenty two, fifty six + if (prev_word in self._SUMS_EN and val and val < 10) or all([prev_word in + multiplies, + val < prev_val if prev_val else False]): + val = prev_val + val + + # is the prev word a number and should we multiply it? + # twenty hundred, six hundred + if word in multiplies: + if not prev_val: + prev_val = 1 + val = prev_val * val + + # is this a spoken fraction? + # half cup + if val is False and \ + not (ordinals is None and word in string_num_ordinal): + val = self.is_fractional(word, short_scale=short_scale, + spoken=ordinals is not None) + + current_val = val + + # 2 fifths + if ordinals is False: + next_val = self.is_fractional(next_word, short_scale=short_scale) + if next_val: + if not val: + val = 1 + val = val * next_val + number_words.append(tokens[idx + 1]) + + # is this a negative number? + if val and prev_word and prev_word in self._NEGATIVES_EN: + val = 0 - val + + # let's make sure it isn't a fraction + if not val: + # look for fractions like "2/3" + aPieces = word.split('/') + if look_for_fractions(aPieces): + val = float(aPieces[0]) / float(aPieces[1]) + current_val = val + + else: + if current_val and all([ + prev_word in self._SUMS_EN, + word not in self._SUMS_EN, + word not in multiplies, + current_val >= 10]): + # Backtrack - we've got numbers we can't sum. 
+ number_words.pop() + val = prev_val + break + prev_val = val + + if word in multiplies and next_word not in multiplies: + # handle long numbers + # six hundred sixty six + # two million five hundred thousand + # + # This logic is somewhat complex, and warrants + # extensive documentation for the next coder's sake. + # + # The current word is a power of ten. `current_val` is + # its integer value. `val` is our working sum + # (above, when `current_val` is 1 million, `val` is + # 2 million.) + # + # We have a dict `string_num_scale` containing [value, word] + # pairs for "all" powers of ten: string_num_scale[10] == "ten. + # + # We need go over the rest of the tokens, looking for other + # powers of ten. If we find one, we compare it with the current + # value, to see if it's smaller than the current power of ten. + # + # Numbers which are not powers of ten will be passed over. + # + # If all the remaining powers of ten are smaller than our + # current value, we can set the current value aside for later, + # and begin extracting another portion of our final result. + # For example, suppose we have the following string. + # The current word is "million".`val` is 9000000. + # `current_val` is 1000000. + # + # "nine **million** nine *hundred* seven **thousand** + # six *hundred* fifty seven" + # + # Iterating over the rest of the string, the current + # value is larger than all remaining powers of ten. + # + # The if statement passes, and nine million (9000000) + # is appended to `to_sum`. + # + # The main variables are reset, and the main loop begins + # assembling another number, which will also be appended + # under the same conditions. + # + # By the end of the main loop, to_sum will be a list of each + # "place" from 100 up: [9000000, 907000, 600] + # + # The final three digits will be added to the sum of that list + # at the end of the main loop, to produce the extracted number: + # + # sum([9000000, 907000, 600]) + 57 + # == 9,000,000 + 907,000 + 600 + 57 + # == 9,907,657 + # + # >>> foo = "nine million nine hundred seven thousand six + # hundred fifty seven" + # >>> extract_number(foo) + # 9907657 + + time_to_sum = True + for other_token in tokens[idx + 1:]: + if other_token.word.lower() in multiplies: + if string_num_scale[other_token.word.lower()] >= current_val: + time_to_sum = False + else: + continue + if not time_to_sum: + break + if time_to_sum: + to_sum.append(val) + val = 0 + prev_val = 0 + + if val is not None and to_sum: + val += sum(to_sum) + + return val, number_words + + def _extract_number_with_text_en_helper(self, tokens, + short_scale=True, ordinals=False, + fractional_numbers=True): + """ + Helper for _extract_number_with_text_en. + + This contains the real logic for parsing, but produces + a result that needs a little cleaning (specific, it may + contain leading articles that can be trimmed off). 
+ + Args: + tokens [Token]: + short_scale boolean: + ordinals boolean: + fractional_numbers boolean: + + Returns: + int or float, [Tokens] + + """ + if fractional_numbers: + fraction, fraction_text = \ + self._extract_fraction_with_text_en(tokens, short_scale, ordinals) + if fraction: + return fraction, fraction_text + + decimal, decimal_text = \ + self._extract_decimal_with_text_en(tokens, short_scale, ordinals) + if decimal: + return decimal, decimal_text + + return self._extract_whole_number_with_text_en(tokens, short_scale, ordinals) + + def _extract_number_with_text_en(self, tokens, short_scale=True, + ordinals=False, fractional_numbers=True): + """ + This function extracts a number from a list of Tokens. + + Args: + tokens str: the string to normalize + short_scale (bool): use short scale if True, long scale if False + ordinals (bool): consider ordinal numbers, third=3 instead of 1/3 + fractional_numbers (bool): True if we should look for fractions and + decimals. + Returns: + ReplaceableNumber + + """ + number, tokens = \ + self._extract_number_with_text_en_helper(tokens, short_scale, + ordinals, fractional_numbers) + while tokens and tokens[0].word in self._ARTICLES_EN: + tokens.pop(0) + return ReplaceableNumber(number, tokens) + + def _extract_numbers_with_text_en(self, tokens, short_scale=True, + ordinals=False, fractional_numbers=True): + """ + Extract all numbers from a list of Tokens, with the words that + represent them. + + Args: + [Token]: The tokens to parse. + short_scale bool: True if short scale numbers should be used, False for + long scale. True by default. + ordinals bool: True if ordinal words (first, second, third, etc) should + be parsed. + fractional_numbers bool: True if we should look for fractions and + decimals. + + Returns: + [ReplaceableNumber]: A list of tuples, each containing a number and a + string. 
+ + """ + placeholder = "" # inserted to maintain correct indices + results = [] + while True: + to_replace = \ + self._extract_number_with_text_en(tokens, short_scale, + ordinals, fractional_numbers) + + if not to_replace: + break + + results.append(to_replace) + + tokens = [ + t if not + to_replace.start_index <= t.index <= to_replace.end_index + else + Token(placeholder, t.index) for t in tokens + ] + results.sort(key=lambda n: n.start_index) + return results + + +class AzerbaijaniNumberParser: + # taken from lingua_franca + + # TODO - from json file + _NUM_STRING_AZ = { + 0: 'sıfır', + 1: 'bir', + 2: 'iki', + 3: 'üç', + 4: 'dörd', + 5: 'beş', + 6: 'altı', + 7: 'yeddi', + 8: 'səkkiz', + 9: 'doqquz', + 10: 'on', + 11: 'on bir', + 12: 'on iki', + 13: 'on üç', + 14: 'on dörd', + 15: 'on beş', + 16: 'on altı', + 17: 'on yeddi', + 18: 'on səkkiz', + 19: 'on doqquz', + 20: 'iyirmi', + 30: 'otuz', + 40: 'qırx', + 50: 'əlli', + 60: 'altmış', + 70: 'yetmiş', + 80: 'səksən', + 90: 'doxsan' + } + _FRACTION_STRING_AZ = { + 2: 'ikidə', + 3: 'üçdə', + 4: 'dörddə', + 5: 'beşdə', + 6: 'altıda', + 7: 'yeddidə', + 8: 'səkkizdə', + 9: 'doqquzda', + 10: 'onda', + 11: 'on birdə', + 12: 'on ikidə', + 13: 'on üçdə', + 14: 'on dörddə', + 15: 'on beşdə', + 16: 'on altıda', + 17: 'on yeddidə', + 18: 'on səkkizdə', + 19: 'on doqquzda', + 20: 'iyirmidə', + 30: 'otuzda', + 40: 'qırxda', + 50: 'əllidə', + 60: 'altmışda', + 70: 'yetmişdə', + 80: 'səksəndə', + 90: 'doxsanda', + 1e2: 'yüzdə', + 1e3: 'mində' + } + _LONG_SCALE_AZ = OrderedDict([ + (100, 'yüz'), + (1000, 'min'), + (1000000, 'milyon'), + (1e12, "milyard"), + (1e18, 'trilyon'), + (1e24, "kvadrilyon"), + (1e30, "kvintilyon"), + (1e36, "sekstilyon"), + (1e42, "septilyon"), + (1e48, "oktilyon"), + (1e54, "nonilyon"), + (1e60, "dekilyon") + ]) + _SHORT_SCALE_AZ = OrderedDict([ + (100, 'yüz'), + (1000, 'min'), + (1000000, 'milyon'), + (1e9, "milyard"), + (1e12, 'trilyon'), + (1e15, "kvadrilyon"), + (1e18, "kvintilyon"), + (1e21, "sekstilyon"), + (1e24, "septilyon"), + (1e27, "oktilyon"), + (1e30, "nonilyon"), + (1e33, "dekilyon") + ]) + _ORDINAL_BASE_AZ = { + 1: 'birinci', + 2: 'ikinci', + 3: 'üçüncü', + 4: 'dördüncü', + 5: 'beşinci', + 6: 'altıncı', + 7: 'yeddinci', + 8: 'səkkizinci', + 9: 'doqquzuncu', + 10: 'onuncu', + 11: 'on birinci', + 12: 'on ikinci', + 13: 'on üçüncü', + 14: 'on dördüncü', + 15: 'on beşinci', + 16: 'on altıncı', + 17: 'on yeddinci', + 18: 'on səkkizinci', + 19: 'on doqquzuncu', + 20: 'iyirminci', + 30: 'otuzuncu', + 40: "qırxıncı", + 50: "əllinci", + 60: "altmışıncı", + 70: "yetmışinci", + 80: "səksəninci", + 90: "doxsanınçı", + 1e2: "yüzüncü", + 1e3: "mininci" + } + _SHORT_ORDINAL_AZ = { + 1e6: "milyonuncu", + 1e9: "milyardıncı", + 1e12: "trilyonuncu", + 1e15: "kvadrilyonuncu", + 1e18: "kvintilyonuncu", + 1e21: "sekstilyonuncu", + 1e24: "septilyonuncu", + 1e27: "oktilyonuncu", + 1e30: "nonilyonuncu", + 1e33: "dekilyonuncu" + # TODO > 1e-33 + } + _SHORT_ORDINAL_AZ.update(_ORDINAL_BASE_AZ) + _LONG_ORDINAL_AZ = { + 1e6: "milyonuncu", + 1e12: "milyardıncı", + 1e18: "trilyonuncu", + 1e24: "kvadrilyonuncu", + 1e30: "kvintilyonuncu", + 1e36: "sekstilyonuncu", + 1e42: "septilyonuncu", + 1e48: "oktilyonuncu", + 1e54: "nonilyonuncu", + 1e60: "dekilyonuncu" + # TODO > 1e60 + } + _LONG_ORDINAL_AZ.update(_ORDINAL_BASE_AZ) + # negate next number (-2 = 0 - 2) + _NEGATIVES_AZ = {"mənfi", "minus"} + # sum the next number (iyirmi iki = 20 + 2) + _SUMS_AZ = {'on', '10', 'iyirmi', '20', 'otuz', '30', 'qırx', '40', 'əlli', '50', + 'altmış', '60', 'yetmiş', 
'70', 'səksən', '80', 'doxsan', '90'}
+    _MULTIPLIES_LONG_SCALE_AZ = set(_LONG_SCALE_AZ.values())
+    _MULTIPLIES_SHORT_SCALE_AZ = set(_SHORT_SCALE_AZ.values())
+    # split sentence parse separately and sum ( 2 and a half = 2 + 0.5 )
+    _FRACTION_MARKER_AZ = {"və"}
+    # decimal marker ( 1 nöqtə 5 = 1 + 0.5)
+    _DECIMAL_MARKER_AZ = {"nöqtə"}
+    _STRING_NUM_AZ = {v: k for k, v in _NUM_STRING_AZ.items()}
+    _SPOKEN_EXTRA_NUM_AZ = {
+        "yarım": 0.5,
+        "üçdəbir": 1 / 3,
+        "dörddəbir": 1 / 4
+    }
+    _STRING_SHORT_ORDINAL_AZ = {v: k for k, v in _SHORT_ORDINAL_AZ.items()}
+    _STRING_LONG_ORDINAL_AZ = {v: k for k, v in _LONG_ORDINAL_AZ.items()}
+
+    def convert_words_to_numbers(self, text, short_scale=True, ordinals=False):
+        """
+        Convert words in a string into their equivalent numbers.
+        Args:
+            text str:
+            short_scale boolean: True if short scale numbers should be used.
+            ordinals boolean: True if ordinals (e.g. birinci, ikinci, üçüncü) should
+                              be parsed to their number values (1, 2, 3...)
+
+        Returns:
+            str
+            The original text, with numbers subbed in where appropriate.
+
+        """
+        tokens = [Token(word, index) for index, word in enumerate(word_tokenize(text))]
+        numbers_to_replace = self.extract_numbers(tokens, short_scale, ordinals)
+        results = []
+        for token in tokens:
+            if not numbers_to_replace or \
+                    token.index < numbers_to_replace[0].start_index:
+                results.append(token.word)
+            else:
+                if numbers_to_replace and \
+                        token.index == numbers_to_replace[0].start_index:
+                    results.append(str(numbers_to_replace[0].value))
+                if numbers_to_replace and \
+                        token.index == numbers_to_replace[0].end_index:
+                    numbers_to_replace.pop(0)
+
+        return ' '.join(results)
+
+    def extract_numbers(self, tokens: list, short_scale: bool = False, ordinals: bool = False) -> List:
+        """
+        Extract numeric values from a list of tokens.
+        Args:
+            tokens (list): list of tokens (str)
+            short_scale boolean: True if short scale numbers should be used.
+            ordinals boolean: True if ordinals (e.g. first, second, third) should
+                              be parsed to their number values (1, 2, 3...)
+        Returns:
+            list of extracted numbers (ReplaceableNumber)
+
+        """
+        if not isinstance(tokens[0], Token):  # list of string tokens
+            tokens = [Token(word, index) for index, word in enumerate(tokens)]
+        numbers_to_replace = self._extract_numbers_with_text_az(tokens, short_scale, ordinals)
+        numbers_to_replace.sort(key=lambda number: number.start_index)
+        return numbers_to_replace
+
+    def is_fractional(self, input_str, short_scale=True, spoken=True):
+        """
+        This function takes the given text and checks if it is a fraction.
+
+        Args:
+            input_str (str): the string to check if fractional
+            short_scale (bool): use short scale if True, long scale if False
+            spoken (bool): consider spoken fraction words
+        Returns:
+            (bool) or (float): False if not a fraction, otherwise the fraction
+
+        """
+
+        fracts = {"dörddəbir": 4, "yarım": 2, "üçdəbir": 3}
+        for num in self._FRACTION_STRING_AZ:
+            if num > 2:
+                fracts[self._FRACTION_STRING_AZ[num]] = num
+
+        if input_str.lower() in fracts and spoken:
+            return 1.0 / fracts[input_str.lower()]
+        return False
+
+    # helper methods
+
+    def _extract_numbers_with_text_az(self, tokens, short_scale=True,
+                                      ordinals=False, fractional_numbers=True):
+        """
+        Extract all numbers from a list of Tokens, with the words that
+        represent them.
+
+        Args:
+            [Token]: The tokens to parse.
+            short_scale bool: True if short scale numbers should be used, False for
+                              long scale. True by default.
+ ordinals bool: True if ordinal words (birinci, ikinci, üçüncü, etc) should + be parsed. + fractional_numbers bool: True if we should look for fractions and + decimals. + + Returns: + [ReplaceableNumber]: A list of tuples, each containing a number and a + string. + + """ + placeholder = "" # inserted to maintain correct indices + results = [] + while True: + to_replace = \ + self._extract_number_with_text_az(tokens, short_scale, + ordinals, fractional_numbers) + if not to_replace: + break + + results.append(to_replace) + + tokens = [ + t if not + to_replace.start_index <= t.index <= to_replace.end_index + else + Token(placeholder, t.index) for t in tokens + ] + results.sort(key=lambda n: n.start_index) + return results + + def _extract_number_with_text_az(self, tokens, short_scale=True, + ordinals=False, fractional_numbers=True): + """ + This function extracts a number from a list of Tokens. + + Args: + tokens str: the string to normalize + short_scale (bool): use short scale if True, long scale if False + ordinals (bool): consider ordinal numbers + fractional_numbers (bool): True if we should look for fractions and + decimals. + Returns: + ReplaceableNumber + + """ + number, tokens = \ + self._extract_number_with_text_az_helper(tokens, short_scale, + ordinals, fractional_numbers) + return ReplaceableNumber(number, tokens) + + def _extract_number_with_text_az_helper(self, tokens, + short_scale=True, ordinals=False, + fractional_numbers=True): + """ + Helper for _extract_number_with_text_az. + + This contains the real logic for parsing, but produces + a result that needs a little cleaning (specific, it may + contain leading articles that can be trimmed off). + + Args: + tokens [Token]: + short_scale boolean: + ordinals boolean: + fractional_numbers boolean: + + Returns: + int or float, [Tokens] + + """ + if fractional_numbers: + fraction, fraction_text = \ + self._extract_fraction_with_text_az(tokens, short_scale, ordinals) + if fraction: + # print("fraction") + return fraction, fraction_text + + decimal, decimal_text = \ + self._extract_decimal_with_text_az(tokens, short_scale, ordinals) + if decimal: + # print("decimal") + return decimal, decimal_text + + return self._extract_whole_number_with_text_az(tokens, short_scale, ordinals) + + def _extract_fraction_with_text_az(self, tokens, short_scale, ordinals): + """ + Extract fraction numbers from a string. + + This function handles text such as '2 və dörddə üç'. Note that "yarım" or + similar will be parsed by the whole number function. + + Args: + tokens [Token]: words and their indexes in the original string. + short_scale boolean: + ordinals boolean: + + Returns: + (int or float, [Token]) + The value found, and the list of relevant tokens. + (None, None) if no fraction value is found. 
+ + """ + for c in self._FRACTION_MARKER_AZ: + partitions = partition_list(tokens, lambda t: t.word == c) + + if len(partitions) == 3: + numbers1 = \ + self._extract_numbers_with_text_az(partitions[0], short_scale, + ordinals, fractional_numbers=False) + numbers2 = \ + self._extract_numbers_with_text_az(partitions[2], short_scale, + ordinals, fractional_numbers=True) + + if not numbers1 or not numbers2: + return None, None + + # ensure first is not a fraction and second is a fraction + num1 = numbers1[-1] + num2 = numbers2[0] + if num1.value >= 1 and 0 < num2.value < 1: + return num1.value + num2.value, \ + num1.tokens + partitions[1] + num2.tokens + + return None, None + + def _extract_decimal_with_text_az(self, tokens, short_scale, ordinals): + """ + Extract decimal numbers from a string. + + This function handles text such as '2 nöqtə 5'. + + Notes: + While this is a helper for extractnumber_az, it also depends on + extractnumber_az, to parse out the components of the decimal. + + This does not currently handle things like: + number dot number number number + + Args: + tokens [Token]: The text to parse. + short_scale boolean: + ordinals boolean: + + Returns: + (float, [Token]) + The value found and relevant tokens. + (None, None) if no decimal value is found. + + """ + for c in self._DECIMAL_MARKER_AZ: + partitions = partition_list(tokens, lambda t: t.word == c) + + if len(partitions) == 3: + numbers1 = \ + self._extract_numbers_with_text_az(partitions[0], short_scale, + ordinals, fractional_numbers=False) + numbers2 = \ + self._extract_numbers_with_text_az(partitions[2], short_scale, + ordinals, fractional_numbers=False) + if not numbers1 or not numbers2: + return None, None + + number = numbers1[-1] + decimal = numbers2[0] + + # TODO handle number dot number number number + if "." not in str(decimal.text): + return number.value + float('0.' + str(decimal.value)), \ + number.tokens + partitions[1] + decimal.tokens + return None, None + + def _extract_whole_number_with_text_az(self, tokens, short_scale, ordinals): + """ + Handle numbers not handled by the decimal or fraction functions. This is + generally whole numbers. Note that phrases such as "yarım" will be + handled by this function. + + Args: + tokens [Token]: + short_scale boolean: + ordinals boolean: + + Returns: + int or float, [Tokens] + The value parsed, and tokens that it corresponds to. 
+ + """ + multiplies, string_num_ordinal, string_num_scale = \ + self._initialize_number_data_az(short_scale, speech=ordinals is not None) + + number_words = [] # type: List[Token] + val = False + prev_val = None + next_val = None + to_sum = [] + # print(tokens, ordinals) + for idx, token in enumerate(tokens): + current_val = None + if next_val: + next_val = None + continue + + word = token.word.lower() + if word in self._NEGATIVES_AZ: + number_words.append(token) + continue + + prev_word = tokens[idx - 1].word.lower() if idx > 0 else "" + next_word = tokens[idx + 1].word.lower() if idx + 1 < len(tokens) else "" + # print(prev_word, word, next_word, number_words) + if word not in string_num_scale and \ + word not in self._STRING_NUM_AZ and \ + word not in self._SUMS_AZ and \ + word not in multiplies and \ + not (ordinals and word in string_num_ordinal) and \ + not is_numeric(word) and \ + not self.is_fractional(word, short_scale=short_scale) and \ + not look_for_fractions(word.split('/')): + # print("a1") + words_only = [token.word for token in number_words] + + if number_words and not all([w.lower() in + self._NEGATIVES_AZ for w in words_only]): + break + else: + number_words = [] + continue + elif word not in multiplies \ + and word not in self._SPOKEN_EXTRA_NUM_AZ \ + and prev_word not in multiplies \ + and prev_word not in self._SUMS_AZ \ + and not (ordinals and prev_word in string_num_ordinal) \ + and prev_word not in self._NEGATIVES_AZ: + number_words = [token] + # print("a2") + elif prev_word in self._SUMS_AZ and word in self._SUMS_AZ: + number_words = [token] + # print("a3") + elif ordinals is None and \ + (word in string_num_ordinal or word in self._SPOKEN_EXTRA_NUM_AZ): + # print("a4") + # flagged to ignore this token + continue + else: + # print("a5") + number_words.append(token) + + # is this word already a number ? + if is_numeric(word): + # print("b") + if word.isdigit(): # doesn't work with decimals + val = int(word) + else: + val = float(word) + current_val = val + + # is this word the name of a number ? + if word in self._STRING_NUM_AZ: + val = self._STRING_NUM_AZ.get(word) + current_val = val + # print("c1", current_val) + elif word in string_num_scale: + val = string_num_scale.get(word) + current_val = val + # print("c2") + elif ordinals and word in string_num_ordinal: + val = string_num_ordinal[word] + current_val = val + # print("c3") + # is the prev word a number and should we sum it? + # twenty two, fifty six + if (prev_word in self._SUMS_AZ and val and val < 10) or all([prev_word in + multiplies, + val < prev_val if prev_val else False]): + val = prev_val + val + # print("d") + + # is the prev word a number and should we multiply it? + # twenty hundred, six hundred + if word in multiplies: + if not prev_val: + prev_val = 1 + val = prev_val * val + # print("e") + + # is this a spoken fraction? 
+ # 1 yarım fincan - yarım fincan + if current_val is None and not (ordinals is None and word in self._SPOKEN_EXTRA_NUM_AZ): + val = self.is_fractional(word, short_scale=short_scale, + spoken=ordinals is not None) + if val: + if prev_val: + val += prev_val + current_val = val + # print("f", current_val, prev_val) + if word in self._SPOKEN_EXTRA_NUM_AZ: + break + + # dörddə bir + if ordinals is False: + temp = prev_val + prev_val = self.is_fractional(prev_word, short_scale=short_scale) + if prev_val: + if not val: + val = 1 + val = val * prev_val + if idx + 1 < len(tokens): + number_words.append(tokens[idx + 1]) + else: + prev_val = temp + # print("g", prev_val) + + # is this a negative number? + if val and prev_word and prev_word in self._NEGATIVES_AZ: + val = 0 - val + # print("h") + + # let's make sure it isn't a fraction + if not val: + # look for fractions like "2/3" + aPieces = word.split('/') + if look_for_fractions(aPieces): + val = float(aPieces[0]) / float(aPieces[1]) + current_val = val + # print("i") + + else: + if current_val and all([ + prev_word in self._SUMS_AZ, + word not in self._SUMS_AZ, + word not in multiplies, + current_val >= 10]): + # Backtrack - we've got numbers we can't sum. + # print("j", number_words, prev_val) + number_words.pop() + val = prev_val + break + prev_val = val + + if word in multiplies and next_word not in multiplies: + # handle long numbers + # six hundred sixty six + # two million five hundred thousand + # + # This logic is somewhat complex, and warrants + # extensive documentation for the next coder's sake. + # + # The current word is a power of ten. `current_val` is + # its integer value. `val` is our working sum + # (above, when `current_val` is 1 million, `val` is + # 2 million.) + # + # We have a dict `string_num_scale` containing [word, value] + # pairs for "all" powers of ten: string_num_scale["ten"] == 10. + # + # We need to go over the rest of the tokens, looking for other + # powers of ten. If we find one, we compare it with the current + # value, to see if it's smaller than the current power of ten. + # + # Numbers which are not powers of ten will be passed over. + # + # If all the remaining powers of ten are smaller than our + # current value, we can set the current value aside for later, + # and begin extracting another portion of our final result. + # For example, suppose we have the following string. + # The current word is "million". `val` is 9000000. + # `current_val` is 1000000. + # + # "nine **million** nine *hundred* seven **thousand** + # six *hundred* fifty seven" + # + # Iterating over the rest of the string, the current + # value is larger than all remaining powers of ten. + # + # The if statement passes, and nine million (9000000) + # is appended to `to_sum`. + # + # The main variables are reset, and the main loop begins + # assembling another number, which will also be appended + # under the same conditions.
+ # + # By the end of the main loop, to_sum will be a list of each + # "place" from 100 up: [9000000, 907000, 600] + # + # The final three digits will be added to the sum of that list + # at the end of the main loop, to produce the extracted number: + # + # sum([9000000, 907000, 600]) + 57 + # == 9,000,000 + 907,000 + 600 + 57 + # == 9,907,657 + # + # >>> foo = "nine million nine hundred seven thousand six + # hundred fifty seven" + # >>> extract_number(foo) + # 9907657 + # print("k", tokens[idx+1:]) + time_to_sum = True + for other_token in tokens[idx + 1:]: + if other_token.word.lower() in multiplies: + if string_num_scale[other_token.word.lower()] >= current_val: + time_to_sum = False + else: + continue + if not time_to_sum: + break + if time_to_sum: + # print("l") + to_sum.append(val) + val = 0 + prev_val = 0 + + if val is not None and to_sum: + # print("m", to_sum) + val += sum(to_sum) + # print(val, number_words, "end") + return val, number_words + + def _initialize_number_data_az(self, short_scale, speech=True): + """ + Generate dictionaries of words to numbers, based on scale. + + This is a helper function for _extract_whole_number. + + Args: + short_scale (bool): + speech (bool): consider extra words (_SPOKEN_EXTRA_NUM_AZ) to be numbers + + Returns: + (set(str), dict(str, number), dict(str, number)) + multiplies, string_num_ordinal, string_num_scale + + """ + multiplies = self._MULTIPLIES_SHORT_SCALE_AZ if short_scale \ + else self._MULTIPLIES_LONG_SCALE_AZ + + string_num_ordinal_az = self._STRING_SHORT_ORDINAL_AZ if short_scale \ + else self._STRING_LONG_ORDINAL_AZ + + string_num_scale_az = self._SHORT_SCALE_AZ if short_scale else self._LONG_SCALE_AZ + string_num_scale_az = {v: k for k, v in string_num_scale_az.items()} + + return multiplies, string_num_ordinal_az, string_num_scale_az diff --git a/ovos_utterance_normalizer/res/az/normalize.json b/ovos_utterance_normalizer/res/az/normalize.json new file mode 100644 index 0000000..1a7729a --- /dev/null +++ b/ovos_utterance_normalizer/res/az/normalize.json @@ -0,0 +1,45 @@ +{ + "lowercase": false, + "numbers_to_digits": true, + "expand_contractions": true, + "remove_symbols": true, + "remove_accents": false, + "remove_articles": false, + "remove_stopwords": false, + "contractions": {}, + "word_replacements": { + "sora": "sonra" + }, + "number_replacements": { + "sıfır": "0", + "bir": "1", + "iki": "2", + "üç": "3", + "dörd": "4", + "beş": "5", + "altı": "6", + "yeddi": "7", + "səkkiz": "8", + "doqquz": "9", + "on": "10", + "on bir": "11", + "on iki": "12", + "on üç": "13", + "on dörd": "14", + "on beş": "15", + "on altı": "16", + "on yeddi": "17", + "on səkkiz": "18", + "on doqquz": "19", + "iyirmi": "20", + "otuz": "30", + "qırx": "40", + "əlli": "50", + "altmiş": "60", + "yetmiş": "70", + "səksən": "80", + "doxsan": "90" + }, + "stopwords": [], + "articles": [] +} \ No newline at end of file diff --git a/ovos_utterance_normalizer/res/ca/normalize.json b/ovos_utterance_normalizer/res/ca/normalize.json new file mode 100644 index 0000000..76fbdc2 --- /dev/null +++ b/ovos_utterance_normalizer/res/ca/normalize.json @@ -0,0 +1,109 @@ +{ + "lowercase": false, + "numbers_to_digits": true, + "expand_contractions": false, + "remove_symbols": true, + "remove_accents": false, + "remove_articles": false, + "remove_stopwords": true, + "contractions": {}, + "word_replacements": {}, + "number_replacements": { + "catorze": "14", + "cent": "100", + "cents": "100", + "cinc": "5", + "cinc-centes": "500", + "cinc-cents": "500", + "cinquanta": "50", + 
"deu": "10", + "dinou": "19", + "setze": "16", + "disset": "17", + "dihuit": "18", + "divuit": "18", + "dos": "2", + "dos-centes": "200", + "dos-cents": "200", + "dotze": "12", + "dues": "2", + "dues-centes": "200", + "huitanta": "80", + "huit": "8", + "huit-centes": "800", + "huit-cents": "800", + "mil": "1000", + "milió": "1000000", + "nou": "9", + "nou-centes": "900", + "nou-cents": "900", + "noranta": "90", + "onze": "11", + "primer": "1", + "primera": "1", + "quaranta": "40", + "quatre": "4", + "quatre-centes": "400", + "quatre-cents": "400", + "quinze": "15", + "segon": "2", + "segona": "2", + "seixanta": "60", + "set": "7", + "set-centes": "700", + "set-cents": "700", + "setanta": "70", + "sis": "6", + "sis-centes": "600", + "sis-cents": "600", + "tercer": "3", + "trenta": "30", + "tres": "3", + "tres-centes": "300", + "tres-cents": "300", + "tretze": "13", + "u": "1", + "un": "1", + "una": "1", + "vint": "20", + "vuitanta": "80", + "vuit": "8", + "vuit-centes": "800", + "vuit-cents": "800", + "zero": "0" + }, + "stopwords": [ + "de", + "del", + "dels", + "ell", + "ella", + "ells", + "elles", + "jo", + "i", + "al", + "dins la", + "a la", + "nosaltres", + "dins el", + "para", + "aquest", + "aquesta", + "aquests", + "aquestes", + "aquell", + "aquella", + "aquells", + "aquelles", + "que" + ], + "articles": [ + "el", + "la", + "l", + "els", + "les", + "los" + ] +} diff --git a/ovos_utterance_normalizer/res/cz/normalize.json b/ovos_utterance_normalizer/res/cz/normalize.json new file mode 100644 index 0000000..c7836ee --- /dev/null +++ b/ovos_utterance_normalizer/res/cz/normalize.json @@ -0,0 +1,46 @@ +{ + "lowercase": false, + "numbers_to_digits": true, + "expand_contractions": true, + "remove_symbols": true, + "remove_accents": false, + "remove_articles": false, + "remove_stopwords": false, + "contractions": {}, + "word_replacements": {}, + "number_replacements": { + "nula": "0", + "jedna": "1", + "dva": "2", + "dvě": "2", + "tři": "3", + "čtyři": "4", + "pět": "5", + "šest": "6", + "sedm": "7", + "sedum": "7", + "osm": "8", + "osum": "8", + "devět": "9", + "deset": "10", + "jedenáct": "11", + "dvanáct": "12", + "třináct": "13", + "čtrnáct": "14", + "patnáct": "15", + "šestnáct": "16", + "sedmnáct": "17", + "osmnáct": "18", + "devatenáct": "19", + "dvacet": "20", + "třicet": "30", + "čtyřicet": "40", + "padesát": "50", + "šedesát": "60", + "sedmdesát": "70", + "osmdesát": "80", + "devadesát": "90" + }, + "stopwords": [], + "articles": [] +} \ No newline at end of file diff --git a/ovos_utterance_normalizer/res/de/normalize.json b/ovos_utterance_normalizer/res/de/normalize.json new file mode 100644 index 0000000..63e5204 --- /dev/null +++ b/ovos_utterance_normalizer/res/de/normalize.json @@ -0,0 +1,122 @@ +{ + "lowercase": false, + "numbers_to_digits": true, + "expand_contractions": true, + "remove_symbols": true, + "remove_accents": false, + "remove_articles": false, + "remove_stopwords": false, + "contractions": { + "am": "an dem", + "ans": "an das", + "aufs": "auf das", + "beim": "bei dem", + "durchs": "durch das", + "fürs": "für das", + "hinterm": "hinter dem", + "ins": "in das", + "übers": "über das", + "unters": "unter das", + "unterm": "unter dem", + "vom": "von dem", + "vors": "vor das", + "vorm": "vor dem", + "zum": "zu dem" + }, + "word_replacements": { + "mg": "milligramm", + "kg": "kilogramm", + "g": "gramm", + "nm": "nanometer", + "µm": "mikrometer", + "mm": "millimeter", + "mm^2": "quadratmillimeter", + "mm²": "quadratmillimeter", + "cm": "zentimeter", + "cm^2": 
"quadratzentimeter", + "cm²": "quadratzentimeter", + "cm^3": "kubikzentimeter", + "cm³": "kubikzentimeter", + "dm": "dezimeter", + "m": "meter", + "m^2": "quadratmeter", + "m²": "quadratmeter", + "m^3": "kubikmeter", + "m³": "kubikmeter", + "km": "kilometer", + "km^2": "quadratkilometer", + "km²": "quadratkilometer", + "ha": "hektar", + "w": "watt", + "j": "joule", + "kj": "kilojoule", + "k_b": "kilobyte", + "m_b": "megabyte", + "g_b": "gigabyte", + "t_b": "terabyte", + "p_b": "petabyte", + "k_w": "kilowatt", + "kb": "kilobyte", + "mb": "megabyte", + "gb": "gigabyte", + "tb": "terabyte", + "pb": "petabyte", + "kw": "kilowatt", + "m_w": "megawatt", + "g_w": "gigawatt", + "mw": "megawatt", + "gw": "gigawatt", + "°": "grad", + "°c": "grad celsius", + "°f": "grad fahrenheit" + }, + "number_replacements": { + "null": "0", + "eins": "1", + "zwei": "2", + "drei": "3", + "vier": "4", + "fünf": "5", + "sechs": "6", + "sieben": "7", + "acht": "8", + "neun": "9", + "zehn": "10", + "elf": "11", + "zwölf": "12", + "dreizehn": "13", + "vierzehn": "14", + "fünfzehn": "15", + "sechzehn": "16", + "siebzehn": "17", + "achtzehn": "18", + "neunzehn": "19", + "zwanzig": "20", + "einundzwanzig": "21", + "zweiundzwanzig": "22", + "dreiundzwanzig": "23", + "vierundzwanzig": "24", + "fünfundzwanzig": "25", + "sechsundzwanzig": "26", + "siebenundzwanzig": "27", + "achtundzwanzig": "28", + "neunundzwanzig": "29", + "dreißig": "30", + "einunddreißig": "31", + "vierzig": "40", + "fünfzig": "50", + "sechtzig": "60", + "siebzig": "70", + "achtzig": "80", + "neunzig": "90" + }, + "stopwords": [], + "articles": [ + "der", + "die", + "das", + "dem", + "den", + "des" + ] +} diff --git a/ovos_utterance_normalizer/res/en/normalize.json b/ovos_utterance_normalizer/res/en/normalize.json new file mode 100644 index 0000000..9ae7bf3 --- /dev/null +++ b/ovos_utterance_normalizer/res/en/normalize.json @@ -0,0 +1,215 @@ +{ + "lowercase": false, + "numbers_to_digits": true, + "expand_contractions": true, + "remove_symbols": true, + "remove_accents": false, + "remove_articles": false, + "remove_stopwords": false, + "contractions": { + "I'd": "I would", + "I'll": "I will", + "I'm": "I am", + "I've": "I have", + "ain't": "is not", + "aren't": "are not", + "can't": "can not", + "could've": "could have", + "couldn't": "could not", + "didn't": "did not", + "doesn't": "does not", + "don't": "do not", + "gonna": "going to", + "gotta": "got to", + "hadn't": "had not", + "hasn't": "has not", + "haven't": "have not", + "he'd": "he would", + "he'll": "he will", + "he's": "he is", + "how'd": "how did", + "how'll": "how will", + "how's": "how is", + "isn't": "is not", + "it'd": "it would", + "it'll": "it will", + "it's": "it is", + "might've": "might have", + "mightn't": "might not", + "must've": "must have", + "mustn't": "must not", + "needn't": "need not", + "oughtn't": "ought not", + "shan't": "shall not", + "she'd": "she would", + "she'll": "she will", + "she's": "she is", + "should've": "should have", + "shouldn't": "should not", + "somebody's": "somebody is", + "someone'd": "someone would", + "someone'll": "someone will", + "someone's": "someone is", + "that'd": "that would", + "that'll": "that will", + "that's": "that is", + "there'd": "there would", + "there're": "there are", + "there's": "there is", + "they'd": "they would", + "they'll": "they will", + "they're": "they are", + "they've": "they have", + "wasn't": "was not", + "we'd": "we would", + "we'll": "we will", + "we're": "we are", + "we've": "we have", + "weren't": "were not", + 
"what'd": "what did", + "what'll": "what will", + "what're": "what are", + "what's": "what is", + "what've": "what have", + "whats": "what is", + "when'd": "when did", + "when's": "when is", + "where'd": "where did", + "where's": "where is", + "where've": "where have", + "who'd": "who would", + "who'd've": "who would have", + "who'll": "who will", + "who're": "who are", + "who's": "who is", + "who've": "who have", + "why'd": "why did", + "why're": "why are", + "why's": "why is", + "won't": "will not", + "won't've": "will not have", + "would've": "would have", + "wouldn't": "would not", + "wouldn't've": "would not have", + "y'ain't": "you are not", + "y'aint": "you are not", + "y'all": "you all", + "ya'll": "you all", + "you'd": "you would", + "you'd've": "you would have", + "you'll": "you will", + "you're": "you are", + "you've": "you have", + "I'm'a": "I am going to", + "I'm'o": "I am going to", + "I'll've": "I will have", + "I'd've": "I would have", + "Whatcha": "What are you", + "amn't": "am not", + "'cause": "because", + "can't've": "cannot have", + "couldn't've": "could not have", + "daren't": "dare not", + "daresn't": "dare not", + "dasn't": "dare not", + "everyone's": "everyone is", + "gimme": "give me", + "gon't": "go not", + "hadn't've": "had not have", + "he've": "he would have", + "he'll've": "he will have", + "he'd've": "he would have", + "here's": "here is", + "how're": "how are", + "how'd'y": "how do you do", + "howd'y": "how do you do", + "howdy": "how do you do", + "'tis": "it is", + "'twas": "it was", + "it'll've": "it will have", + "it'd've": "it would have", + "kinda": "kind of", + "let's": "let us", + "ma'am": "madam", + "may've": "may have", + "mayn't": "may not", + "mightn't've": "might not have", + "mustn't've": "must not have", + "needn't've": "need not have", + "ol'": "old", + "oughtn't've": "ought not have", + "sha'n't": "shall not", + "shan't": "shall not", + "shalln't": "shall not", + "shan't've": "shall not have", + "she'd've": "she would have", + "shouldn't've": "should not have", + "so've": "so have", + "so's": "so is", + "something's": "something is", + "that're": "that are", + "that'd've": "that would have", + "there'll": "there will", + "there'd've": "there would have", + "these're": "these are", + "they'll've": "they will have", + "they'd've": "they would have", + "this's": "this is", + "this'll": "this will", + "this'd": "this would", + "those're": "those are", + "to've": "to have", + "wanna": "want to", + "we'll've": "we will have", + "we'd've": "we would have", + "what'll've": "what will have", + "when've": "when have", + "where're": "where are", + "which's": "which is", + "who'll've": "who will have", + "why've": "why have", + "will've": "will have", + "y'all're": "you all are", + "y'all've": "you all have", + "y'all'd": "you all would", + "y'all'd've": "you all would have", + "you'll've": "you will have" + }, + "word_replacements": {}, + "number_replacements": { + "zero": "0", + "one": "1", + "two": "2", + "three": "3", + "four": "4", + "five": "5", + "six": "6", + "seven": "7", + "eight": "8", + "nine": "9", + "ten": "10", + "eleven": "11", + "twelve": "12", + "thirteen": "13", + "fourteen": "14", + "fifteen": "15", + "sixteen": "16", + "seventeen": "17", + "eighteen": "18", + "nineteen": "19", + "twenty": "20", + "thirty": "30", + "forty": "40", + "fifty": "50", + "sixty": "60", + "seventy": "70", + "eighty": "80", + "ninety": "90" + }, + "stopwords": [], + "articles": [ + "the", + "a", + "an" + ] +} diff --git 
a/ovos_utterance_normalizer/res/es/normalize.json b/ovos_utterance_normalizer/res/es/normalize.json new file mode 100644 index 0000000..1a7c447 --- /dev/null +++ b/ovos_utterance_normalizer/res/es/normalize.json @@ -0,0 +1,14 @@ +{ + "lowercase": false, + "numbers_to_digits": true, + "expand_contractions": true, + "remove_symbols": true, + "remove_accents": false, + "remove_articles": false, + "remove_stopwords": false, + "contractions": {}, + "word_replacements": {}, + "number_replacements": {}, + "stopwords": [], + "articles": [] +} diff --git a/ovos_utterance_normalizer/res/fr/normalize.json b/ovos_utterance_normalizer/res/fr/normalize.json new file mode 100644 index 0000000..1a7c447 --- /dev/null +++ b/ovos_utterance_normalizer/res/fr/normalize.json @@ -0,0 +1,14 @@ +{ + "lowercase": false, + "numbers_to_digits": true, + "expand_contractions": true, + "remove_symbols": true, + "remove_accents": false, + "remove_articles": false, + "remove_stopwords": false, + "contractions": {}, + "word_replacements": {}, + "number_replacements": {}, + "stopwords": [], + "articles": [] +} diff --git a/ovos_utterance_normalizer/res/it/normalize.json b/ovos_utterance_normalizer/res/it/normalize.json new file mode 100644 index 0000000..1a7c447 --- /dev/null +++ b/ovos_utterance_normalizer/res/it/normalize.json @@ -0,0 +1,14 @@ +{ + "lowercase": false, + "numbers_to_digits": true, + "expand_contractions": true, + "remove_symbols": true, + "remove_accents": false, + "remove_articles": false, + "remove_stopwords": false, + "contractions": {}, + "word_replacements": {}, + "number_replacements": {}, + "stopwords": [], + "articles": [] +} diff --git a/ovos_utterance_normalizer/res/nl/normalize.json b/ovos_utterance_normalizer/res/nl/normalize.json new file mode 100644 index 0000000..1a7c447 --- /dev/null +++ b/ovos_utterance_normalizer/res/nl/normalize.json @@ -0,0 +1,14 @@ +{ + "lowercase": false, + "numbers_to_digits": true, + "expand_contractions": true, + "remove_symbols": true, + "remove_accents": false, + "remove_articles": false, + "remove_stopwords": false, + "contractions": {}, + "word_replacements": {}, + "number_replacements": {}, + "stopwords": [], + "articles": [] +} diff --git a/ovos_utterance_normalizer/res/no/normalize.json b/ovos_utterance_normalizer/res/no/normalize.json new file mode 100644 index 0000000..1a7c447 --- /dev/null +++ b/ovos_utterance_normalizer/res/no/normalize.json @@ -0,0 +1,14 @@ +{ + "lowercase": false, + "numbers_to_digits": true, + "expand_contractions": true, + "remove_symbols": true, + "remove_accents": false, + "remove_articles": false, + "remove_stopwords": false, + "contractions": {}, + "word_replacements": {}, + "number_replacements": {}, + "stopwords": [], + "articles": [] +} diff --git a/ovos_utterance_normalizer/res/pt/normalize.json b/ovos_utterance_normalizer/res/pt/normalize.json new file mode 100644 index 0000000..52fd4b8 --- /dev/null +++ b/ovos_utterance_normalizer/res/pt/normalize.json @@ -0,0 +1,98 @@ +{ + "lowercase": false, + "numbers_to_digits": true, + "expand_contractions": false, + "remove_symbols": true, + "remove_accents": false, + "remove_articles": false, + "remove_stopwords": true, + "contractions": {}, + "word_replacements": {}, + "number_replacements": { + "catorze": "14", + "cem": "100", + "cento": "100", + "cinco": "5", + "cinquenta": "50", + "dez": "10", + "dezanove": "19", + "dezasseis": "16", + "dezassete": "17", + "dezoito": "18", + "dois": "2", + "doze": "12", + "duas": "2", + "duzentas": "200", + "duzentos": "200", + "mil": "1000", + 
"milhão": "1000000", + "nove": "9", + "novecentas": "900", + "novecentos": "900", + "noventa": "90", + "oitenta": "80", + "oito": "8", + "oitocentas": "800", + "oitocentos": "800", + "onze": "11", + "primeiro": "1", + "quarenta": "40", + "quatro": "4", + "quatrocentas": "400", + "quatrocentos": "400", + "quinhentas": "500", + "quinhentos": "500", + "quinze": "15", + "segundo": "2", + "seis": "6", + "seiscentas": "600", + "seiscentos": "600", + "sessenta": "60", + "sete": "7", + "setecentas": "700", + "setecentos": "700", + "setenta": "70", + "terceiro": "3", + "tres": "3", + "treze": "13", + "trezentas": "300", + "trezentos": "300", + "trinta": "30", + "três": "3", + "um": "1", + "uma": "1", + "vinte": "20", + "zero": "0" + }, + "stopwords": [ + "de", + "dos", + "das", + "lhe", + "lhes", + "me", + "e", + "no", + "nas", + "na", + "nos", + "em", + "para", + "este", + "esta", + "deste", + "desta", + "neste", + "nesta", + "nesse", + "nessa", + "foi", + "que" + ], + "articles": [ + "o", + "a", + "os", + "as" + ] +} \ No newline at end of file diff --git a/ovos_utterance_normalizer/res/ru/normalize.json b/ovos_utterance_normalizer/res/ru/normalize.json new file mode 100644 index 0000000..b7322d9 --- /dev/null +++ b/ovos_utterance_normalizer/res/ru/normalize.json @@ -0,0 +1,46 @@ +{ + "lowercase": false, + "numbers_to_digits": true, + "expand_contractions": true, + "remove_symbols": true, + "remove_accents": false, + "remove_articles": false, + "remove_stopwords": false, + "contractions": {}, + "word_replacements": {}, + "number_replacements": { + "ноль": "0", + "нуль": "0", + "один": "1", + "одна": "1", + "два": "2", + "две": "2", + "три": "3", + "четыре": "4", + "пять": "5", + "шесть": "6", + "семь": "7", + "восемь": "8", + "девять": "9", + "десять": "10", + "одиннадцать": "11", + "двенадцать": "12", + "тринадцать": "13", + "четырнадцать": "14", + "пятнадцать": "15", + "шестнадцать": "16", + "семнадцать": "17", + "восемнадцать": "18", + "девятнадцать": "19", + "двадцать": "20", + "тридцать": "30", + "сорок": "40", + "пятьдесят": "50", + "шестьдесят": "60", + "семьдесят": "70", + "восемьдесят": "80", + "девяносто": "90" + }, + "stopwords": [], + "articles": [] +} \ No newline at end of file diff --git a/ovos_utterance_normalizer/res/sl/normalize.json b/ovos_utterance_normalizer/res/sl/normalize.json new file mode 100644 index 0000000..1a7c447 --- /dev/null +++ b/ovos_utterance_normalizer/res/sl/normalize.json @@ -0,0 +1,14 @@ +{ + "lowercase": false, + "numbers_to_digits": true, + "expand_contractions": true, + "remove_symbols": true, + "remove_accents": false, + "remove_articles": false, + "remove_stopwords": false, + "contractions": {}, + "word_replacements": {}, + "number_replacements": {}, + "stopwords": [], + "articles": [] +} diff --git a/ovos_utterance_normalizer/res/sv/normalize.json b/ovos_utterance_normalizer/res/sv/normalize.json new file mode 100644 index 0000000..1a7c447 --- /dev/null +++ b/ovos_utterance_normalizer/res/sv/normalize.json @@ -0,0 +1,14 @@ +{ + "lowercase": false, + "numbers_to_digits": true, + "expand_contractions": true, + "remove_symbols": true, + "remove_accents": false, + "remove_articles": false, + "remove_stopwords": false, + "contractions": {}, + "word_replacements": {}, + "number_replacements": {}, + "stopwords": [], + "articles": [] +} diff --git a/ovos_utterance_normalizer/res/uk/normalize.json b/ovos_utterance_normalizer/res/uk/normalize.json new file mode 100644 index 0000000..aa13d2c --- /dev/null +++ b/ovos_utterance_normalizer/res/uk/normalize.json @@ 
-0,0 +1,74 @@ +{ + "lowercase": false, + "numbers_to_digits": true, + "expand_contractions": true, + "remove_symbols": true, + "remove_accents": false, + "remove_articles": false, + "remove_stopwords": false, + "contractions": {}, + "word_replacements": {}, + "number_replacements": { + "ноль": "0", + "нуль": "0", + "нуля": "0", + "один": "1", + "одна": "1", + "одну": "1", + "одного": "1", + "пару": "2", + "пари": "2", + "пара": "2", + "два": "2", + "двох": "2", + "дві": "2", + "три": "3", + "трьох": "3", + "чотири": "4", + "п'ять": "5", + "шість": "6", + "сім": "7", + "вісім": "8", + "дев'ять": "9", + "десять": "10", + "одинадцять": "11", + "дванадцять": "12", + "тринадцять": "13", + "чотирнадцять": "14", + "п'ятнадцять": "15", + "шістнадцять": "16", + "сімнадцять": "17", + "вісімнадцять": "18", + "дев'ятнадцять": "19", + "двадцять": "20", + "тридцять": "30", + "сорок": "40", + "п'ятдесят": "50", + "шістдесят": "60", + "сімдесят": "70", + "вісімдесят": "80", + "дев'яносто": "90", + "сто": "100", + "двісті": "200", + "триста": "300", + "чотириста": "400", + "п'ятсот": "500", + "шістсот": "600", + "сімсот": "700", + "вісімсот": "800", + "дев'ятсот": "900", + "дві сотні": "200", + "три сотні": "300", + "чотири сотні": "400", + "п'ять сотень": "500", + "шість сотень": "600", + "сім сотень": "700", + "вісім сотень": "800", + "дев'ять сотень": "900", + "тисячі": "1000", + "тисяча": "1000", + "тисяч": "1000" + }, + "stopwords": [], + "articles": [] +} diff --git a/ovos_utterance_normalizer/tokenization.py b/ovos_utterance_normalizer/tokenization.py new file mode 100644 index 0000000..7a57b10 --- /dev/null +++ b/ovos_utterance_normalizer/tokenization.py @@ -0,0 +1,195 @@ +import re +from collections import namedtuple +from datetime import datetime, date, timedelta, time +from typing import List, Any +from ovos_utils import flatten_list +from quebra_frases import word_tokenize as _wtok, sentence_tokenize as _stok + +# Token is intended to be used in the number processing functions in +# this module. The parsing requires slicing and dividing of the original +# text. To ensure things parse correctly, we need to know where text came +# from in the original input, hence this nametuple. +Token = namedtuple('Token', 'word index') + + +class ReplaceableEntity: + """ + Similar to Token, this class is used in entity parsing. + + Once we've found an entity in a string, this class contains all + the info about the value, and where it came from in the original text. + In other words, it is the text, and the entity that can replace it in + the string. + """ + + def __init__(self, value: Any, tokens: List): + self.value = value + self.tokens = tokens + + @property + def type(self): + return type(self.value) + + def __bool__(self): + return bool(self.value is not None and self.value is not False) + + @property + def start_index(self): + return self.tokens[0].index + + @property + def end_index(self): + return self.tokens[-1].index + + @property + def text(self): + return ' '.join([t.word for t in self.tokens]) + + def __setattr__(self, key, value): + try: + getattr(self, key) + except AttributeError: + super().__setattr__(key, value) + else: + raise Exception("Immutable!") + + def __str__(self): + return f"({self.value}, {self.tokens})" + + def __repr__(self): + return "{n}({v}, {t})".format(n=self.__class__.__name__, v=self.value, + t=[t.word for t in self.tokens]) + + +class ReplaceableNumber(ReplaceableEntity): + """ + Similar to Token, this class is used in number parsing. 
+ + Once we've found a number in a string, this class contains all + the info about the value, and where it came from in the original text. + In other words, it is the text, and the number that can replace it in + the string. + """ + + +class ReplaceableDate(ReplaceableEntity): + """ + Similar to Token, this class is used in date parsing. + + Once we've found a date in a string, this class contains all + the info about the value, and where it came from in the original text. + In other words, it is the text, and the date that can replace it in + the string. + """ + + def __init__(self, value: date, tokens: List): + if isinstance(value, datetime): + value = value.date() + assert isinstance(value, date) + super().__init__(value, tokens) + + +class ReplaceableTime(ReplaceableEntity): + """ + Similar to Token, this class is used in date parsing. + + Once we've found a time in a string, this class contains all + the info about the value, and where it came from in the original text. + In other words, it is the text, and the time that can replace it in + the string. + """ + + def __init__(self, value: time, tokens: List): + if isinstance(value, datetime): + value = value.time() + assert isinstance(value, time) + super().__init__(value, tokens) + + +class ReplaceableTimedelta(ReplaceableEntity): + """ + Similar to Token, this class is used in date parsing. + + Once we've found a timedelta in a string, this class contains all + the info about the value, and where it came from in the original text. + In other words, it is the text, and the duration that can replace it in + the string. + """ + + def __init__(self, value: timedelta, tokens: List): + assert isinstance(value, timedelta) + super().__init__(value, tokens) + + +def partition_list(items, split_on): + """ + Partition a list of items. + + Works similarly to str.partition + + Args: + items: + split_on callable: + Should return a boolean. Each item will be passed to + this callable in succession, and partitions will be + created any time it returns True. 
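+ + Example (matches the implementation below): + >>> partition_list([1, 2, 3, 4, 5], lambda x: x == 3) + [[1, 2], [3], [4, 5]]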
+ + Returns: + [[any]] + + """ + splits = [] + current_split = [] + for item in items: + if split_on(item): + splits.append(current_split) + splits.append([item]) + current_split = [] + else: + current_split.append(item) + splits.append(current_split) + return list(filter(lambda x: len(x) != 0, splits)) + + +def sentence_tokenize(text): + sents = [_stok(s) for s in text.split("\n")] + return flatten_list(sents) + + +def word_tokenize(utterance, lang=None): + if lang is not None and lang.startswith("pt"): + return word_tokenize_pt(utterance) + elif lang is not None and lang.startswith("ca"): + return word_tokenize_ca(utterance) + # Split things like 12% + utterance = re.sub(r"([0-9]+)([\%])", r"\1 \2", utterance) + # Split things like #1 + utterance = re.sub(r"(\#)([0-9]+\b)", r"\1 \2", utterance) + return _wtok(utterance) + + +def word_tokenize_pt(utterance): + # Split things like 12% + utterance = re.sub(r"([0-9]+)([\%])", r"\1 \2", utterance) + # Split things like #1 + utterance = re.sub(r"(\#)([0-9]+\b)", r"\1 \2", utterance) + # Split things like amo-te + utterance = re.sub(r"([a-zA-Z]+)(-)([a-zA-Z]+\b)", r"\1 \2 \3", + utterance) + tokens = utterance.split() + if tokens and tokens[-1] == '-': + tokens = tokens[:-1] + + return tokens + + +def word_tokenize_ca(utterance): + # Split things like 12% + utterance = re.sub(r"([0-9]+)([\%])", r"\1 \2", utterance) + # Split things like #1 + utterance = re.sub(r"(\#)([0-9]+\b)", r"\1 \2", utterance) + # Don't split at - + tokens = utterance.split() + if tokens and tokens[-1] == '-': + tokens = tokens[:-1] + return tokens \ No newline at end of file diff --git a/ovos_utterance_normalizer/version.py b/ovos_utterance_normalizer/version.py new file mode 100644 index 0000000..79d91ab --- /dev/null +++ b/ovos_utterance_normalizer/version.py @@ -0,0 +1,7 @@ +# The following lines are replaced during the release process. +# START_VERSION_BLOCK +VERSION_MAJOR = 0 +VERSION_MINOR = 0 +VERSION_BUILD = 0 +VERSION_ALPHA = 1 +# END_VERSION_BLOCK diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..1275d2e --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +ovos-utils +quebra-frases +ovos-plugin-manager \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..fcbeca2 --- /dev/null +++ b/setup.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python3 +import os + +from setuptools import setup + +BASEDIR = os.path.abspath(os.path.dirname(__file__)) + + +def get_version(): + """ Find the version of the package""" + version = None + version_file = os.path.join(BASEDIR, 'ovos_utterance_normalizer', 'version.py') + major, minor, build, alpha = (None, None, None, None) + with open(version_file) as f: + for line in f: + if 'VERSION_MAJOR' in line: + major = line.split('=')[1].strip() + elif 'VERSION_MINOR' in line: + minor = line.split('=')[1].strip() + elif 'VERSION_BUILD' in line: + build = line.split('=')[1].strip() + elif 'VERSION_ALPHA' in line: + alpha = line.split('=')[1].strip() + + if ((major and minor and build and alpha) or + '# END_VERSION_BLOCK' in line): + break + version = f"{major}.{minor}.{build}" + if alpha and int(alpha) > 0: + version += f"a{alpha}" + return version + + +def package_files(directory): + paths = [] + for (path, _, filenames) in os.walk(directory): + for filename in filenames: + paths.append(os.path.join('..', path, filename)) + return paths + + +def required(requirements_file): + """ Read requirements file and remove comments and empty lines.
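+ + If the MYCROFT_LOOSE_REQUIREMENTS environment variable is set, + pinned versions are relaxed: '==' and '~=' both become '>='.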
""" + with open(os.path.join(BASEDIR, requirements_file), 'r') as f: + requirements = f.read().splitlines() + if 'MYCROFT_LOOSE_REQUIREMENTS' in os.environ: + print('USING LOOSE REQUIREMENTS!') + requirements = [r.replace('==', '>=').replace('~=', '>=') for r in requirements] + return [pkg for pkg in requirements + if pkg.strip() and not pkg.startswith("#")] + + +extra_files = package_files('ovos-utterance-normalizer/res') + + +UTTERANCE_ENTRY_POINT = ( + 'ovos-utterance-normalizer=ovos_utterance_normalizer:UtteranceNormalizerPlugin' +) + + +setup( + name='ovos-utterance-normalizer', + version=get_version(), + author='jarbasai', + author_email='jarbasai@mailfence.com', + url='https://github.com/OpenVoiceOS/ovos-utterance-normalizer', + license='apache-2.0', + packages=['ovos_utterance_normalizer'], + include_package_data=True, + package_data={"": extra_files}, + install_requires=required("requirements.txt"), + zip_safe=True, + classifiers=[ + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Developers', + 'Topic :: Text Processing :: Linguistic', + 'License :: OSI Approved :: Apache Software License', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + ], + entry_points={ + 'neon.plugin.text': UTTERANCE_ENTRY_POINT + } +)