
Commit

Fixes after review
ClemDoum committed May 20, 2019
1 parent c3383a6 commit 45f4fd4
Showing 6 changed files with 22 additions and 48 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file.
 ### Changed
 - Re-score ambiguous `DeterministicIntentParser` results based on slots [#791](https://github.com/snipsco/snips-nlu/pull/791)
 - Accept ambiguous results from `DeterministicIntentParser` when confidence score is above 0.5 [#797](https://github.com/snipsco/snips-nlu/pull/797)
+- Moved the NLU random state from the config to the shared resources [#801](https://github.com/snipsco/snips-nlu/pull/801)
+- Bumped `scikit-learn` to `>=0.21,<0.22` for `python>=3.5` and `>=0.20,<0.21` for `python<3.5` [#801](https://github.com/snipsco/snips-nlu/pull/801)
+
+### Fixed
+- Fixed a couple of bugs in the data augmentation which were making the NLU training non-deterministic [#801](https://github.com/snipsco/snips-nlu/pull/801)

 ## [0.19.6]
 ### Fixed
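The "Moved the NLU random state from the config to the shared resources" entry above means, at the API level, that processing units now take the random state directly at construction time rather than through a config object. The following is only a hypothetical usage sketch based on the updated tests in this commit (the import path is an assumption), not code taken from the repository:

from snips_nlu.intent_classifier import LogRegIntentClassifier

# The seed is passed straight to the unit as part of the shared resources;
# no config object needs to be built just to fix the randomness.
classifier = LogRegIntentClassifier(random_state=42)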
10 changes: 5 additions & 5 deletions docs/source/tutorial.rst
@@ -174,12 +174,12 @@ the dataset we generated earlier:
 engine.fit(dataset)

-Note that by default, the training of the engine is non-deterministic: if you
-train your NLU twice on the same data and test it on the same input, you'll get
-different outputs.
+Note that, by default, training of the NLU engine is non-deterministic:
+training and testing multiple times on the same data may produce different
+outputs.

-If you want to run training in a reproducible way you can pass a random seed to
-your engine:
+Reproducible trainings can be achieved by passing a **random seed** to the
+engine:

 .. code-block:: python
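A minimal sketch of the kind of snippet this code block introduces, assuming the `SnipsNLUEngine` constructor accepts a `random_state` keyword (in line with the processing units updated in this commit) and reusing the `dataset` built earlier in the tutorial, might look like:

from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_EN

seed = 42
engine = SnipsNLUEngine(config=CONFIG_EN, random_state=seed)
# With a fixed seed, fitting twice on the same dataset yields the same engine.
engine.fit(dataset)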
2 changes: 0 additions & 2 deletions snips_nlu/intent_classifier/log_reg_classifier_utils.py
@@ -43,10 +43,8 @@ def get_regularization_factor(dataset):

 def get_noise_it(noise, mean_length, std_length, random_state):
     it = itertools.cycle(noise)
-    i = 0
     while True:
         noise_length = int(random_state.normal(mean_length, std_length))
-        i += 1
         # pylint: disable=stop-iteration-return
         yield " ".join(next(it) for _ in range(noise_length))
         # pylint: enable=stop-iteration-return
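A small usage sketch of the `get_noise_it` generator shown above (a hypothetical example, not part of this commit): two iterators built from identically seeded random states yield the same noise utterances, which is the property the determinism fix relies on.

import numpy as np

from snips_nlu.intent_classifier.log_reg_classifier_utils import get_noise_it

noise = ["foo", "bar", "baz"]
it1 = get_noise_it(noise, mean_length=3, std_length=1,
                   random_state=np.random.RandomState(42))
it2 = get_noise_it(noise, mean_length=3, std_length=1,
                   random_state=np.random.RandomState(42))
# Identical seeds produce identical noise sequences.
assert next(it1) == next(it2)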
12 changes: 4 additions & 8 deletions snips_nlu/tests/test_crf_slot_filler.py
@@ -35,10 +35,9 @@ def test_should_get_slots(self):
 - make me [number_of_cups:snips/number](five) cups of tea
 - please I want [number_of_cups](two) cups of tea""")
         dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
-        config = CRFSlotFillerConfig()
         shared = self.get_shared_data(dataset)
         shared[RANDOM_STATE] = 42
-        slot_filler = CRFSlotFiller(config, **shared)
+        slot_filler = CRFSlotFiller(**shared)
         intent = "MakeTea"
         slot_filler.fit(dataset, intent)

@@ -66,11 +65,10 @@ def test_should_get_builtin_slots(self):
 - Can you tell me the weather [datetime] please ?
 - what is the weather forecast [datetime] in [location](paris)""")
         dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
-        config = CRFSlotFillerConfig()
         intent = "GetWeather"
         shared = self.get_shared_data(dataset)
         shared[RANDOM_STATE] = 42
-        slot_filler = CRFSlotFiller(config, **shared)
+        slot_filler = CRFSlotFiller(**shared)
         slot_filler.fit(dataset, intent)

         # When
@@ -104,11 +102,10 @@ def test_should_get_sub_builtin_slots(self):
 - find an activity from [start](6pm) to [end](8pm)
 - Book me a trip from [start](this friday) to [end](next tuesday)""")
         dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
-        config = CRFSlotFillerConfig()
         intent = "PlanBreak"
         shared = self.get_shared_data(dataset)
         shared[RANDOM_STATE] = 42
-        slot_filler = CRFSlotFiller(config, **shared)
+        slot_filler = CRFSlotFiller(**shared)
         slot_filler.fit(dataset, intent)

         # When
@@ -360,11 +357,10 @@ def test_should_get_slots_after_deserialization(self):
 - i want [number_of_cups] cups of tea please
 - can you prepare [number_of_cups] cups of tea ?""")
         dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
-        config = CRFSlotFillerConfig()
         intent = "MakeTea"
         shared = self.get_shared_data(dataset)
         shared[RANDOM_STATE] = 42
-        slot_filler = CRFSlotFiller(config, **shared)
+        slot_filler = CRFSlotFiller(**shared)
         slot_filler.fit(dataset, intent)
         slot_filler.persist(self.tmp_file_path)

12 changes: 3 additions & 9 deletions snips_nlu/tests/test_log_reg_intent_classifier.py
@@ -50,9 +50,7 @@ def test_should_get_intent(self):
 - does it rain
 - will it rain tomorrow""")
         dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
-        config = LogRegIntentClassifierConfig()
-        classifier = LogRegIntentClassifier(
-            config, random_state=42).fit(dataset)
+        classifier = LogRegIntentClassifier(random_state=42).fit(dataset)
         text = "hey how are you doing ?"

         # When
@@ -109,9 +107,7 @@ def test_should_get_intent_when_filter(self):
 - brew two cups of coffee
 - can you prepare one cup of coffee""")
         dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
-        config = LogRegIntentClassifierConfig()
-        classifier = LogRegIntentClassifier(
-            config, random_state=42).fit(dataset)
+        classifier = LogRegIntentClassifier(random_state=42).fit(dataset)

         # When
         text1 = "Make me two cups of tea"
@@ -171,9 +167,7 @@ def test_should_get_intents(self):
 utterances:
 - yili yulu yele""")
         dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
-        config = LogRegIntentClassifierConfig()
-        classifier = LogRegIntentClassifier(
-            config, random_state=42).fit(dataset)
+        classifier = LogRegIntentClassifier(random_state=42).fit(dataset)
         text = "yala yili yulu"

         # When
29 changes: 5 additions & 24 deletions snips_nlu/tests/test_probabilistic_intent_parser.py
@@ -43,13 +43,9 @@ def test_should_parse(self):
 utterances:
 - foz for [slot3:entity3](baz)""")
         dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
-        classifier_config = LogRegIntentClassifierConfig()
-        slot_filler_config = CRFSlotFillerConfig()
-        parser_config = ProbabilisticIntentParserConfig(
-            classifier_config, slot_filler_config)
         shared = self.get_shared_data(dataset)
         shared[RANDOM_STATE] = 42
-        parser = ProbabilisticIntentParser(parser_config, **shared)
+        parser = ProbabilisticIntentParser(**shared)
         parser.fit(dataset)
         text = "foo bar baz"

@@ -84,13 +80,9 @@ def test_should_parse_with_filter(self):
 utterances:
 - foz for [slot3:entity3](baz)""")
         dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
-        classifier_config = LogRegIntentClassifierConfig()
-        slot_filler_config = CRFSlotFillerConfig()
-        parser_config = ProbabilisticIntentParserConfig(
-            classifier_config, slot_filler_config)
         shared = self.get_shared_data(dataset)
         shared[RANDOM_STATE] = 42
-        parser = ProbabilisticIntentParser(parser_config, **shared)
+        parser = ProbabilisticIntentParser(**shared)
         parser.fit(dataset)
         text = "foo bar baz"

@@ -126,13 +118,9 @@ def test_should_parse_top_intents(self):
 utterances:
 - foz for [entity3](baz)""")
         dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
-        classifier_config = LogRegIntentClassifierConfig()
-        slot_filler_config = CRFSlotFillerConfig()
-        parser_config = ProbabilisticIntentParserConfig(
-            classifier_config, slot_filler_config)
         shared = self.get_shared_data(dataset)
         shared[RANDOM_STATE] = 42
-        parser = ProbabilisticIntentParser(parser_config, **shared)
+        parser = ProbabilisticIntentParser(**shared)
         parser.fit(dataset)
         text = "foo bar baz"

@@ -169,12 +157,9 @@ def test_should_get_intents(self):
 utterances:
 - yili yulu yele""")
         dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
-        classifier_config = LogRegIntentClassifierConfig()
-        parser_config = ProbabilisticIntentParserConfig(classifier_config)
         shared = self.get_shared_data(dataset)
         shared[RANDOM_STATE] = 42
-        parser = ProbabilisticIntentParser(
-            parser_config, **shared).fit(dataset)
+        parser = ProbabilisticIntentParser(**shared).fit(dataset)
         text = "yala yili yulu"

         # When
@@ -689,13 +674,9 @@ def test_fitting_should_be_reproducible_after_serialization(self):
         dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

         seed = 666
-        config = ProbabilisticIntentParserConfig(
-            intent_classifier_config=LogRegIntentClassifierConfig(),
-            slot_filler_config=CRFSlotFillerConfig()
-        )
         shared = self.get_shared_data(dataset)
         shared[RANDOM_STATE] = seed
-        parser = ProbabilisticIntentParser(config, **shared)
+        parser = ProbabilisticIntentParser(**shared)
         parser.persist(self.tmp_file_path)

         # When
