From 93ba4e72fa633b658775a8891bed42837139d260 Mon Sep 17 00:00:00 2001 From: shademe Date: Wed, 14 Dec 2022 16:56:37 +0100 Subject: [PATCH 1/6] Add `ConsoleLogger.v3` This addition expands the progress bar feature to count up the training/distillation steps to either the next evaluation pass or the maximum number of steps. --- spacy/errors.py | 1 + spacy/training/loggers.py | 48 ++++++++++++++++++++++++++++++++--- website/docs/api/top-level.md | 32 +++++++++++++++++++---- 3 files changed, 73 insertions(+), 8 deletions(-) diff --git a/spacy/errors.py b/spacy/errors.py index 0e5ef91ed77..cd9281e9121 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -962,6 +962,7 @@ class Errors(metaclass=ErrorsWithCodes): E1046 = ("{cls_name} is an abstract class and cannot be instantiated. If you are looking for spaCy's default " "knowledge base, use `InMemoryLookupKB`.") E1047 = ("`find_threshold()` only supports components with a `scorer` attribute.") + E1048 = ("Got '{unexpected}' as console progress bar type, but expected one of the following: {expected}") # Deprecated model shortcuts, only used in errors and warnings diff --git a/spacy/training/loggers.py b/spacy/training/loggers.py index 408ea71405f..1a282f5e378 100644 --- a/spacy/training/loggers.py +++ b/spacy/training/loggers.py @@ -26,6 +26,8 @@ def setup_table( return final_cols, final_widths, ["r" for _ in final_widths] +# We cannot rename this method as it's directly imported +# and used by external packages such as spacy-loggers. @registry.loggers("spacy.ConsoleLogger.v2") def console_logger( progress_bar: bool = False, @@ -33,7 +35,27 @@ def console_logger( output_file: Optional[Union[str, Path]] = None, ): """The ConsoleLogger.v2 prints out training logs in the console and/or saves them to a jsonl file. - progress_bar (bool): Whether the logger should print the progress bar. + progress_bar (bool): Whether the logger should print a progress bar tracking the steps till the next evaluation pass. + console_output (bool): Whether the logger should print the logs on the console. + output_file (Optional[Union[str, Path]]): The file to save the training logs to. + """ + return console_logger_v3( + progress_bar=None if progress_bar is False else "eval_steps", + console_output=console_output, + output_file=output_file, + ) + + +@registry.loggers("spacy.ConsoleLogger.v3") +def console_logger_v3( + progress_bar: Optional[str] = None, + console_output: bool = True, + output_file: Optional[Union[str, Path]] = None, +): + """The ConsoleLogger.v3 prints out training logs in the console and/or saves them to a jsonl file. + progress_bar (Optional[str]): Type of progress bar to show in the console. Allowed values: + all_steps - Tracks the number of steps until `training.max_steps` is reached. + eval_steps - Tracks the number of steps until `training.eval_frequency` is reached. console_output (bool): Whether the logger should print the logs on the console. output_file (Optional[Union[str, Path]]): The file to save the training logs to. """ @@ -70,6 +92,7 @@ def setup_printer( for name, proc in nlp.pipeline if hasattr(proc, "is_trainable") and proc.is_trainable ] + max_steps = nlp.config["training"]["max_steps"] eval_frequency = nlp.config["training"]["eval_frequency"] score_weights = nlp.config["training"]["score_weights"] score_cols = [col for col, value in score_weights.items() if value is not None] @@ -84,6 +107,13 @@ def setup_printer( write(msg.row(table_header, widths=table_widths, spacing=spacing)) write(msg.row(["-" * width for width in table_widths], spacing=spacing)) progress = None + expected_progress_types = ("all_steps", "eval_steps", None) + if progress_bar not in expected_progress_types: + raise ValueError( + Errors.E1048.format( + unexpected=progress_bar, expected=expected_progress_types + ) + ) def log_step(info: Optional[Dict[str, Any]]) -> None: nonlocal progress @@ -142,10 +172,22 @@ def log_step(info: Optional[Dict[str, Any]]) -> None: ) if progress_bar: # Set disable=None, so that it disables on non-TTY + if progress_bar == "all_steps": + total = max_steps + desc = f"Last Eval Epoch: {info['epoch']}" + initial = info["step"] + else: + total = eval_frequency + desc = f"Epoch {info['epoch']+1}" + initial = 0 progress = tqdm.tqdm( - total=eval_frequency, disable=None, leave=False, file=stderr + total=total, + disable=None, + leave=False, + file=stderr, + initial=initial, ) - progress.set_description(f"Epoch {info['epoch']+1}") + progress.set_description(desc) def finalize() -> None: if output_stream: diff --git a/website/docs/api/top-level.md b/website/docs/api/top-level.md index 26a5d42f44a..a8651873588 100644 --- a/website/docs/api/top-level.md +++ b/website/docs/api/top-level.md @@ -564,11 +564,33 @@ start decreasing across epochs. -| Name | Description | -| ---------------- | --------------------------------------------------------------------- | -| `progress_bar` | Whether the logger should print the progress bar ~~bool~~ | -| `console_output` | Whether the logger should print the logs on the console. ~~bool~~ | -| `output_file` | The file to save the training logs to. ~~Optional[Union[str, Path]]~~ | +| Name | Description | +| ---------------- | -------------------------------------------------------------------------------------------------------- | +| `progress_bar` | Whether the logger should print a progress bar tracking the steps till the next evaluation pass.~~bool~~ | +| `console_output` | Whether the logger should print the logs in the console. ~~bool~~ | +| `output_file` | The file to save the training logs to. ~~Optional[Union[str, Path]]~~ | + +#### spacy.ConsoleLogger.v3 {#ConsoleLogger tag="registered function"} + +> #### Example config +> +> ```ini +> [training.logger] +> @loggers = "spacy.ConsoleLogger.v3" +> progress_bar = "all_steps" +> console_output = true +> output_file = "training_log.jsonl" +> ``` + +Writes the results of a training step to the console in a tabular format and +saves them to a `jsonl` file. + +| Name | Description | +| ---------------- | ---------------------------------------------------------------------------------------------------------------------------------- | +| `progress_bar` | Type of progress bar to show in the console: `all_steps` or `eval_steps` | +| | They track the number of steps until `training.max_steps` and `training.eval_frequency` are reached respectively.~~Optional[str]~~ | +| `console_output` | Whether the logger should print the logs in the console.~~bool~~ | +| `output_file` | The file to save the training logs to. ~~Optional[Union[str, Path]]~~ | ## Readers {#readers} From 41ec44d99ae5a1abfb3980f27b896af999a82718 Mon Sep 17 00:00:00 2001 From: shademe Date: Fri, 16 Dec 2022 11:58:52 +0100 Subject: [PATCH 2/6] Rename progress bar types --- spacy/training/loggers.py | 10 +++++----- website/docs/api/top-level.md | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/spacy/training/loggers.py b/spacy/training/loggers.py index 1a282f5e378..ee2db2bc2f8 100644 --- a/spacy/training/loggers.py +++ b/spacy/training/loggers.py @@ -40,7 +40,7 @@ def console_logger( output_file (Optional[Union[str, Path]]): The file to save the training logs to. """ return console_logger_v3( - progress_bar=None if progress_bar is False else "eval_steps", + progress_bar=None if progress_bar is False else "eval", console_output=console_output, output_file=output_file, ) @@ -54,8 +54,8 @@ def console_logger_v3( ): """The ConsoleLogger.v3 prints out training logs in the console and/or saves them to a jsonl file. progress_bar (Optional[str]): Type of progress bar to show in the console. Allowed values: - all_steps - Tracks the number of steps until `training.max_steps` is reached. - eval_steps - Tracks the number of steps until `training.eval_frequency` is reached. + train - Tracks the number of steps from the beginning of training until the full training run is complete (training.max_steps is reached). + eval - Tracks the number of steps between the previous and next evaluation (training.eval_frequency is reached). console_output (bool): Whether the logger should print the logs on the console. output_file (Optional[Union[str, Path]]): The file to save the training logs to. """ @@ -107,7 +107,7 @@ def setup_printer( write(msg.row(table_header, widths=table_widths, spacing=spacing)) write(msg.row(["-" * width for width in table_widths], spacing=spacing)) progress = None - expected_progress_types = ("all_steps", "eval_steps", None) + expected_progress_types = ("train", "eval", None) if progress_bar not in expected_progress_types: raise ValueError( Errors.E1048.format( @@ -172,7 +172,7 @@ def log_step(info: Optional[Dict[str, Any]]) -> None: ) if progress_bar: # Set disable=None, so that it disables on non-TTY - if progress_bar == "all_steps": + if progress_bar == "train": total = max_steps desc = f"Last Eval Epoch: {info['epoch']}" initial = info["step"] diff --git a/website/docs/api/top-level.md b/website/docs/api/top-level.md index a8651873588..fc30b086af5 100644 --- a/website/docs/api/top-level.md +++ b/website/docs/api/top-level.md @@ -587,7 +587,7 @@ saves them to a `jsonl` file. | Name | Description | | ---------------- | ---------------------------------------------------------------------------------------------------------------------------------- | -| `progress_bar` | Type of progress bar to show in the console: `all_steps` or `eval_steps` | +| `progress_bar` | Type of progress bar to show in the console: `train` or `eval` | | | They track the number of steps until `training.max_steps` and `training.eval_frequency` are reached respectively.~~Optional[str]~~ | | `console_output` | Whether the logger should print the logs in the console.~~bool~~ | | `output_file` | The file to save the training logs to. ~~Optional[Union[str, Path]]~~ | From 7e793ded5a654f3152b914fbb6a05c5de0f16fe6 Mon Sep 17 00:00:00 2001 From: shademe Date: Thu, 22 Dec 2022 17:16:31 +0100 Subject: [PATCH 3/6] Add defaults to docs Minor fixes --- website/docs/api/top-level.md | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/website/docs/api/top-level.md b/website/docs/api/top-level.md index fc30b086af5..e401a4d9b5b 100644 --- a/website/docs/api/top-level.md +++ b/website/docs/api/top-level.md @@ -513,7 +513,7 @@ a [Weights & Biases](https://www.wandb.com/) dashboard. Instead of using one of the built-in loggers, you can [implement your own](/usage/training#custom-logging). -#### spacy.ConsoleLogger.v2 {#ConsoleLogger tag="registered function"} +#### spacy.ConsoleLogger.v2 {tag="registered function"} > #### Example config > @@ -564,11 +564,11 @@ start decreasing across epochs. -| Name | Description | -| ---------------- | -------------------------------------------------------------------------------------------------------- | -| `progress_bar` | Whether the logger should print a progress bar tracking the steps till the next evaluation pass.~~bool~~ | -| `console_output` | Whether the logger should print the logs in the console. ~~bool~~ | -| `output_file` | The file to save the training logs to. ~~Optional[Union[str, Path]]~~ | +| Name | Description | +| ---------------- | --------------------------------------------------------------------------------------------------------------------------- | +| `progress_bar` | Whether the logger should print a progress bar tracking the steps till the next evaluation pass (default: `False`).~~bool~~ | +| `console_output` | Whether the logger should print the logs in the console (default: `True`). ~~bool~~ | +| `output_file` | The file to save the training logs to (default: `None`). ~~Optional[Union[str, Path]]~~ | #### spacy.ConsoleLogger.v3 {#ConsoleLogger tag="registered function"} @@ -583,14 +583,14 @@ start decreasing across epochs. > ``` Writes the results of a training step to the console in a tabular format and -saves them to a `jsonl` file. - -| Name | Description | -| ---------------- | ---------------------------------------------------------------------------------------------------------------------------------- | -| `progress_bar` | Type of progress bar to show in the console: `train` or `eval` | -| | They track the number of steps until `training.max_steps` and `training.eval_frequency` are reached respectively.~~Optional[str]~~ | -| `console_output` | Whether the logger should print the logs in the console.~~bool~~ | -| `output_file` | The file to save the training logs to. ~~Optional[Union[str, Path]]~~ | +optionally saves them to a `jsonl` file. + +| Name | Description | +| ---------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `progress_bar` | Type of progress bar to show in the console: ``"train"`, `"eval"` or `None`. | +| | The bar tracks the number of steps until `training.max_steps` and `training.eval_frequency` are reached respectively (default: `None`).~~Optional[str]~~ | +| `console_output` | Whether the logger should print the logs in the console (default: `True`). ~~bool~~ | +| `output_file` | The file to save the training logs to (default: `None`). ~~Optional[Union[str, Path]]~~ | ## Readers {#readers} From 2dc82234f4bdea448a49bfe682abd6509b895ead Mon Sep 17 00:00:00 2001 From: shademe Date: Thu, 22 Dec 2022 17:16:38 +0100 Subject: [PATCH 4/6] Move comment --- spacy/training/loggers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/training/loggers.py b/spacy/training/loggers.py index ee2db2bc2f8..90214d0b525 100644 --- a/spacy/training/loggers.py +++ b/spacy/training/loggers.py @@ -171,7 +171,6 @@ def log_step(info: Optional[Dict[str, Any]]) -> None: ) ) if progress_bar: - # Set disable=None, so that it disables on non-TTY if progress_bar == "train": total = max_steps desc = f"Last Eval Epoch: {info['epoch']}" @@ -180,6 +179,7 @@ def log_step(info: Optional[Dict[str, Any]]) -> None: total = eval_frequency desc = f"Epoch {info['epoch']+1}" initial = 0 + # Set disable=None, so that it disables on non-TTY progress = tqdm.tqdm( total=total, disable=None, From 68464cf04f13cc5b6070bb0e21b6b174650ad47e Mon Sep 17 00:00:00 2001 From: Paul O'Leary McCann Date: Fri, 23 Dec 2022 18:37:36 +0900 Subject: [PATCH 5/6] Minor punctuation fixes --- website/docs/api/top-level.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/website/docs/api/top-level.md b/website/docs/api/top-level.md index e401a4d9b5b..883c5e3b948 100644 --- a/website/docs/api/top-level.md +++ b/website/docs/api/top-level.md @@ -564,11 +564,11 @@ start decreasing across epochs. -| Name | Description | -| ---------------- | --------------------------------------------------------------------------------------------------------------------------- | -| `progress_bar` | Whether the logger should print a progress bar tracking the steps till the next evaluation pass (default: `False`).~~bool~~ | -| `console_output` | Whether the logger should print the logs in the console (default: `True`). ~~bool~~ | -| `output_file` | The file to save the training logs to (default: `None`). ~~Optional[Union[str, Path]]~~ | +| Name | Description | +| ---------------- | ---------------------------------------------------------------------------------------------------------------------------- | +| `progress_bar` | Whether the logger should print a progress bar tracking the steps till the next evaluation pass (default: `False`). ~~bool~~ | +| `console_output` | Whether the logger should print the logs in the console (default: `True`). ~~bool~~ | +| `output_file` | The file to save the training logs to (default: `None`). ~~Optional[Union[str, Path]]~~ | #### spacy.ConsoleLogger.v3 {#ConsoleLogger tag="registered function"} @@ -585,12 +585,12 @@ start decreasing across epochs. Writes the results of a training step to the console in a tabular format and optionally saves them to a `jsonl` file. -| Name | Description | -| ---------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `progress_bar` | Type of progress bar to show in the console: ``"train"`, `"eval"` or `None`. | -| | The bar tracks the number of steps until `training.max_steps` and `training.eval_frequency` are reached respectively (default: `None`).~~Optional[str]~~ | -| `console_output` | Whether the logger should print the logs in the console (default: `True`). ~~bool~~ | -| `output_file` | The file to save the training logs to (default: `None`). ~~Optional[Union[str, Path]]~~ | +| Name | Description | +| ---------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `progress_bar` | Type of progress bar to show in the console: `"train"`, `"eval"` or `None`. | +| | The bar tracks the number of steps until `training.max_steps` and `training.eval_frequency` are reached respectively (default: `None`). ~~Optional[str]~~ | +| `console_output` | Whether the logger should print the logs in the console (default: `True`). ~~bool~~ | +| `output_file` | The file to save the training logs to (default: `None`). ~~Optional[Union[str, Path]]~~ | ## Readers {#readers} From 8912a92449f9a9352710815e1a1ab3851e5f7d19 Mon Sep 17 00:00:00 2001 From: shademe Date: Fri, 23 Dec 2022 14:22:03 +0100 Subject: [PATCH 6/6] Explicitly check for `None` when validating progress bar type --- spacy/training/loggers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spacy/training/loggers.py b/spacy/training/loggers.py index 90214d0b525..7de31822eb7 100644 --- a/spacy/training/loggers.py +++ b/spacy/training/loggers.py @@ -107,8 +107,8 @@ def setup_printer( write(msg.row(table_header, widths=table_widths, spacing=spacing)) write(msg.row(["-" * width for width in table_widths], spacing=spacing)) progress = None - expected_progress_types = ("train", "eval", None) - if progress_bar not in expected_progress_types: + expected_progress_types = ("train", "eval") + if progress_bar is not None and progress_bar not in expected_progress_types: raise ValueError( Errors.E1048.format( unexpected=progress_bar, expected=expected_progress_types