Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New console logger with expanded progress tracking #11972

Merged
merged 6 commits into from
Dec 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions spacy/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -962,6 +962,7 @@ class Errors(metaclass=ErrorsWithCodes):
E1046 = ("{cls_name} is an abstract class and cannot be instantiated. If you are looking for spaCy's default "
"knowledge base, use `InMemoryLookupKB`.")
E1047 = ("`find_threshold()` only supports components with a `scorer` attribute.")
E1048 = ("Got '{unexpected}' as console progress bar type, but expected one of the following: {expected}")


# Deprecated model shortcuts, only used in errors and warnings
Expand Down
48 changes: 45 additions & 3 deletions spacy/training/loggers.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,36 @@ def setup_table(
return final_cols, final_widths, ["r" for _ in final_widths]


# We cannot rename this function, as it is imported directly
# and used by external packages such as spacy-loggers.
svlandeg marked this conversation as resolved.
Show resolved Hide resolved
@registry.loggers("spacy.ConsoleLogger.v2")
def console_logger(
    progress_bar: bool = False,
    console_output: bool = True,
    output_file: Optional[Union[str, Path]] = None,
):
    """The ConsoleLogger.v2 prints out training logs in the console and/or saves them to a jsonl file.
    This is a thin wrapper that forwards its arguments to console_logger_v3.

    progress_bar (bool): Whether the logger should print a progress bar tracking the steps till the next evaluation pass.
    console_output (bool): Whether the logger should print the logs on the console.
    output_file (Optional[Union[str, Path]]): The file to save the training logs to.
    RETURNS: The result of console_logger_v3 called with the translated arguments.
    """
    # The v2 flag only toggles the per-evaluation bar: False disables the
    # progress bar (None), any other value selects the "eval" bar type.
    return console_logger_v3(
        progress_bar=None if progress_bar is False else "eval",
        console_output=console_output,
        output_file=output_file,
    )


@registry.loggers("spacy.ConsoleLogger.v3")
def console_logger_v3(
progress_bar: Optional[str] = None,
console_output: bool = True,
output_file: Optional[Union[str, Path]] = None,
):
"""The ConsoleLogger.v3 prints out training logs in the console and/or saves them to a jsonl file.
progress_bar (Optional[str]): Type of progress bar to show in the console. Allowed values:
train - Tracks the number of steps from the beginning of training until the full training run is complete (training.max_steps is reached).
eval - Tracks the number of steps between the previous and next evaluation (training.eval_frequency is reached).
console_output (bool): Whether the logger should print the logs on the console.
output_file (Optional[Union[str, Path]]): The file to save the training logs to.
"""
Expand Down Expand Up @@ -70,6 +92,7 @@ def setup_printer(
for name, proc in nlp.pipeline
if hasattr(proc, "is_trainable") and proc.is_trainable
]
max_steps = nlp.config["training"]["max_steps"]
eval_frequency = nlp.config["training"]["eval_frequency"]
score_weights = nlp.config["training"]["score_weights"]
score_cols = [col for col, value in score_weights.items() if value is not None]
Expand All @@ -84,6 +107,13 @@ def setup_printer(
write(msg.row(table_header, widths=table_widths, spacing=spacing))
write(msg.row(["-" * width for width in table_widths], spacing=spacing))
progress = None
expected_progress_types = ("train", "eval")
if progress_bar is not None and progress_bar not in expected_progress_types:
raise ValueError(
Errors.E1048.format(
unexpected=progress_bar, expected=expected_progress_types
)
)

def log_step(info: Optional[Dict[str, Any]]) -> None:
nonlocal progress
Expand Down Expand Up @@ -141,11 +171,23 @@ def log_step(info: Optional[Dict[str, Any]]) -> None:
)
)
if progress_bar:
if progress_bar == "train":
total = max_steps
desc = f"Last Eval Epoch: {info['epoch']}"
initial = info["step"]
else:
total = eval_frequency
desc = f"Epoch {info['epoch']+1}"
initial = 0
# Set disable=None, so that it disables on non-TTY
progress = tqdm.tqdm(
total=eval_frequency, disable=None, leave=False, file=stderr
total=total,
disable=None,
leave=False,
file=stderr,
initial=initial,
)
progress.set_description(f"Epoch {info['epoch']+1}")
progress.set_description(desc)

def finalize() -> None:
if output_stream:
Expand Down
34 changes: 28 additions & 6 deletions website/docs/api/top-level.md
Original file line number Diff line number Diff line change
Expand Up @@ -513,7 +513,7 @@ a [Weights & Biases](https://www.wandb.com/) dashboard.
Instead of using one of the built-in loggers, you can
[implement your own](/usage/training#custom-logging).

#### spacy.ConsoleLogger.v2 {#ConsoleLogger tag="registered function"}
#### spacy.ConsoleLogger.v2 {tag="registered function"}

> #### Example config
>
Expand Down Expand Up @@ -564,11 +564,33 @@ start decreasing across epochs.

</Accordion>

| Name | Description |
| ---------------- | --------------------------------------------------------------------- |
| `progress_bar` | Whether the logger should print the progress bar ~~bool~~ |
| `console_output` | Whether the logger should print the logs on the console. ~~bool~~ |
| `output_file` | The file to save the training logs to. ~~Optional[Union[str, Path]]~~ |
| Name | Description |
| ---------------- | ---------------------------------------------------------------------------------------------------------------------------- |
| `progress_bar` | Whether the logger should print a progress bar tracking the steps till the next evaluation pass (default: `False`). ~~bool~~ |
| `console_output` | Whether the logger should print the logs in the console (default: `True`). ~~bool~~ |
| `output_file` | The file to save the training logs to (default: `None`). ~~Optional[Union[str, Path]]~~ |

#### spacy.ConsoleLogger.v3 {#ConsoleLogger tag="registered function"}
shadeMe marked this conversation as resolved.
Show resolved Hide resolved

> #### Example config
>
> ```ini
> [training.logger]
> @loggers = "spacy.ConsoleLogger.v3"
> progress_bar = "eval"
> console_output = true
> output_file = "training_log.jsonl"
> ```

Writes the results of a training step to the console in a tabular format and
optionally saves them to a `jsonl` file.

| Name | Description |
| ---------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `progress_bar` | Type of progress bar to show in the console: `"train"`, `"eval"` or `None`. |
| | The bar tracks the number of steps until `training.max_steps` and `training.eval_frequency` are reached respectively (default: `None`). ~~Optional[str]~~ |
| `console_output` | Whether the logger should print the logs in the console (default: `True`). ~~bool~~ |
| `output_file` | The file to save the training logs to (default: `None`). ~~Optional[Union[str, Path]]~~ |

## Readers {#readers}

Expand Down