Turning some logger.info calls into logger.debug and removing some logging overhead when not using debug #5211

Merged
Merged 4 commits on Apr 5, 2021
1 change: 1 addition & 0 deletions com.unity.ml-agents/CHANGELOG.md
@@ -41,6 +41,7 @@ sizes and will need to be retrained. (#5181)
depend on the previous behavior, you can explicitly set the Agent's `InferenceDevice` to `InferenceDevice.CPU`. (#5175)

#### ml-agents / ml-agents-envs / gym-unity (Python)
- Some console output has been moved from `info` to `debug` and will not be printed by default. If you want all messages to be printed, you can run `mlagents-learn` with the `--debug` option or add the line `debug: true` at the top of the YAML config file. (#5211)

### Bug Fixes
#### com.unity.ml-agents / com.unity.ml-agents.extensions (C#)
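As a quick illustration of the changelog entry above, either of the following keeps the verbose output (the config file name and run ID are placeholders, not taken from this PR):

# pass the --debug flag on the command line
mlagents-learn config.yaml --run-id=MyRun --debug

# or, equivalently, add this at the top of config.yaml
debug: true
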
6 changes: 3 additions & 3 deletions ml-agents-envs/mlagents_envs/environment.py
@@ -279,7 +279,7 @@ def _update_behavior_specs(self, output: UnityOutputProto) -> None:
agent = agent_infos.value[0]
new_spec = behavior_spec_from_proto(brain_param, agent)
self._env_specs[brain_param.brain_name] = new_spec
logger.info(f"Connected new brain:\n{brain_param.brain_name}")
logger.info(f"Connected new brain: {brain_param.brain_name}")

def _update_state(self, output: UnityRLOutputProto) -> None:
"""
@@ -426,9 +426,9 @@ def _close(self, timeout: Optional[int] = None) -> None:
# Wait a bit for the process to shutdown, but kill it if it takes too long
try:
self._process.wait(timeout=timeout)
logger.info(self._returncode_to_env_message(self._process.returncode))
logger.debug(self._returncode_to_env_message(self._process.returncode))
except subprocess.TimeoutExpired:
logger.info("Environment timed out shutting down. Killing...")
logger.warning("Environment timed out shutting down. Killing...")
self._process.kill()
# Set to None so we don't try to close multiple times.
self._process = None
27 changes: 22 additions & 5 deletions ml-agents-envs/mlagents_envs/logging_util.py
@@ -12,7 +12,8 @@
_loggers = set()
_log_level = NOTSET
DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
LOG_FORMAT = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
DEBUG_LOG_FORMAT = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"

Contributor:
BTW, I think this was added by Anupam for log parsing (so he could get the time). Just checking in with @hvpeteet to make sure we aren't using it anywhere in cloud.

Contributor:
We don't parse logs in any meaningful way at the moment, so I think this should be fine. We essentially just record stdout and stderr and forward it on to the user. This may also get forwarded to Stackdriver, but each line would already have a timestamp provided upon upload.

Contributor:
Is there a way to provide --debug through the YAML? Just worried if users are depending on the current logs and want to restore their old functionality.

Contributor (Author):
Yes! You can do this by adding debug: true in the YAML config. For instance:

debug: true
behaviors:
  3DBall:
    trainer_type: ppo
    hyperparameters:
      batch_size: 64
      buffer_size: 12000
      learning_rate: 0.0003
      beta: 0.001
      epsilon: 0.2
      lambd: 0.99
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: true
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 500000
    time_horizon: 1000
    summary_freq: 12000
    threaded: true

This use case is not documented; I will add a line to the changelog.

Contributor:
Thanks

LOG_FORMAT = "[%(levelname)s] %(message)s"


def get_logger(name: str) -> logging.Logger:
@@ -21,15 +22,19 @@ def get_logger(name: str) -> logging.Logger:
specified by set_log_level()
"""
logger = logging.getLogger(name=name)
# If we've already set the log level, make sure new loggers use it
if _log_level != NOTSET:
logger.setLevel(_log_level)

if _log_level == DEBUG:
formatter = logging.Formatter(fmt=DEBUG_LOG_FORMAT, datefmt=DATE_FORMAT)
else:
formatter = logging.Formatter(fmt=LOG_FORMAT)
handler = logging.StreamHandler(stream=sys.stdout)
formatter = logging.Formatter(fmt=LOG_FORMAT, datefmt=DATE_FORMAT)
handler.setFormatter(formatter)
logger.addHandler(handler)

# If we've already set the log level, make sure new loggers use it
if _log_level != NOTSET:
logger.setLevel(_log_level)

# Keep track of this logger so that we can change the log level later
_loggers.add(logger)
return logger
@@ -44,3 +49,15 @@ def set_log_level(log_level: int) -> None:

for logger in _loggers:
logger.setLevel(log_level)

if log_level == DEBUG:
formatter = logging.Formatter(fmt=DEBUG_LOG_FORMAT, datefmt=DATE_FORMAT)
else:
formatter = logging.Formatter(LOG_FORMAT)
_set_formatter_for_all_loggers(formatter)


def _set_formatter_for_all_loggers(formatter: logging.Formatter) -> None:
for logger in _loggers:
for handler in logger.handlers[:]:
handler.setFormatter(formatter)
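
For context, here is a minimal, self-contained sketch (not part of the diff) of the behaviour this change introduces: the verbose, timestamped DEBUG_LOG_FORMAT is only used when the log level is DEBUG, otherwise messages are printed with the plain LOG_FORMAT. In the actual module, set_log_level() additionally swaps the formatter on every handler of every tracked logger via _set_formatter_for_all_loggers().

import logging
import sys

DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
DEBUG_LOG_FORMAT = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
LOG_FORMAT = "[%(levelname)s] %(message)s"


def make_handler(debug: bool) -> logging.StreamHandler:
    # Use the timestamped format only for debug runs.
    fmt = DEBUG_LOG_FORMAT if debug else LOG_FORMAT
    handler = logging.StreamHandler(stream=sys.stdout)
    handler.setFormatter(logging.Formatter(fmt=fmt, datefmt=DATE_FORMAT))
    return handler


logger = logging.getLogger("example")
logger.setLevel(logging.INFO)
logger.addHandler(make_handler(debug=False))
logger.info("Connected new brain: 3DBall")  # prints: [INFO] Connected new brain: 3DBall
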
2 changes: 1 addition & 1 deletion ml-agents/mlagents/torch_utils/torch.py
@@ -55,7 +55,7 @@ def set_torch_config(torch_settings: TorchSettings) -> None:
torch.set_default_tensor_type(torch.cuda.FloatTensor)
else:
torch.set_default_tensor_type(torch.FloatTensor)
logger.info(f"default Torch device: {_device}")
logger.debug(f"default Torch device: {_device}")


# Initialize to default settings
2 changes: 1 addition & 1 deletion ml-agents/mlagents/trainers/learn.py
@@ -242,7 +242,7 @@ def run_cli(options: RunOptions) -> None:

if options.env_settings.seed == -1:
run_seed = np.random.randint(0, 10000)
logger.info(f"run_seed set to {run_seed}")
logger.debug(f"run_seed set to {run_seed}")
run_training(run_seed, options)


4 changes: 2 additions & 2 deletions ml-agents/mlagents/trainers/policy/checkpoint_manager.py
@@ -40,9 +40,9 @@ def remove_checkpoint(checkpoint: Dict[str, Any]) -> None:
file_path: str = checkpoint["file_path"]
if os.path.exists(file_path):
os.remove(file_path)
logger.info(f"Removed checkpoint model {file_path}.")
logger.debug(f"Removed checkpoint model {file_path}.")
else:
logger.info(f"Checkpoint at {file_path} could not be found.")
logger.debug(f"Checkpoint at {file_path} could not be found.")
return

@classmethod
4 changes: 2 additions & 2 deletions ml-agents/mlagents/trainers/sac/trainer.py
@@ -116,10 +116,10 @@ def load_replay_buffer(self) -> None:
Loads the last saved replay buffer from a file.
"""
filename = os.path.join(self.artifact_path, "last_replay_buffer.hdf5")
logger.info(f"Loading Experience Replay Buffer from {filename}")
logger.info(f"Loading Experience Replay Buffer from {filename}...")
with open(filename, "rb+") as file_object:
self.update_buffer.load_from_file(file_object)
logger.info(
logger.debug(
"Experience replay buffer has {} experiences.".format(
self.update_buffer.num_experiences
)
5 changes: 3 additions & 2 deletions ml-agents/mlagents/trainers/stats.py
@@ -235,13 +235,14 @@ def _delete_all_events_files(self, directory_name: str) -> None:
for file_name in os.listdir(directory_name):
if file_name.startswith("events.out"):
logger.warning(
f"{file_name} was left over from a previous run. Deleting."
f"Deleting TensorBoard data {file_name} that was left over from a"
"previous run."
)
full_fname = os.path.join(directory_name, file_name)
try:
os.remove(full_fname)
except OSError:
logger.warning(
logger.error(
"{} was left over from a previous run and "
"not deleted.".format(full_fname)
)
2 changes: 1 addition & 1 deletion ml-agents/mlagents/trainers/subprocess_env_manager.py
@@ -217,7 +217,7 @@ def _generate_all_results() -> AllStepResult:
UnityEnvironmentException,
UnityCommunicatorStoppedException,
) as ex:
logger.info(f"UnityEnvironment worker {worker_id}: environment stopping.")
logger.debug(f"UnityEnvironment worker {worker_id}: environment stopping.")
step_queue.put(
EnvironmentResponse(EnvironmentCommand.ENV_EXITED, worker_id, ex)
)
2 changes: 1 addition & 1 deletion ml-agents/mlagents/trainers/torch/model_serialization.py
@@ -153,7 +153,7 @@ def export_policy_model(self, output_filepath: str) -> None:
:param output_filepath: file path to output the model (without file suffix)
"""
onnx_output_path = f"{output_filepath}.onnx"
logger.info(f"Converting to {onnx_output_path}")
logger.debug(f"Converting to {onnx_output_path}")

with exporting_to_onnx():
torch.onnx.export(
2 changes: 1 addition & 1 deletion ml-agents/mlagents/trainers/trainer_controller.py
@@ -78,7 +78,7 @@ def _save_models(self):

for brain_name in self.trainers.keys():
self.trainers[brain_name].save_model()
self.logger.info("Saved Model")
self.logger.debug("Saved Model")

@staticmethod
def _create_output_path(output_path):