Update changelog for samples #5105

Closed · wants to merge 3 commits
2 changes: 2 additions & 0 deletions com.unity.ml-agents/CHANGELOG.md
@@ -32,6 +32,7 @@ and this project adheres to
 ### Minor Changes
 #### com.unity.ml-agents / com.unity.ml-agents.extensions (C#)
 - Updated com.unity.barracuda to 1.3.2-preview. (#5084)
+- Added 3D Ball to the `com.unity.ml-agents` samples. (#5077)
 #### ml-agents / ml-agents-envs / gym-unity (Python)
 - The `encoding_size` setting for RewardSignals has been deprecated. Please use `network_settings` instead. (#4982)
 - Sensor names are now passed through to `ObservationSpec.name`. (#5036)
@@ -41,6 +42,7 @@ and this project adheres to
 #### ml-agents / ml-agents-envs / gym-unity (Python)
 - An issue that caused `GAIL` to fail for environments where agents can terminate episodes by self-sacrifice has been fixed. (#4971)
 - Made the error message when observations of different shapes are sent to the trainer clearer. (#5030)
+- An issue that prevented curriculums from incrementing with self-play has been fixed. (#5098)
 
 ## [1.8.1-preview] - 2021-03-08
 ### Minor Changes
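One changelog entry above is a config migration hint: `encoding_size` on reward signals gives way to a nested `network_settings` block. A rough before/after sketch of that migration, written as Python dicts standing in for the YAML trainer config; the `gail`, `strength`, and `hidden_units` keys are illustrative assumptions, not taken from this PR.

```python
# Hypothetical trainer-config fragments shown as Python dicts (normally YAML).
# Only encoding_size -> network_settings comes from the changelog entry;
# the other keys and values are assumed for illustration.

deprecated_config = {
    "reward_signals": {
        "gail": {
            "strength": 0.5,
            "encoding_size": 128,  # deprecated per the changelog entry above
        }
    }
}

migrated_config = {
    "reward_signals": {
        "gail": {
            "strength": 0.5,
            "network_settings": {     # replacement suggested by the changelog
                "hidden_units": 128,  # assumed equivalent of encoding_size
            },
        }
    }
}
```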
File renamed without changes.
2 changes: 1 addition & 1 deletion ml-agents/mlagents/trainers/poca/trainer.py
@@ -287,7 +287,7 @@ def add_policy(
         self.model_saver.initialize_or_load()
 
         # Needed to resume loads properly
-        self.step = policy.get_current_step()
+        self._step = policy.get_current_step()
 
     def get_policy(self, name_behavior_id: str) -> Policy:
         """
2 changes: 1 addition & 1 deletion ml-agents/mlagents/trainers/ppo/trainer.py
@@ -263,7 +263,7 @@ def add_policy(
         self.model_saver.initialize_or_load()
 
         # Needed to resume loads properly
-        self.step = policy.get_current_step()
+        self._step = policy.get_current_step()
 
     def get_policy(self, name_behavior_id: str) -> Policy:
         """
18 changes: 9 additions & 9 deletions ml-agents/mlagents/trainers/sac/trainer.py
@@ -67,7 +67,7 @@ def __init__(
         self.hyperparameters: SACSettings = cast(
             SACSettings, trainer_settings.hyperparameters
         )
-        self.step = 0
+        self._step = 0
 
         # Don't divide by zero
         self.update_steps = 1
@@ -188,7 +188,7 @@ def _is_ready_update(self) -> bool:
         """
         return (
             self.update_buffer.num_experiences >= self.hyperparameters.batch_size
-            and self.step >= self.hyperparameters.buffer_init_steps
+            and self._step >= self.hyperparameters.buffer_init_steps
         )
 
     @timed
@@ -251,9 +251,9 @@ def _update_sac_policy(self) -> bool:
 
         batch_update_stats: Dict[str, list] = defaultdict(list)
         while (
-            self.step - self.hyperparameters.buffer_init_steps
+            self._step - self.hyperparameters.buffer_init_steps
         ) / self.update_steps > self.steps_per_update:
-            logger.debug(f"Updating SAC policy at step {self.step}")
+            logger.debug(f"Updating SAC policy at step {self._step}")
             buffer = self.update_buffer
             if self.update_buffer.num_experiences >= self.hyperparameters.batch_size:
                 sampled_minibatch = buffer.sample_mini_batch(
@@ -305,12 +305,12 @@ def _update_reward_signals(self) -> None:
         )
         batch_update_stats: Dict[str, list] = defaultdict(list)
         while (
-            self.step - self.hyperparameters.buffer_init_steps
+            self._step - self.hyperparameters.buffer_init_steps
         ) / self.reward_signal_update_steps > self.reward_signal_steps_per_update:
             # Get minibatches for reward signal update if needed
             reward_signal_minibatches = {}
             for name in self.optimizer.reward_signals.keys():
-                logger.debug(f"Updating {name} at step {self.step}")
+                logger.debug(f"Updating {name} at step {self._step}")
                 if name != "extrinsic":
                     reward_signal_minibatches[name] = buffer.sample_mini_batch(
                         self.hyperparameters.batch_size,
@@ -355,11 +355,11 @@ def add_policy(
         self.model_saver.initialize_or_load()
 
         # Needed to resume loads properly
-        self.step = policy.get_current_step()
+        self._step = policy.get_current_step()
         # Assume steps were updated at the correct ratio before
-        self.update_steps = int(max(1, self.step / self.steps_per_update))
+        self.update_steps = int(max(1, self._step / self.steps_per_update))
         self.reward_signal_update_steps = int(
-            max(1, self.step / self.reward_signal_steps_per_update)
+            max(1, self._step / self.reward_signal_steps_per_update)
         )
 
     def get_policy(self, name_behavior_id: str) -> Policy:
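The `add_policy` hunk above re-seeds `update_steps` and `reward_signal_update_steps` from the resumed `_step`, so the update-ratio loops do not fire a burst of catch-up updates after loading a checkpoint. A simplified, self-contained sketch of that throttling logic, with invented numbers and a trimmed-down loop (not the real SAC trainer):

```python
# Simplified illustration of the steps_per_update throttle used in the SAC trainer.
# The numbers and function names here are invented for the example.

def resume_counters(step: int, steps_per_update: float) -> int:
    # Assume past updates happened at the configured ratio (as add_policy does).
    return int(max(1, step / steps_per_update))

def run_updates(step: int, update_steps: int, steps_per_update: float,
                buffer_init_steps: int = 0) -> int:
    # Keep updating until updates have caught up with the environment steps.
    while (step - buffer_init_steps) / update_steps > steps_per_update:
        update_steps += 1  # stands in for one policy update per loop iteration
    return update_steps

# Resuming at step 10,000 with 2 environment steps per update -> 5,000 past updates assumed.
update_steps = resume_counters(step=10_000, steps_per_update=2.0)
# After 2,000 more environment steps, roughly 1,000 additional updates are due.
update_steps = run_updates(step=12_000, update_steps=update_steps, steps_per_update=2.0)
print(update_steps)  # 6000
```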
8 changes: 4 additions & 4 deletions ml-agents/mlagents/trainers/trainer/rl_trainer.py
@@ -152,10 +152,10 @@ def _checkpoint(self) -> ModelCheckpoint:
             logger.warning(
                 "Trainer has multiple policies, but default behavior only saves the first."
             )
-        checkpoint_path = self.model_saver.save_checkpoint(self.brain_name, self.step)
+        checkpoint_path = self.model_saver.save_checkpoint(self.brain_name, self._step)
         export_ext = "onnx"
         new_checkpoint = ModelCheckpoint(
-            int(self.step),
+            int(self._step),
             f"{checkpoint_path}.{export_ext}",
             self._policy_mean_reward(),
             time.time(),
@@ -199,7 +199,7 @@ def _increment_step(self, n_steps: int, name_behavior_id: str) -> None:
         Increment the step count of the trainer
         :param n_steps: number of steps to increment the step count by
         """
-        self.step += n_steps
+        self._step += n_steps
         self._next_summary_step = self._get_next_interval_step(self.summary_freq)
         self._next_save_step = self._get_next_interval_step(
             self.trainer_settings.checkpoint_interval
@@ -213,7 +213,7 @@ def _get_next_interval_step(self, interval: int) -> int:
         Get the next step count that should result in an action.
         :param interval: The interval between actions.
         """
-        return self.step + (interval - self.step % interval)
+        return self._step + (interval - self._step % interval)
 
     def _write_summary(self, step: int) -> None:
         """
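`_get_next_interval_step` rounds the current step up to the next multiple of the interval (used for summary and checkpoint scheduling). A quick standalone check of that arithmetic, with values chosen only for illustration:

```python
def next_interval_step(step: int, interval: int) -> int:
    # Mirrors rl_trainer._get_next_interval_step: round step up to the next multiple of interval.
    return step + (interval - step % interval)

print(next_interval_step(12_050, 5_000))  # 15000
print(next_interval_step(15_000, 5_000))  # 20000 (an exact multiple schedules the *next* interval)
```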
4 changes: 2 additions & 2 deletions ml-agents/mlagents/trainers/trainer/trainer.py
@@ -45,7 +45,7 @@ def __init__(
         self._reward_buffer: Deque[float] = deque(maxlen=reward_buff_cap)
         self.policy_queues: List[AgentManagerQueue[Policy]] = []
         self.trajectory_queues: List[AgentManagerQueue[Trajectory]] = []
-        self.step: int = 0
+        self._step: int = 0
         self.artifact_path = artifact_path
         self.summary_freq = self.trainer_settings.summary_freq
         self.policies: Dict[str, Policy] = {}
@@ -78,7 +78,7 @@ def get_step(self) -> int:
         Returns the number of steps the trainer has performed
         :return: the step count of the trainer
         """
-        return self.step
+        return self._step
 
     @property
     def threaded(self) -> bool:
2 changes: 1 addition & 1 deletion ml-agents/mlagents/trainers/trainer_controller.py
@@ -208,7 +208,7 @@ def end_trainer_episodes(self) -> None:
     def reset_env_if_ready(self, env: EnvManager) -> None:
         # Get the sizes of the reward buffers.
         reward_buff = {k: list(t.reward_buffer) for (k, t) in self.trainers.items()}
-        curr_step = {k: int(t.step) for (k, t) in self.trainers.items()}
+        curr_step = {k: int(t.get_step) for (k, t) in self.trainers.items()}
         max_step = {k: int(t.get_max_steps) for (k, t) in self.trainers.items()}
         # Attempt to increment the lessons of the brains who
         # were ready.
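Taken together, these hunks make the step counter private (`_step`) and route external reads through the existing read-only `get_step` property, which is why `trainer_controller` now reads `t.get_step` instead of `t.step`. A minimal sketch of that pattern, independent of the real `Trainer` class:

```python
# Minimal sketch of the _step / get_step pattern; not the real Trainer class.

class TrainerSketch:
    def __init__(self) -> None:
        self._step: int = 0  # internal step counter, mutated only by the trainer itself

    @property
    def get_step(self) -> int:
        # Read-only view of the step count for external callers.
        return self._step

    def _increment_step(self, n_steps: int) -> None:
        self._step += n_steps

trainers = {"3DBall": TrainerSketch()}
trainers["3DBall"]._increment_step(500)
# External code (e.g. a controller) reads via the property instead of the attribute:
curr_step = {k: int(t.get_step) for k, t in trainers.items()}
print(curr_step)  # {'3DBall': 500}
```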