Skip to content

Commit

Permalink
ignore attribute-defined-outside-init in multi_gpu_policy (#2876)
Browse files Browse the repository at this point in the history
  • Loading branch information
Chris Elion authored Nov 8, 2019
1 parent 720679a commit 20cdb21
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 4 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ repos:
.*_pb2_grpc.py|
.*/tests/.*
)$
require_serial: true
args: [--score=n]

# "Local" hooks, see https://pre-commit.com/#repository-local-hooks
- repo: local
Expand Down
25 changes: 22 additions & 3 deletions ml-agents/mlagents/trainers/ppo/multi_gpu_policy.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import logging
from typing import Any, Dict, List, Optional

import tensorflow as tf
from tensorflow.python.client import device_lib
from mlagents.envs.brain import BrainParameters
from mlagents.envs.timers import timed
from mlagents.trainers.models import EncoderType, LearningRateSchedule
from mlagents.trainers.ppo.policy import PPOPolicy
from mlagents.trainers.ppo.models import PPOModel
from mlagents.trainers.components.reward_signals import RewardSignal
from mlagents.trainers.components.reward_signals.reward_signal_factory import (
create_reward_signal,
)
Expand All @@ -17,6 +20,23 @@


class MultiGpuPPOPolicy(PPOPolicy):
def __init__(
    self,
    seed: int,
    brain: BrainParameters,
    trainer_params: Dict[str, Any],
    is_training: bool,
    load: bool,
):
    """
    Construct a multi-GPU PPO policy.

    Pre-declares every multi-GPU-specific attribute before delegating to
    PPOPolicy.__init__, so that all attributes exist by the time the base
    constructor runs (this also satisfies pylint's
    attribute-defined-outside-init check).

    :param seed: Random seed passed through to the base policy.
    :param brain: Brain parameters describing the agent's observation/action spaces.
    :param trainer_params: Trainer hyperparameter dictionary.
    :param is_training: Whether the policy is being trained.
    :param load: Whether to load an existing model.
    """
    # Model-related state: populated later by create_model().
    self.model: Optional[PPOModel] = None
    self.total_policy_loss: Optional[tf.Tensor] = None
    self.towers: List[PPOModel] = []
    self.devices: List[str] = []

    # Reward-signal state: populated later by create_reward_signals().
    self.reward_signals: Dict[str, RewardSignal] = {}
    self.reward_signal_towers: List[Dict[str, RewardSignal]] = []

    super().__init__(seed, brain, trainer_params, is_training, load)

def create_model(
self, brain, trainer_params, reward_signal_configs, is_training, load, seed
):
Expand All @@ -28,7 +48,7 @@ def create_model(
:param seed: Random seed.
"""
self.devices = get_devices()
self.towers = []

with self.graph.as_default():
with tf.variable_scope("", reuse=tf.AUTO_REUSE):
for device in self.devices:
Expand Down Expand Up @@ -105,7 +125,6 @@ def create_reward_signals(self, reward_signal_configs):
Create reward signals
:param reward_signal_configs: Reward signal config.
"""
self.reward_signal_towers = []
with self.graph.as_default():
with tf.variable_scope(TOWER_SCOPE_NAME, reuse=tf.AUTO_REUSE):
for device_id, device in enumerate(self.devices):
Expand Down Expand Up @@ -190,7 +209,7 @@ def average_gradients(self, tower_grads):
return average_grads


def get_devices():
def get_devices() -> List[str]:
"""
Get all available GPU devices
"""
Expand Down

0 comments on commit 20cdb21

Please sign in to comment.