update notes

merrymercy · merrymercy · commit 63c06d4ba9d7 · 2017-12-07T18:25:55.000+08:00
diff --git a/build.sh b/build.sh
@@ -8,7 +8,7 @@ fi
 mkdir -p build
 cd build
 cmake ..
-#make -j $(nproc)
+
 if [[ "$OSTYPE" == "linux-gnu" ]]; then
     # Linux
     make -j `nproc`
diff --git a/examples/train_against.py b/examples/train_against.py
@@ -228,7 +228,7 @@ def play_a_round(env, map_size, handles, models, print_every, eps, step_batch_si
         tic = time.time()
         start = 1 if args.opponent != -1 else 0.1
         train_eps = magent.utility.piecewise_decay(k, [0, 100, 250], [start, 0.1, 0.05]) if not args.greedy else 0
-        opponent_eps = train_eps if k < 0 else 0.05  # can use curriculum learning in first 100 steps
+        opponent_eps = train_eps if k < 100 else 0.05  # can use curriculum learning in first 100 steps
 
         loss, num, reward, value = play_a_round(env, args.map_size, handles, models,
                                                 eps=[opponent_eps, train_eps], step_batch_size=step_batch_size,
diff --git a/python/magent/gridworld.py b/python/magent/gridworld.py
@@ -703,6 +703,24 @@ def register_agent_type(self, name, attr):
             name of the type (should be unique)
         attr: dict
             key value pair of the agent type
+            see the Notes section below for the available attributes
+
+        Notes
+        -----
+        height: int, height of agent body
+        width:  int, width of agent body
+        speed:  float, maximum speed, i.e. the radius of the agent's move circle
+        hp:     float, maximum health points of the agent
+        view_range: gw.CircleRange or gw.SectorRange
+
+        damage: float, attack damage
+        step_recover: float, health points recovered per step (can be negative)
+        kill_supply: float, health points gained for killing an agent of this type
+
+        step_reward: float, reward received at every step
+        kill_reward: float, reward gained for killing an agent of this type
+        dead_penalty: float, reward received when the agent dies
+        attack_penalty: float, reward received when performing an attack (used to discourage agents from attacking blank grid cells)
         """
         if name in self.agent_type_dict:
             raise Exception("type name %s already exists" % name)