Correct some spelling errors #962

Merged Sep 18, 2020 (3 commits)

trax/data/inputs.py (3 additions & 3 deletions)
@@ -179,7 +179,7 @@ def pad_to_max_dims(tensors, boundary=None, strict_pad_on_len=False):
to (3, 10) both and the returned tensor will have shape (2, 3, 10).

When boundary is specified, we try to pad all unknown dimensions to boundary
if possible, which can help reduce the number of different shapes occuring
if possible, which can help reduce the number of different shapes occurring
in the tensors and speed up XLA compilation. So, for example, a pair of
tensors of shapes (8, 10), (8, 9) with boundary=12 will be padded to (8, 12).
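The boundary behavior described here can be sketched roughly as follows (illustration only, not code from this PR; `pad_last_dim_to_boundary` is a hypothetical helper):

```python
import numpy as np

# Illustration only: pad the last dimension of each tensor up to `boundary`
# so that fewer distinct shapes reach XLA compilation.
def pad_last_dim_to_boundary(tensors, boundary):
  padded = []
  for t in tensors:
    pad_len = max(0, boundary - t.shape[-1])
    pad_width = [(0, 0)] * (t.ndim - 1) + [(0, pad_len)]
    padded.append(np.pad(t, pad_width))
  return padded

# Shapes (8, 10) and (8, 9) with boundary=12 both become (8, 12).
a, b = np.zeros((8, 10)), np.zeros((8, 9))
out = pad_last_dim_to_boundary([a, b], boundary=12)
assert [t.shape for t in out] == [(8, 12), (8, 12)]
```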

@@ -293,7 +293,7 @@ def _buckets_for_length(bucket_length, batch_size, max_eval_length, n_devices,
batch_size // 16, 1]
if not training:
# The last bucket batch size is always 1, but the one-but-last is
# sized to accomodate the final length = bucket_boundaries[-1], which
# sized to accommodate the final length = bucket_boundaries[-1], which
# we changed for eval above -- so adjusting here too.

# Resize if needed, since bucket_batch_sizes may not be the same size
@@ -382,7 +382,7 @@ def add_loss_weights(generator, id_to_mask=None):
- If the stream consists of pairs `(inputs, targets)`, a loss mask is added
that is creates as a tensor of ones of the same shape as targets.
- If `id_to_mask` is not `None`, and the stream (after the previous point)
has triples `(inputs, targets, weights)`, the weights are multipled by a
has triples `(inputs, targets, weights)`, the weights are multiplied by a
0/1 mask that is 0 iff targets is equal to `id_to_mask` (1 otherwise).

Args:
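A minimal sketch of the pairs case described by this docstring (illustration only, not code from this PR; `add_loss_weights_sketch` is a hypothetical name):

```python
import numpy as np

# Illustration only: turn (inputs, targets) pairs into (inputs, targets, weights)
# triples, with weights set to 0 wherever targets equal id_to_mask.
def add_loss_weights_sketch(generator, id_to_mask=None):
  for inputs, targets in generator:
    weights = np.ones_like(targets, dtype=np.float32)
    if id_to_mask is not None:
      weights *= (targets != id_to_mask).astype(np.float32)
    yield inputs, targets, weights
```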

trax/fastmath/ops.py (1 addition & 1 deletion)
@@ -339,7 +339,7 @@ def use_backend(name):


def backend_name():
"""Returns the name of the backend curently in use ('tf' or 'jax')."""
"""Returns the name of the backend currently in use ('tf' or 'jax')."""
return backend()['name']


trax/layers/research/efficient_attention.py (1 addition & 1 deletion)
@@ -289,7 +289,7 @@ def permute_vjp(val):
permuted = permute_impl(fastmath.stop_gradient(val))
def vjpfun(permuted_grad):
# JAX autodiff would synthesize a scatter operation because it doesn't
# know that the indices are a permutatation. However on TPU, gathers are
# know that the indices are a permutation. However on TPU, gathers are
# faster than scatters (at least in the regime the LSH attention uses).
return (np.take(permuted_grad, inverse_permutation, axis=axis),)
return permuted, vjpfun
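The comment above refers to defining a custom VJP so the backward pass is a gather along the inverse permutation instead of a scatter. A standalone sketch of that idea (illustration only, not the PR's code; `make_permute` is a hypothetical helper):

```python
import jax
import jax.numpy as jnp

def make_permute(perm, axis=0):
  """Permutation gather whose backward pass is also a gather (no scatter)."""
  inverse_perm = jnp.argsort(perm)   # inverse permutation

  @jax.custom_vjp
  def permute(x):
    return jnp.take(x, perm, axis=axis)

  def fwd(x):
    return jnp.take(x, perm, axis=axis), None

  def bwd(_, grad):
    # Gradient of a permutation gather = gather along the inverse permutation.
    return (jnp.take(grad, inverse_perm, axis=axis),)

  permute.defvjp(fwd, bwd)
  return permute

permute = make_permute(jnp.array([2, 0, 1]))
grads = jax.grad(lambda x: permute(x).sum())(jnp.arange(3.0))
```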

trax/models/research/rezero.py (1 addition & 1 deletion)
@@ -198,7 +198,7 @@ def ReZeroTransformerLM(vocab_size,
ff_activation: the non-linearity in feed-forward layer

Returns:
A ReZero trasnformer language model as a layer that maps from a tensor of
A ReZero transformer language model as a layer that maps from a tensor of
tokens to activations over a vocab set.
"""
positional_encoder = [

trax/optimizers/trainer.py (1 addition & 1 deletion)
@@ -41,7 +41,7 @@ class Trainer(object):
have already been initialized.

The output after running the `one_step` function is just the loss from the
loss layer and optimizer statisics but, as a side effect, it also updates
loss layer and optimizer statistics but, as a side effect, it also updates
the weights of the loss layer and the slots of the optimizer.
"""

trax/rl/actor_critic.py (3 additions & 3 deletions)
@@ -520,7 +520,7 @@ def advantage_std(self):


class A2C(AdvantageBasedActorCriticAgent):
"""Trains policy and value models using the A2C algortithm."""
"""Trains policy and value models using the A2C algorithm."""

on_policy = True

@@ -564,7 +564,7 @@ def f(log_probs, advantages, old_log_probs, mask):
class PPO(AdvantageBasedActorCriticAgent):
"""The Proximal Policy Optimization Algorithm aka PPO.

Trains policy and value models using the PPO algortithm.
Trains policy and value models using the PPO algorithm.
"""

on_policy = True
@@ -596,7 +596,7 @@ def f(new_log_probs, advantages, old_log_probs, mask):
', %s != %s' % (new_log_probs.shape, mask.shape))

# The ratio between new_probs and old_probs expressed
# using log_probs and exponentaion
# using log_probs and exponentiation
probs_ratio = jnp.exp(new_log_probs - old_log_probs)
if advantages.shape != probs_ratio.shape:
raise ValueError('New log-probs and old log probs shapes '
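The `probs_ratio` computed above (and again in `ProbsRatio` in trax/rl/rl_layers.py below) is the standard PPO ratio, taken in log space and exponentiated. A minimal sketch of how such a ratio feeds a clipped surrogate objective (illustration only, not the PR's code; the function name and `epsilon` default are hypothetical):

```python
import jax.numpy as jnp

# Illustration only: PPO's clipped surrogate objective, with the probability
# ratio computed from log-probs via exponentiation for numerical stability.
def ppo_clipped_objective(new_log_probs, old_log_probs, advantages, epsilon=0.2):
  probs_ratio = jnp.exp(new_log_probs - old_log_probs)   # pi_new / pi_old
  clipped_ratio = jnp.clip(probs_ratio, 1.0 - epsilon, 1.0 + epsilon)
  # Elementwise minimum of the two surrogate terms; this is the objective
  # to maximize (negate it to use as a loss).
  return jnp.mean(jnp.minimum(probs_ratio * advantages,
                              clipped_ratio * advantages))
```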

trax/rl/rl_layers.py (1 addition & 1 deletion)
@@ -71,7 +71,7 @@ def ProbsRatio(dist_inputs, actions, old_log_probs, log_prob_fun):
f'new_log_probs.shape was {new_log_probs.shape} and'
f'old_log_probs.shape was {old_log_probs.shape}')
# The ratio between new_probs and old_probs expressed
# using log_probs and exponentaion
# using log_probs and exponentiation
probs_ratio = jnp.exp(new_log_probs - old_log_probs)
return probs_ratio

trax/rl/task.py (1 addition & 1 deletion)
@@ -697,7 +697,7 @@ def pad(tensor_list):
])
# Where act, rew and ret will usually have the following shape:
# [batch_size, trajectory_length-1], which we call [B, L-1].
# Observations are more complex and will usuall be [B, L] + S where S
# Observations are more complex and will usually be [B, L] + S where S
# is the shape of the observation space (self.observation_space.shape).
# We stop the recursion at level 1, so we pass lists of arrays into
# pad().
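As context for the shape comment above, a rough sketch of padding variable-length per-trajectory arrays to a common length before stacking into a `[B, L, ...]` batch (illustration only, not the PR's code; `pad_and_stack` is a hypothetical helper):

```python
import numpy as np

# Illustration only: pad arrays of shape [L_i, ...] to the longest L_i,
# then stack into a batch of shape [B, max_L, ...].
def pad_and_stack(arrays):
  max_len = max(a.shape[0] for a in arrays)
  padded = []
  for a in arrays:
    pad_width = [(0, max_len - a.shape[0])] + [(0, 0)] * (a.ndim - 1)
    padded.append(np.pad(a, pad_width))
  return np.stack(padded)

# Rewards of lengths 3 and 5 become a (2, 5) batch.
batch = pad_and_stack([np.ones(3), np.ones(5)])
assert batch.shape == (2, 5)
```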

trax/rl/task_test.py (1 addition & 1 deletion)
@@ -209,7 +209,7 @@ def test_trajectory_stream_margin(self):
self.assertTrue(next_slice.timesteps[i].done)
self.assertFalse(next_slice.timesteps[i].mask)
got_done = True
# Assert that we got a done somewhere, otherwise the test is not trigerred.
# Assert that we got a done somewhere, otherwise the test is not triggered.
# Not getting done has low probability (1/2^10) but is possible, flaky test.
self.assertTrue(got_done)

trax/rl_trainer.py (2 additions & 2 deletions)
@@ -94,7 +94,7 @@ def train_rl(
num_actions: None unless one wants to use the discretization wrapper. Then
num_actions specifies the number of discrete actions.
light_rl: whether to use the light RL setting (experimental).
light_rl_trainer: whichh light RL trainer to use (experimental).
light_rl_trainer: which light RL trainer to use (experimental).
"""
tf_np.set_allow_float64(FLAGS.tf_allow_float64)

@@ -213,7 +213,7 @@ def main(argv):
gin_configs = FLAGS.config or []
gin.parse_config_files_and_bindings(FLAGS.config_file, gin_configs)

logging.info('Gin cofig:')
logging.info('Gin config:')
logging.info(gin_configs)

train_rl(

trax/supervised/trainer_lib.py (3 additions & 3 deletions)
@@ -325,8 +325,8 @@ def train_step(self, batch):
if self._should_log_now():
for name, value in stat.items():
# TODO(afrozm): value is a scalar, but sometimes JAX is crashing here
# with a device put array error complaning that it should be an array.
# On multiple devices, take the mean.
# with a device put array error complaining that it should be an array.
# On multiple devices, take the mean.
scalar_value = np.mean(np.array(value))
self._train_sw.scalar('training/' + name, scalar_value, step=self._step)
self._step += 1
@@ -489,7 +489,7 @@ def _should_log_now(self):
and (self._step == 1 or self._step % 10 == 0))

def _for_n_devices(self, x):
"""Replicates/broadcasts `x` for n devices if `self.n_devicess > 1`."""
"""Replicates/broadcasts `x` for n devices if `self.n_devices > 1`."""
return tl.for_n_devices(x, self.n_devices) # pylint: disable=protected-access

def close(self):

trax/supervised/training.py (1 addition & 1 deletion)
@@ -432,7 +432,7 @@ def _log_training_progress(self, task, total_loss, n_steps, elapsed_time,
task (TrainTask): The current task.
total_loss: Total training loss accumulated over n_steps training steps.
n_steps: Number of steps over which the metrics were accumulated.
elapsed_time: Time of execusion of n_steps training steps.
elapsed_time: Time of execution of n_steps training steps.
optimizer_metrics: Dict from optimizer metric name to metric values.
summary_writer: Jaxboard summary writer for saving provided metrics.
"""

trax/supervised/training_test.py (1 addition & 1 deletion)
@@ -106,7 +106,7 @@ def test_train_dense_layer_evals(self):
self.assertEqual(10, training_session.step) # Unchanged

def test_summaries_are_written(self):
"""Training writes down metrics when writting is turned on."""
"""Training writes down metrics when writing is turned on."""
model = tl.Serial(tl.Dense(1))
task = training.TrainTask(
_very_simple_data(), tl.L2Loss(), optimizers.SGD(.01))

trax/tf_numpy/numpy_impl/utils.py (1 addition & 1 deletion)
@@ -303,7 +303,7 @@ def _maybe_static(x):
return value


# All the following functions exist becaues get_static_value can't handle
# All the following functions exist because get_static_value can't handle
# their TF counterparts.

