Correct some spelling errors #962

Merged Sep 18, 2020 (3 commits)

trax/data/inputs.py (3 additions & 3 deletions)
@@ -179,7 +179,7 @@ def pad_to_max_dims(tensors, boundary=None, strict_pad_on_len=False):
to (3, 10) both and the returned tensor will have shape (2, 3, 10).

When boundary is specified, we try to pad all unknown dimensions to boundary
if possible, which can help reduce the number of different shapes occuring
if possible, which can help reduce the number of different shapes occurring
in the tensors and speed up XLA compilation. So, for example, a pair of
tensors of shapes (8, 10), (8, 9) with boundary=12 will be padded to (8, 12).
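The boundary behavior described here can be sketched roughly as follows (illustration only, not code from this PR; `pad_last_dim_to_boundary` is a hypothetical helper):

```python
import numpy as np

# Illustration only: pad the last dimension of each tensor up to `boundary`
# so that fewer distinct shapes reach XLA compilation.
def pad_last_dim_to_boundary(tensors, boundary):
  padded = []
  for t in tensors:
    pad_len = max(0, boundary - t.shape[-1])
    pad_width = [(0, 0)] * (t.ndim - 1) + [(0, pad_len)]
    padded.append(np.pad(t, pad_width))
  return padded

# Shapes (8, 10) and (8, 9) with boundary=12 both become (8, 12).
a, b = np.zeros((8, 10)), np.zeros((8, 9))
out = pad_last_dim_to_boundary([a, b], boundary=12)
assert [t.shape for t in out] == [(8, 12), (8, 12)]
```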

@@ -293,7 +293,7 @@ def _buckets_for_length(bucket_length, batch_size, max_eval_length, n_devices,
batch_size // 16, 1]
if not training:
# The last bucket batch size is always 1, but the one-but-last is
# sized to accomodate the final length = bucket_boundaries[-1], which
# sized to accommodate the final length = bucket_boundaries[-1], which
# we changed for eval above -- so adjusting here too.

# Resize if needed, since bucket_batch_sizes may not be the same size
@@ -382,7 +382,7 @@ def add_loss_weights(generator, id_to_mask=None):
- If the stream consists of pairs `(inputs, targets)`, a loss mask is added
that is creates as a tensor of ones of the same shape as targets.
- If `id_to_mask` is not `None`, and the stream (after the previous point)
has triples `(inputs, targets, weights)`, the weights are multipled by a
has triples `(inputs, targets, weights)`, the weights are multiplied by a
0/1 mask that is 0 iff targets is equal to `id_to_mask` (1 otherwise).

Args:
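A minimal sketch of the pairs case described by this docstring (illustration only, not code from this PR; `add_loss_weights_sketch` is a hypothetical name):

```python
import numpy as np

# Illustration only: turn (inputs, targets) pairs into (inputs, targets, weights)
# triples, with weights set to 0 wherever targets equal id_to_mask.
def add_loss_weights_sketch(generator, id_to_mask=None):
  for inputs, targets in generator:
    weights = np.ones_like(targets, dtype=np.float32)
    if id_to_mask is not None:
      weights *= (targets != id_to_mask).astype(np.float32)
    yield inputs, targets, weights
```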

trax/fastmath/ops.py (1 addition & 1 deletion)
@@ -339,7 +339,7 @@ def use_backend(name):


def backend_name():
"""Returns the name of the backend curently in use ('tf' or 'jax')."""
"""Returns the name of the backend currently in use ('tf' or 'jax')."""
return backend()['name']


trax/layers/research/efficient_attention.py (1 addition & 1 deletion)
@@ -289,7 +289,7 @@ def permute_vjp(val):
permuted = permute_impl(fastmath.stop_gradient(val))
def vjpfun(permuted_grad):
# JAX autodiff would synthesize a scatter operation because it doesn't
# know that the indices are a permutatation. However on TPU, gathers are
# know that the indices are a permutation. However on TPU, gathers are
# faster than scatters (at least in the regime the LSH attention uses).
return (np.take(permuted_grad, inverse_permutation, axis=axis),)
return permuted, vjpfun
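The comment above refers to defining a custom VJP so the backward pass is a gather along the inverse permutation instead of a scatter. A standalone sketch of that idea (illustration only, not the PR's code; `make_permute` is a hypothetical helper):

```python
import jax
import jax.numpy as jnp

def make_permute(perm, axis=0):
  """Permutation gather whose backward pass is also a gather (no scatter)."""
  inverse_perm = jnp.argsort(perm)   # inverse permutation

  @jax.custom_vjp
  def permute(x):
    return jnp.take(x, perm, axis=axis)

  def fwd(x):
    return jnp.take(x, perm, axis=axis), None

  def bwd(_, grad):
    # Gradient of a permutation gather = gather along the inverse permutation.
    return (jnp.take(grad, inverse_perm, axis=axis),)

  permute.defvjp(fwd, bwd)
  return permute

permute = make_permute(jnp.array([2, 0, 1]))
grads = jax.grad(lambda x: permute(x).sum())(jnp.arange(3.0))
```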

trax/models/research/rezero.py (1 addition & 1 deletion)
@@ -198,7 +198,7 @@ def ReZeroTransformerLM(vocab_size,
ff_activation: the non-linearity in feed-forward layer

Returns:
A ReZero trasnformer language model as a layer that maps from a tensor of
A ReZero transformer language model as a layer that maps from a tensor of
tokens to activations over a vocab set.
"""
positional_encoder = [

trax/optimizers/trainer.py (1 addition & 1 deletion)
@@ -41,7 +41,7 @@ class Trainer(object):
have already been initialized.

The output after running the `one_step` function is just the loss from the
loss layer and optimizer statisics but, as a side effect, it also updates
loss layer and optimizer statistics but, as a side effect, it also updates
the weights of the loss layer and the slots of the optimizer.
"""

trax/rl/actor_critic.py (3 additions & 3 deletions)
@@ -520,7 +520,7 @@ def advantage_std(self):


class A2C(AdvantageBasedActorCriticAgent):
"""Trains policy and value models using the A2C algortithm."""
"""Trains policy and value models using the A2C algorithm."""

on_policy = True

@@ -564,7 +564,7 @@ def f(log_probs, advantages, old_log_probs, mask):
class PPO(AdvantageBasedActorCriticAgent):
"""The Proximal Policy Optimization Algorithm aka PPO.

Trains policy and value models using the PPO algortithm.
Trains policy and value models using the PPO algorithm.
"""

on_policy = True
@@ -596,7 +596,7 @@ def f(new_log_probs, advantages, old_log_probs, mask):
', %s != %s' % (new_log_probs.shape, mask.shape))

# The ratio between new_probs and old_probs expressed
# using log_probs and exponentaion
# using log_probs and exponentiation
probs_ratio = jnp.exp(new_log_probs - old_log_probs)
if advantages.shape != probs_ratio.shape:
raise ValueError('New log-probs and old log probs shapes '
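The `probs_ratio` computed above (and again in `ProbsRatio` in trax/rl/rl_layers.py below) is the standard PPO ratio, taken in log space and exponentiated. A minimal sketch of how such a ratio feeds a clipped surrogate objective (illustration only, not the PR's code; the function name and `epsilon` default are hypothetical):

```python
import jax.numpy as jnp

# Illustration only: PPO's clipped surrogate objective, with the probability
# ratio computed from log-probs via exponentiation for numerical stability.
def ppo_clipped_objective(new_log_probs, old_log_probs, advantages, epsilon=0.2):
  probs_ratio = jnp.exp(new_log_probs - old_log_probs)   # pi_new / pi_old
  clipped_ratio = jnp.clip(probs_ratio, 1.0 - epsilon, 1.0 + epsilon)
  # Elementwise minimum of the two surrogate terms; this is the objective
  # to maximize (negate it to use as a loss).
  return jnp.mean(jnp.minimum(probs_ratio * advantages,
                              clipped_ratio * advantages))
```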

trax/rl/rl_layers.py (1 addition & 1 deletion)
@@ -71,7 +71,7 @@ def ProbsRatio(dist_inputs, actions, old_log_probs, log_prob_fun):
f'new_log_probs.shape was {new_log_probs.shape} and'
f'old_log_probs.shape was {old_log_probs.shape}')
# The ratio between new_probs and old_probs expressed
# using log_probs and exponentaion
# using log_probs and exponentiation
probs_ratio = jnp.exp(new_log_probs - old_log_probs)
return probs_ratio

trax/rl/task.py (1 addition & 1 deletion)
@@ -697,7 +697,7 @@ def pad(tensor_list):
])
# Where act, rew and ret will usually have the following shape:
# [batch_size, trajectory_length-1], which we call [B, L-1].
# Observations are more complex and will usuall be [B, L] + S where S
# Observations are more complex and will usually be [B, L] + S where S
# is the shape of the observation space (self.observation_space.shape).
# We stop the recursion at level 1, so we pass lists of arrays into
# pad().
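As context for the shape comment above, a rough sketch of padding variable-length per-trajectory arrays to a common length before stacking into a `[B, L, ...]` batch (illustration only, not the PR's code; `pad_and_stack` is a hypothetical helper):

```python
import numpy as np

# Illustration only: pad arrays of shape [L_i, ...] to the longest L_i,
# then stack into a batch of shape [B, max_L, ...].
def pad_and_stack(arrays):
  max_len = max(a.shape[0] for a in arrays)
  padded = []
  for a in arrays:
    pad_width = [(0, max_len - a.shape[0])] + [(0, 0)] * (a.ndim - 1)
    padded.append(np.pad(a, pad_width))
  return np.stack(padded)

# Rewards of lengths 3 and 5 become a (2, 5) batch.
batch = pad_and_stack([np.ones(3), np.ones(5)])
assert batch.shape == (2, 5)
```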

trax/rl/task_test.py (1 addition & 1 deletion)
@@ -209,7 +209,7 @@ def test_trajectory_stream_margin(self):
self.assertTrue(next_slice.timesteps[i].done)
self.assertFalse(next_slice.timesteps[i].mask)
got_done = True
# Assert that we got a done somewhere, otherwise the test is not trigerred.
# Assert that we got a done somewhere, otherwise the test is not triggered.
# Not getting done has low probability (1/2^10) but is possible, flaky test.
self.assertTrue(got_done)

trax/rl_trainer.py (2 additions & 2 deletions)
@@ -94,7 +94,7 @@ def train_rl(
num_actions: None unless one wants to use the discretization wrapper. Then
num_actions specifies the number of discrete actions.
light_rl: whether to use the light RL setting (experimental).
light_rl_trainer: whichh light RL trainer to use (experimental).
light_rl_trainer: which light RL trainer to use (experimental).
"""
tf_np.set_allow_float64(FLAGS.tf_allow_float64)

@@ -213,7 +213,7 @@ def main(argv):
gin_configs = FLAGS.config or []
gin.parse_config_files_and_bindings(FLAGS.config_file, gin_configs)

logging.info('Gin cofig:')
logging.info('Gin config:')
logging.info(gin_configs)

train_rl(

trax/supervised/trainer_lib.py (3 additions & 3 deletions)
@@ -325,8 +325,8 @@ def train_step(self, batch):
if self._should_log_now():
for name, value in stat.items():
# TODO(afrozm): value is a scalar, but sometimes JAX is crashing here
# with a device put array error complaning that it should be an array.
# On multiple devices, take the mean.
# with a device put array error complaining that it should be an array.
# On multiple devices, take the mean.
scalar_value = np.mean(np.array(value))
self._train_sw.scalar('training/' + name, scalar_value, step=self._step)
self._step += 1
@@ -489,7 +489,7 @@ def _should_log_now(self):
and (self._step == 1 or self._step % 10 == 0))

def _for_n_devices(self, x):
"""Replicates/broadcasts `x` for n devices if `self.n_devicess > 1`."""
"""Replicates/broadcasts `x` for n devices if `self.n_devices > 1`."""
return tl.for_n_devices(x, self.n_devices) # pylint: disable=protected-access

def close(self):

trax/supervised/training.py (1 addition & 1 deletion)
@@ -432,7 +432,7 @@ def _log_training_progress(self, task, total_loss, n_steps, elapsed_time,
task (TrainTask): The current task.
total_loss: Total training loss accumulated over n_steps training steps.
n_steps: Number of steps over which the metrics were accumulated.
elapsed_time: Time of execusion of n_steps training steps.
elapsed_time: Time of execution of n_steps training steps.
optimizer_metrics: Dict from optimizer metric name to metric values.
summary_writer: Jaxboard summary writer for saving provided metrics.
"""

trax/supervised/training_test.py (1 addition & 1 deletion)
@@ -106,7 +106,7 @@ def test_train_dense_layer_evals(self):
self.assertEqual(10, training_session.step) # Unchanged

def test_summaries_are_written(self):
"""Training writes down metrics when writting is turned on."""
"""Training writes down metrics when writing is turned on."""
model = tl.Serial(tl.Dense(1))
task = training.TrainTask(
_very_simple_data(), tl.L2Loss(), optimizers.SGD(.01))

trax/tf_numpy/numpy_impl/utils.py (1 addition & 1 deletion)
@@ -303,7 +303,7 @@ def _maybe_static(x):
return value


# All the following functions exist becaues get_static_value can't handle
# All the following functions exist because get_static_value can't handle
# their TF counterparts.

