Merge pull request #965 from levskaya:optimizer_fixes
PiperOrigin-RevId: 355229213
Flax Authors committed Feb 2, 2021
2 parents b112b11 + f9038b1, commit 61580b9
Showing 2 changed files with 2 additions and 2 deletions.

flax/optim/adam.py: 1 addition & 1 deletion

@@ -98,7 +98,7 @@ def apply_param_gradient(self, step, hyper_params, param, state, grad):
     grad_sq_ema = beta2 * state.grad_sq_ema + (1. - beta2) * grad_sq
 
     # bias correction
-    t = step + 1.
+    t = jnp.array(step + 1, lax.dtype(param.dtype))
     grad_ema_corr = grad_ema / (1 - beta1 ** t)
     grad_sq_ema_corr = grad_sq_ema / (1 - beta2 ** t)

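For context, a minimal standalone sketch of the bias-correction step as it reads after this change. The helper name bias_corrected_moments and the hyperparameter defaults are illustrative, not Flax API; in Flax the logic lives inside apply_param_gradient.

import jax.numpy as jnp
from jax import lax

def bias_corrected_moments(step, param, grad_ema, grad_sq_ema,
                           beta1=0.9, beta2=0.999):
  # Illustrative helper: cast the integer step count to the parameter's
  # dtype before exponentiation, as the patched line above does.
  t = jnp.array(step + 1, lax.dtype(param.dtype))
  grad_ema_corr = grad_ema / (1. - beta1 ** t)
  grad_sq_ema_corr = grad_sq_ema / (1. - beta2 ** t)
  return grad_ema_corr, grad_sq_ema_corr
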
flax/optim/lamb.py: 1 addition & 1 deletion

@@ -74,7 +74,7 @@ def apply_param_gradient(self, step, hyper_params, param, state, grad):
     grad_ema = beta1 * state.grad_ema + (1. - beta1) * grad
     grad_sq_ema = beta2 * state.grad_sq_ema + (1. - beta2) * grad_sq
 
-    t = step + 1.
+    t = jnp.array(step + 1, lax.dtype(param.dtype))
     grad_ema_corr = grad_ema / (1. - beta1 ** t)
     grad_sq_ema_corr = grad_sq_ema / (1. - beta2 ** t)

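Both files get the same one-line change, presumably so the bias-correction arithmetic stays in the parameter's dtype (for example a bfloat16 or float16 parameter) rather than picking up the dtype of a Python-float step count. A quick check under that assumption, using a made-up bfloat16 parameter:

import jax.numpy as jnp
from jax import lax

param = jnp.zeros((3,), dtype=jnp.bfloat16)    # hypothetical half-precision parameter
grad_ema = jnp.zeros_like(param)               # first-moment EMA, same dtype as param
step = jnp.array(0, dtype=jnp.int32)           # integer step counter

t = jnp.array(step + 1, lax.dtype(param.dtype))
print(t.dtype)                                 # bfloat16
print((grad_ema / (1. - 0.9 ** t)).dtype)      # bfloat16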
