Skip to content

Commit

Permalink
update formula and code for autodiff (PaddlePaddle#368)
Browse files Browse the repository at this point in the history
  • Loading branch information
HydrogenSulfate authored Jun 13, 2023
1 parent f16fe48 commit e875b99
Showing 1 changed file with 37 additions and 23 deletions.
60 changes: 37 additions & 23 deletions ppsci/autodiff/ad.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import paddle


class Jacobian:
class _Jacobian:
"""Compute Jacobian matrix J: J[i][j] = dy_i/dx_j, where i = 0, ..., dim_y-1 and
j = 0, ..., dim_x - 1.
Expand All @@ -47,19 +47,23 @@ def __call__(self, i: int = 0, j: Optional[int] = None) -> "paddle.Tensor":
J[i].
"""
if not 0 <= i < self.dim_y:
raise ValueError(f"i={i} is not valid.")
raise ValueError(f"i({i}) should in range [0, {self.dim_y}).")
if j is not None and not 0 <= j < self.dim_x:
raise ValueError(f"j={j} is not valid.")
raise ValueError(f"j({j}) should in range [0, {self.dim_x}).")
# Compute J[i]
if i not in self.J:
y = self.ys[:, i : i + 1] if self.dim_y > 1 else self.ys
self.J[i] = paddle.grad(y, self.xs, create_graph=True)[0]

return self.J[i] if j is None or self.dim_x == 1 else self.J[i][:, j : j + 1]
return self.J[i] if (j is None or self.dim_x == 1) else self.J[i][:, j : j + 1]


class Jacobians:
"""Compute multiple Jacobians.
r"""Compute multiple Jacobians.
$$
\rm Jacobian(ys, xs, i, j) = \dfrac{\partial ys_i}{\partial xs_j}
$$
A new instance will be created for a new pair of (output, input). For the (output,
input) pair that has been computed before, it will reuse the previous instance,
Expand Down Expand Up @@ -97,21 +101,25 @@ def __call__(
"""
key = (ys, xs)
if key not in self.Js:
self.Js[key] = Jacobian(ys, xs)
self.Js[key] = _Jacobian(ys, xs)
return self.Js[key](i, j)

def clear(self):
def _clear(self):
"""Clear cached Jacobians."""
self.Js = {}


class Hessian:
# Use high-order differentiation with singleton pattern for convenient
jacobian = Jacobians()


class _Hessian:
"""Compute Hessian matrix H: H[i][j] = d^2y / dx_i dx_j, where i,j = 0,..., dim_x-1.
It is lazy evaluation, i.e., it only computes H[i][j] when needed.
Args:
y: Output Tensor of shape (batch_size, 1) or (batch_size, dim_y > 1).
ys: Output Tensor of shape (batch_size, 1) or (batch_size, dim_y > 1).
xs: Input Tensor of shape (batch_size, dim_x).
component: If `y` has the shape (batch_size, dim_y > 1), then `y[:, component]`
is used to compute the Hessian. Do not use if `y` has the shape (batch_size,
Expand All @@ -122,37 +130,44 @@ class Hessian:

def __init__(
self,
y: "paddle.Tensor",
ys: "paddle.Tensor",
xs: "paddle.Tensor",
component: Optional[int] = None,
grad_y: Optional["paddle.Tensor"] = None,
):
dim_y = y.shape[1]
dim_y = ys.shape[1]

if dim_y > 1:
if component is None:
raise ValueError("The component of y is missing.")
raise ValueError(
f"component({component}) can not be None when dim_y({dim_y})>1."
)
if component >= dim_y:
raise ValueError(
f"The component of y={component} cannot be larger than the "
f"dimension={dim_y}."
f"component({component}) should be smaller than dim_y({dim_y})."
)
else:
if component is not None:
raise ValueError("Do not use component for 1D y.")
raise ValueError(
f"component{component} should be set to None when dim_y({dim_y})=1."
)
component = 0

if grad_y is None:
grad_y = jacobian(y, xs, i=component, j=None)
self.H = Jacobian(grad_y, xs)
grad_y = jacobian(ys, xs, i=component, j=None)
self.H = _Jacobian(grad_y, xs)

def __call__(self, i: int = 0, j: int = 0):
"""Returns H[`i`][`j`]."""
return self.H(i, j)


class Hessians:
"""Compute multiple Hessians.
r"""Compute multiple Hessians.
$$
\rm Hessian(ys, xs, component, i, j) = \dfrac{\partial ys_{component}}{\partial xs_i \partial xs_j}
$$
A new instance will be created for a new pair of (output, input). For the (output,
input) pair that has been computed before, it will reuse the previous instance,
Expand Down Expand Up @@ -197,20 +212,19 @@ def __call__(
"""
key = (ys, xs, component)
if key not in self.Hs:
self.Hs[key] = Hessian(ys, xs, component=component, grad_y=grad_y)
self.Hs[key] = _Hessian(ys, xs, component=component, grad_y=grad_y)
return self.Hs[key](i, j)

def clear(self):
def _clear(self):
"""Clear cached Hessians."""
self.Hs = {}


# Use high-order differentiation with singleton pattern for convenient
jacobian = Jacobians()
hessian = Hessians()


def clear():
"""Clear cached Jacobians and Hessians."""
jacobian.clear()
hessian.clear()
jacobian._clear()
hessian._clear()

0 comments on commit e875b99

Please sign in to comment.