update formula and code for autodiff #368

Merged
60 changes: 37 additions & 23 deletions ppsci/autodiff/ad.py
@@ -21,7 +21,7 @@
import paddle


class Jacobian:
class _Jacobian:
"""Compute Jacobian matrix J: J[i][j] = dy_i/dx_j, where i = 0, ..., dim_y-1 and
j = 0, ..., dim_x - 1.

@@ -47,19 +47,23 @@ def __call__(self, i: int = 0, j: Optional[int] = None) -> "paddle.Tensor":
J[i].
"""
if not 0 <= i < self.dim_y:
raise ValueError(f"i={i} is not valid.")
raise ValueError(f"i({i}) should in range [0, {self.dim_y}).")
if j is not None and not 0 <= j < self.dim_x:
raise ValueError(f"j={j} is not valid.")
raise ValueError(f"j({j}) should in range [0, {self.dim_x}).")
# Compute J[i]
if i not in self.J:
y = self.ys[:, i : i + 1] if self.dim_y > 1 else self.ys
self.J[i] = paddle.grad(y, self.xs, create_graph=True)[0]

return self.J[i] if j is None or self.dim_x == 1 else self.J[i][:, j : j + 1]
return self.J[i] if (j is None or self.dim_x == 1) else self.J[i][:, j : j + 1]


class Jacobians:
"""Compute multiple Jacobians.
r"""Compute multiple Jacobians.

$$
\rm Jacobian(ys, xs, i, j) = \dfrac{\partial ys_i}{\partial xs_j}
$$

A new instance will be created for a new pair of (output, input). For the (output,
input) pair that has been computed before, it will reuse the previous instance,
@@ -97,21 +101,25 @@ def __call__(
"""
key = (ys, xs)
if key not in self.Js:
self.Js[key] = Jacobian(ys, xs)
self.Js[key] = _Jacobian(ys, xs)
return self.Js[key](i, j)

def clear(self):
def _clear(self):
"""Clear cached Jacobians."""
self.Js = {}


class Hessian:
# Use high-order differentiation with singleton pattern for convenience
jacobian = Jacobians()


class _Hessian:
"""Compute Hessian matrix H: H[i][j] = d^2y / dx_i dx_j, where i,j = 0,..., dim_x-1.

It is lazily evaluated, i.e., H[i][j] is computed only when needed.

Args:
y: Output Tensor of shape (batch_size, 1) or (batch_size, dim_y > 1).
ys: Output Tensor of shape (batch_size, 1) or (batch_size, dim_y > 1).
xs: Input Tensor of shape (batch_size, dim_x).
component: If `ys` has the shape (batch_size, dim_y > 1), then `ys[:, component]`
is used to compute the Hessian. Do not use if `ys` has the shape (batch_size,
@@ -122,37 +130,44 @@ class Hessian:

def __init__(
self,
y: "paddle.Tensor",
ys: "paddle.Tensor",
xs: "paddle.Tensor",
component: Optional[int] = None,
grad_y: Optional["paddle.Tensor"] = None,
):
dim_y = y.shape[1]
dim_y = ys.shape[1]

if dim_y > 1:
if component is None:
raise ValueError("The component of y is missing.")
raise ValueError(
f"component({component}) can not be None when dim_y({dim_y})>1."
)
if component >= dim_y:
raise ValueError(
f"The component of y={component} cannot be larger than the "
f"dimension={dim_y}."
f"component({component}) should be smaller than dim_y({dim_y})."
)
else:
if component is not None:
raise ValueError("Do not use component for 1D y.")
raise ValueError(
f"component{component} should be set to None when dim_y({dim_y})=1."
)
component = 0

if grad_y is None:
grad_y = jacobian(y, xs, i=component, j=None)
self.H = Jacobian(grad_y, xs)
grad_y = jacobian(ys, xs, i=component, j=None)
self.H = _Jacobian(grad_y, xs)

def __call__(self, i: int = 0, j: int = 0):
"""Returns H[`i`][`j`]."""
return self.H(i, j)


class Hessians:
"""Compute multiple Hessians.
r"""Compute multiple Hessians.

$$
\rm Hessian(ys, xs, component, i, j) = \dfrac{\partial^2 ys_{component}}{\partial xs_i \partial xs_j}
$$

A new instance will be created for a new pair of (output, input). For the (output,
input) pair that has been computed before, it will reuse the previous instance,
@@ -197,20 +212,19 @@ def __call__(
"""
key = (ys, xs, component)
if key not in self.Hs:
self.Hs[key] = Hessian(ys, xs, component=component, grad_y=grad_y)
self.Hs[key] = _Hessian(ys, xs, component=component, grad_y=grad_y)
return self.Hs[key](i, j)

def clear(self):
def _clear(self):
"""Clear cached Hessians."""
self.Hs = {}


# Use high-order differentiation with singleton pattern for convenience
jacobian = Jacobians()
hessian = Hessians()


def clear():
"""Clear cached Jacobians and Hessians."""
jacobian.clear()
hessian.clear()
jacobian._clear()
hessian._clear()
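
Usage sketch (not part of this PR's diff): a minimal example of how the exported `jacobian`, `hessian`, and `clear` helpers defined above might be called. The import path `ppsci.autodiff.ad` and the toy tensors are assumptions for illustration; the call signatures follow the code shown in this file.

```python
# Minimal sketch, assuming `jacobian`, `hessian` and `clear` are importable
# from ppsci.autodiff.ad as defined in the diff above.
import paddle

from ppsci.autodiff.ad import clear, hessian, jacobian

# Toy input of shape (batch_size, dim_x) with gradients enabled.
x = paddle.rand([8, 2])
x.stop_gradient = False

# Per-sample scalar output y = x0^2 + sin(x1), shape (batch_size, 1).
y = x[:, 0:1] ** 2 + paddle.sin(x[:, 1:2])

dy_dx0 = jacobian(y, x, i=0, j=0)      # dy/dx0, expected 2 * x0
d2y_dx1dx1 = hessian(y, x, i=1, j=1)   # d^2y/dx1^2, expected -sin(x1)

clear()  # drop cached Jacobians/Hessians, e.g. between optimizer iterations
```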