From e875b99117bb06febd46c0493fecee8cf0e4952e Mon Sep 17 00:00:00 2001 From: HydrogenSulfate <490868991@qq.com> Date: Tue, 13 Jun 2023 15:08:06 +0800 Subject: [PATCH] update formula and code for autodiff (#368) --- ppsci/autodiff/ad.py | 60 +++++++++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 23 deletions(-) diff --git a/ppsci/autodiff/ad.py b/ppsci/autodiff/ad.py index b50650675..a2a7897b1 100644 --- a/ppsci/autodiff/ad.py +++ b/ppsci/autodiff/ad.py @@ -21,7 +21,7 @@ import paddle -class Jacobian: +class _Jacobian: """Compute Jacobian matrix J: J[i][j] = dy_i/dx_j, where i = 0, ..., dim_y-1 and j = 0, ..., dim_x - 1. @@ -47,19 +47,23 @@ def __call__(self, i: int = 0, j: Optional[int] = None) -> "paddle.Tensor": J[i]. """ if not 0 <= i < self.dim_y: - raise ValueError(f"i={i} is not valid.") + raise ValueError(f"i({i}) should in range [0, {self.dim_y}).") if j is not None and not 0 <= j < self.dim_x: - raise ValueError(f"j={j} is not valid.") + raise ValueError(f"j({j}) should in range [0, {self.dim_x}).") # Compute J[i] if i not in self.J: y = self.ys[:, i : i + 1] if self.dim_y > 1 else self.ys self.J[i] = paddle.grad(y, self.xs, create_graph=True)[0] - return self.J[i] if j is None or self.dim_x == 1 else self.J[i][:, j : j + 1] + return self.J[i] if (j is None or self.dim_x == 1) else self.J[i][:, j : j + 1] class Jacobians: - """Compute multiple Jacobians. + r"""Compute multiple Jacobians. + + $$ + \rm Jacobian(ys, xs, i, j) = \dfrac{\partial ys_i}{\partial xs_j} + $$ A new instance will be created for a new pair of (output, input). For the (output, input) pair that has been computed before, it will reuse the previous instance, @@ -97,21 +101,25 @@ def __call__( """ key = (ys, xs) if key not in self.Js: - self.Js[key] = Jacobian(ys, xs) + self.Js[key] = _Jacobian(ys, xs) return self.Js[key](i, j) - def clear(self): + def _clear(self): """Clear cached Jacobians.""" self.Js = {} -class Hessian: +# Use high-order differentiation with singleton pattern for convenient +jacobian = Jacobians() + + +class _Hessian: """Compute Hessian matrix H: H[i][j] = d^2y / dx_i dx_j, where i,j = 0,..., dim_x-1. It is lazy evaluation, i.e., it only computes H[i][j] when needed. Args: - y: Output Tensor of shape (batch_size, 1) or (batch_size, dim_y > 1). + ys: Output Tensor of shape (batch_size, 1) or (batch_size, dim_y > 1). xs: Input Tensor of shape (batch_size, dim_x). component: If `y` has the shape (batch_size, dim_y > 1), then `y[:, component]` is used to compute the Hessian. Do not use if `y` has the shape (batch_size, @@ -122,29 +130,32 @@ class Hessian: def __init__( self, - y: "paddle.Tensor", + ys: "paddle.Tensor", xs: "paddle.Tensor", component: Optional[int] = None, grad_y: Optional["paddle.Tensor"] = None, ): - dim_y = y.shape[1] + dim_y = ys.shape[1] if dim_y > 1: if component is None: - raise ValueError("The component of y is missing.") + raise ValueError( + f"component({component}) can not be None when dim_y({dim_y})>1." + ) if component >= dim_y: raise ValueError( - f"The component of y={component} cannot be larger than the " - f"dimension={dim_y}." + f"component({component}) should be smaller than dim_y({dim_y})." ) else: if component is not None: - raise ValueError("Do not use component for 1D y.") + raise ValueError( + f"component{component} should be set to None when dim_y({dim_y})=1." + ) component = 0 if grad_y is None: - grad_y = jacobian(y, xs, i=component, j=None) - self.H = Jacobian(grad_y, xs) + grad_y = jacobian(ys, xs, i=component, j=None) + self.H = _Jacobian(grad_y, xs) def __call__(self, i: int = 0, j: int = 0): """Returns H[`i`][`j`].""" @@ -152,7 +163,11 @@ def __call__(self, i: int = 0, j: int = 0): class Hessians: - """Compute multiple Hessians. + r"""Compute multiple Hessians. + + $$ + \rm Hessian(ys, xs, component, i, j) = \dfrac{\partial ys_{component}}{\partial xs_i \partial xs_j} + $$ A new instance will be created for a new pair of (output, input). For the (output, input) pair that has been computed before, it will reuse the previous instance, @@ -197,20 +212,19 @@ def __call__( """ key = (ys, xs, component) if key not in self.Hs: - self.Hs[key] = Hessian(ys, xs, component=component, grad_y=grad_y) + self.Hs[key] = _Hessian(ys, xs, component=component, grad_y=grad_y) return self.Hs[key](i, j) - def clear(self): + def _clear(self): """Clear cached Hessians.""" self.Hs = {} # Use high-order differentiation with singleton pattern for convenient -jacobian = Jacobians() hessian = Hessians() def clear(): """Clear cached Jacobians and Hessians.""" - jacobian.clear() - hessian.clear() + jacobian._clear() + hessian._clear()