diff --git a/plsc/core/grad_clip.py b/plsc/core/grad_clip.py
index bb2356bd35fa1..1668f307a514c 100644
--- a/plsc/core/grad_clip.py
+++ b/plsc/core/grad_clip.py
@@ -14,6 +14,7 @@
 import warnings
 
 import paddle
+from paddle import _legacy_C_ops as _C_ops
 
 
 def _squared_l2_norm(x):
@@ -22,7 +23,7 @@ def _squared_l2_norm(x):
         sum_square = paddle.sum(square)
         return sum_square
 
-    return paddle._C_ops.squared_l2_norm(x)
+    return _C_ops.squared_l2_norm(x)
 
 
 class ClipGradByGlobalNorm(object):
diff --git a/plsc/core/grad_scaler.py b/plsc/core/grad_scaler.py
index a56e8712d0378..4ba1527665a5f 100644
--- a/plsc/core/grad_scaler.py
+++ b/plsc/core/grad_scaler.py
@@ -15,7 +15,7 @@
 from collections import defaultdict
 
 from paddle.amp import GradScaler as FrameworkGradScaler
 from paddle.fluid.dygraph.amp import OptimizerState
-from paddle import _C_ops
+from paddle import _legacy_C_ops as _C_ops
 
 import paddle
diff --git a/plsc/optimizer/adamw.py b/plsc/optimizer/adamw.py
index 03a26ca813e0b..7ff4614dfbc28 100644
--- a/plsc/optimizer/adamw.py
+++ b/plsc/optimizer/adamw.py
@@ -18,6 +18,7 @@
 import math
 
 import paddle
+from paddle import _legacy_C_ops as _C_ops
 
 from .optimizer import Optimizer
 from plsc.utils import logger
@@ -100,7 +101,7 @@ def step(self):
                         paddle.float16, paddle.bfloat16
                 }:
                     master_param = state['master_param']
-                _, _, _, _, _, _ = paddle._C_ops.adamw(
+                _, _, _, _, _, _ = _C_ops.adamw(
                     p, grad, paddle.to_tensor(lr), exp_avg,
                     exp_avg_sq, beta1_pow, beta2_pow, master_param, p,
                     exp_avg, exp_avg_sq, beta1_pow,
diff --git a/plsc/optimizer/momentum.py b/plsc/optimizer/momentum.py
index 3849e04dcca3e..84121e3502b4a 100644
--- a/plsc/optimizer/momentum.py
+++ b/plsc/optimizer/momentum.py
@@ -18,6 +18,7 @@
 import math
 
 import paddle
+from paddle import _legacy_C_ops as _C_ops
 
 from .optimizer import Optimizer
 from plsc.utils import logger
@@ -101,7 +102,7 @@ def step(self):
                     axis = getattr(p, 'axis', None)
                     assert index is not None
                     assert axis is not None
-                    _, _, _ = paddle._C_ops.sparse_momentum(
+                    _, _, _ = _C_ops.sparse_momentum(
                         p,
                         grad,
                         exp_avg,
@@ -125,7 +126,7 @@ def step(self):
                         'multi_precision',
                         master_param is not None)
                 else:
-                    _, _, _ = paddle._C_ops.momentum(
+                    _, _, _ = _C_ops.momentum(
                         p,
                         grad,
                         exp_avg,