[Dy2St] pir dy2st unittest verification - Part 9 #59232
Changes from all commits
cf24743
e2de9e8
aad8533
1b18785
5682be4
924e4ee
07b90ff
607b7ba
b834e89
d04c3e7
49a4cd7
@@ -16,6 +16,7 @@
 import warnings

 from paddle.base.libpaddle import DataType
+from paddle.base.wrapped_decorator import wrap_decorator

 from . import OpResult
@@ -31,6 +32,21 @@
 ]


+def _fake_interface_only_(func):

Review comment: framework.py already has an identical `_fake_interface_only_` function. Can we use the one from framework.py directly?
Author reply: That would create a circular import, emmm; we can only reuse it once that is resolved (a possible workaround is sketched after this hunk).

+    def __impl__(*args, **kwargs):
+        raise AssertionError(
+            f"'{func.__name__}' only can be called by `paddle.Tensor` in dynamic graph mode. Suggestions:\n"
+            " 1. If you are in static graph mode, you can switch to dynamic graph mode by turning off `paddle.enable_static()` or calling `paddle.disable_static()`.\n"
+            " 2. If you are using `@paddle.jit.to_static`, you can call `paddle.jit.enable_to_static(False)`. "
+            f"If you have to translate dynamic graph to static graph, please use other API to replace '{func.__name__}'."
+        )
+
+    return __impl__
+
+
+fake_interface_only = wrap_decorator(_fake_interface_only_)
+
+
 def create_tensor_with_batchsize(ref_var, value, dtype):
     assert isinstance(ref_var, OpResult)
     value = float(value)
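As a side note on the circular-import point above, one possible workaround is to defer the framework.py import to call time. The sketch below is an illustration only, not part of this PR; it assumes `_fake_interface_only_` is importable from `paddle.base.framework`, which is the helper the review comment points to.

def fake_interface_only(func):
    # Hypothetical alternative: reuse the helper from framework.py by importing
    # it lazily, so this module does not import framework.py at module-import
    # time and the import cycle noted above never forms.
    def __impl__(*args, **kwargs):
        from paddle.base.framework import _fake_interface_only_

        return _fake_interface_only_(func)(*args, **kwargs)

    return __impl__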
@@ -356,6 +372,43 @@ def clone(self):
         """
         return paddle.assign(self)

+    @fake_interface_only
+    def clear_gradient(self):

Review comment: TODO: once CI passes, ask 震哥 to review the API change.

+        """
+        **Notes**:
+            **1. This API is ONLY available in Dygraph mode**
+
+            **2. Use it only when the OpResult has a gradient; normally we use this for Parameters, since other temporary OpResults will be deleted by Python's GC**
+
+        Clear (set to ``0``) the gradient of the current OpResult.
+
+        Returns: None
+
+        Examples:
+            .. code-block:: python
+
+                >>> import paddle
+                >>> import paddle.base as base
+                >>> import numpy as np
+
+                >>> x = np.ones([2, 2], np.float32)
+                >>> inputs2 = []
+                >>> for _ in range(10):
+                ...     tmp = base.dygraph.base.to_variable(x)
+                ...     tmp.stop_gradient = False
+                ...     inputs2.append(tmp)
+                >>> ret2 = paddle.add_n(inputs2)
+                >>> loss2 = paddle.sum(ret2)
+                >>> loss2.retain_grads()
+                >>> loss2.backward()
+                >>> print(loss2.gradient())
+                1.0
+                >>> loss2.clear_gradient()
+                >>> print("After clear {}".format(loss2.gradient()))
+                After clear 0.0
+        """
+        pass
+
     def append(self, var):
         """
         **Notes**:
@@ -383,6 +436,7 @@ def append(self, var):
     ('astype', astype),
     ('size', _size_),
     ('clone', clone),
+    ('clear_gradient', clear_gradient),
     ('append', append),
     (
         '__add__',
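The (name, method) list above only takes effect once it is attached to `OpResult`; the registration helper itself sits outside the hunks shown here. A rough sketch of what that step presumably looks like (an assumption for illustration, not code from this PR):

def monkey_patch_opresult_sketch(patch_list):
    # Assumed shape of the registration step: each (name, method) pair in the
    # patch list is set on the OpResult class, so PIR results gain Tensor-like
    # methods such as `clone`, `astype`, and the newly added `clear_gradient`.
    for method_name, method_impl in patch_list:
        setattr(OpResult, method_name, method_impl)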
@@ -21,6 +21,7 @@
 from dygraph_to_static_utils import (
     Dy2StTestBase,
     test_default_mode_only,
+    test_sot_with_pir_only,
 )
 from predictor_utils import PredictorTools


@@ -34,8 +35,8 @@

 SEED = 2020

-if paddle.base.is_compiled_with_cuda():
-    paddle.base.set_flags({'FLAGS_cudnn_deterministic': True})
+if paddle.is_compiled_with_cuda():
+    paddle.set_flags({'FLAGS_cudnn_deterministic': True})


 class SimpleImgConvPool(paddle.nn.Layer):
@@ -134,9 +135,9 @@ def setUp(self):
         self.epoch_num = 1
         self.batch_size = 64
         self.place = (
-            base.CUDAPlace(0)
-            if base.is_compiled_with_cuda()
-            else base.CPUPlace()
+            paddle.CUDAPlace(0)
+            if paddle.is_compiled_with_cuda()
+            else paddle.CPUPlace()
         )
         self.train_reader = paddle.batch(
             paddle.dataset.mnist.train(),
@@ -173,14 +174,14 @@ def test_mnist_to_static(self):
             err_msg=f'dygraph is {dygraph_loss}\n static_res is \n{static_loss}',
         )

-    @test_default_mode_only
+    @test_sot_with_pir_only
     def test_mnist_declarative_cpu_vs_mkldnn(self):
         dygraph_loss_cpu = self.train_dygraph()
-        base.set_flags({'FLAGS_use_mkldnn': True})
+        paddle.set_flags({'FLAGS_use_mkldnn': True})
         try:
             dygraph_loss_mkldnn = self.train_dygraph()
         finally:
-            base.set_flags({'FLAGS_use_mkldnn': False})
+            paddle.set_flags({'FLAGS_use_mkldnn': False})
         np.testing.assert_allclose(
             dygraph_loss_cpu,
             dygraph_loss_mkldnn,
@@ -192,62 +193,61 @@ def test_mnist_declarative_cpu_vs_mkldnn(self):

     def train(self, to_static=False):
         loss_data = []
-        with base.dygraph.guard(self.place):
-            base.default_main_program().random_seed = SEED
-            base.default_startup_program().random_seed = SEED
-            mnist = MNIST()
-            if to_static:
-                mnist = paddle.jit.to_static(mnist, full_graph=True)
-            adam = Adam(learning_rate=0.001, parameters=mnist.parameters())
-
-            for epoch in range(self.epoch_num):
-                start = time()
-                for batch_id, data in enumerate(self.train_reader()):
-                    dy_x_data = np.array(
-                        [x[0].reshape(1, 28, 28) for x in data]
-                    ).astype('float32')
-                    y_data = (
-                        np.array([x[1] for x in data])
-                        .astype('int64')
-                        .reshape(-1, 1)
-                    )
-
-                    img = to_variable(dy_x_data)
-                    label = to_variable(y_data)
-
-                    label.stop_gradient = True
-                    prediction, acc, avg_loss = mnist(img, label=label)
-                    avg_loss.backward()
-
-                    adam.minimize(avg_loss)
-                    loss_data.append(float(avg_loss))
-                    # save checkpoint
-                    mnist.clear_gradients()
-                    if batch_id % 10 == 0:
-                        print(
-                            "Loss at epoch {} step {}: loss: {:}, acc: {}, cost: {}".format(
-                                epoch,
-                                batch_id,
-                                avg_loss.numpy(),
-                                acc.numpy(),
-                                time() - start,
-                            )
-                        )
-                        start = time()
-                    if batch_id == 50:
-                        mnist.eval()
-                        prediction, acc, avg_loss = mnist(img, label)
-                        loss_data.append(float(avg_loss))
-                        # new save load check
-                        self.check_jit_save_load(
-                            mnist,
-                            [dy_x_data],
-                            [img, label],
-                            to_static,
-                            prediction,
-                            [img.name],
-                        )
-                        break
+        base.default_main_program().random_seed = SEED
+        base.default_startup_program().random_seed = SEED
+        mnist = MNIST()
+        if to_static:
+            mnist = paddle.jit.to_static(mnist, full_graph=True)
+        adam = Adam(learning_rate=0.001, parameters=mnist.parameters())
+
+        for epoch in range(self.epoch_num):
+            start = time()
+            for batch_id, data in enumerate(self.train_reader()):
+                dy_x_data = np.array(
+                    [x[0].reshape(1, 28, 28) for x in data]
+                ).astype('float32')
+                y_data = (
+                    np.array([x[1] for x in data])
+                    .astype('int64')
+                    .reshape(-1, 1)
+                )
+
+                img = to_variable(dy_x_data)

Review comment: when we run into legacy APIs like `to_variable` later on, can they be replaced with `paddle.to_tensor`?
Author reply: yes, some of these have already been cleaned up; this one was probably just missed (see the example after this hunk).

+                label = to_variable(y_data)
+
+                label.stop_gradient = True
+                prediction, acc, avg_loss = mnist(img, label=label)
+                avg_loss.backward()
+
+                adam.minimize(avg_loss)
+                loss_data.append(float(avg_loss))
+                # save checkpoint
+                mnist.clear_gradients()
+                if batch_id % 10 == 0:
+                    print(
+                        "Loss at epoch {} step {}: loss: {:}, acc: {}, cost: {}".format(
+                            epoch,
+                            batch_id,
+                            avg_loss.numpy(),
+                            acc.numpy(),
+                            time() - start,
+                        )
+                    )
+                    start = time()
+                if batch_id == 50:
+                    mnist.eval()
+                    prediction, acc, avg_loss = mnist(img, label)
+                    loss_data.append(float(avg_loss))
+                    # new save load check
+                    self.check_jit_save_load(
+                        mnist,
+                        [dy_x_data],
+                        [img, label],
+                        to_static,
+                        prediction,
+                        [img.name],
+                    )
+                    break
         return loss_data

     def check_jit_save_load(
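Following up on the `to_variable` thread above, here is a small illustration of the suggested modernization (not part of this diff): the legacy dygraph helper can be replaced with the public `paddle.to_tensor` API.

import numpy as np
import paddle

dy_x_data = np.ones([4, 1, 28, 28], dtype='float32')

# Legacy style used in the test above (needs the dygraph `to_variable` helper):
#     img = to_variable(dy_x_data)
# Modern equivalent using the public API:
img = paddle.to_tensor(dy_x_data)
img.stop_gradient = False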
Review comment: the `elif` does not seem necessary and can simply be deleted. That branch was presumably there to align dygraph `x.size` with the legacy-IR `x.size()`, a dynamic/static inconsistency; under PIR the dynamic and static behaviors are unified, so `convert_attr` and `AttributeJstTransformer` should both be removable. We could record a `TODO(cleanup-legacy-ir)` noting that the related convert helper and Transformer can be deleted outright once the legacy IR is retired.
Author reply: already changed directly.
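To make the `x.size` point above concrete, below is a minimal sketch of the kind of attribute shim the thread describes; it is hypothetical code for illustration, not taken from Paddle. Under the legacy IR a static-graph Variable exposes `size()` as a method while a dygraph Tensor exposes `size` as a property, so the dynamic-to-static transformer has to rewrite the access; once PIR unifies the two behaviors, the shim and the related Transformer become dead code.

def convert_attr_sketch(x, attr):
    # Legacy-IR static graph: `size` is a method, so it has to be called.
    if attr == 'size' and callable(getattr(x, 'size', None)):
        return x.size()
    # Dygraph Tensor, and PIR where dynamic and static are unified: plain
    # attribute access works directly.
    return getattr(x, attr)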