rem is_compiled_with_npu #52385

Merged
.flake8 (3 changes: 0 additions & 3 deletions)
@@ -8,9 +8,6 @@ exclude =
./python/paddle/fluid/tra**,
# Exclude third-party libraries
./python/paddle/utils/gast/**,
# Exclude files that will be removed in the future, see more at
# https://github.com/PaddlePaddle/Paddle/pull/46782#issuecomment-1273033731
./python/paddle/fluid/tests/unittests/npu/**,
ignore =
# Whitespace before ‘,’, ‘;’, or ‘:’, it is not compatible with black
E203,
.pre-commit-config.yaml (3 changes: 1 addition & 2 deletions)
@@ -4,8 +4,7 @@ exclude: |
patches/.+|
paddle/fluid/framework/fleet/heter_ps/cudf/.+|
paddle/fluid/distributed/ps/thirdparty/round_robin.h|
python/paddle/utils/gast/.+|
python/paddle/fluid/tests/unittests/npu/.+
python/paddle/utils/gast/.+
)$
repos:
# Common hooks
paddle/fluid/pybind/pybind.cc (27 changes: 0 additions & 27 deletions)
@@ -265,14 +265,6 @@ bool IsCompiledWithROCM() {
#endif
}

bool IsCompiledWithAscend() {
#ifndef PADDLE_WITH_ASCEND
return false;
#else
return true;
#endif
}

bool IsCompiledWithXPU() {
#ifndef PADDLE_WITH_XPU
return false;
@@ -281,8 +273,6 @@ bool IsCompiledWithXPU() {
#endif
}

bool IsCompiledWithNPU() { return false; }

bool IsCompiledWithCustomDevice(std::string device_type) {
#ifndef PADDLE_WITH_CUSTOM_DEVICE
return false;
@@ -1592,14 +1582,6 @@ All parameter, weight, gradient are variables in Paddle.
return context;
#endif
})
.def_static(
"create",
[](paddle::platform::NPUPlace &place)
-> paddle::platform::DeviceContext * {
PADDLE_THROW(platform::errors::PermissionDenied(
"Cannot use NPUPlace in CPU/GPU/XPU version, "
"Please recompile or reinstall Paddle with NPU support."));
})
.def_static("create",
[](paddle::platform::CustomPlace &place)
-> paddle::platform::DeviceContext * {
@@ -1769,13 +1751,6 @@ All parameter, weight, gradient are variables in Paddle.
pybind11::gil_scoped_release release;
self.Run(scope, place);
})
.def("run",
[](OperatorBase &self,
const Scope &scope,
const platform::NPUPlace &place) {
pybind11::gil_scoped_release release;
self.Run(scope, place);
})
.def("run",
[](OperatorBase &self,
const Scope &scope,
@@ -1985,9 +1960,7 @@ All parameter, weight, gradient are variables in Paddle.
});
m.def("is_compiled_with_avx", IsCompiledWithAVX);
m.def("is_compiled_with_cuda", IsCompiledWithCUDA);
m.def("is_compiled_with_ascend", IsCompiledWithAscend);
m.def("is_compiled_with_rocm", IsCompiledWithROCM);
m.def("is_compiled_with_npu", IsCompiledWithNPU);
m.def("is_compiled_with_custom_device", IsCompiledWithCustomDevice);
m.def("is_compiled_with_ipu", IsCompiledWithIPU);
m.def("is_compiled_with_xpu", IsCompiledWithXPU);
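With `IsCompiledWithNPU` and `IsCompiledWithAscend` removed from the pybind layer, NPU availability can only be queried through the custom-device path that this diff keeps. A minimal sketch of the replacement check; the plugin name "npu" is an assumption for illustration and is not defined by this PR:

```python
# Hedged sketch: detect device support via the custom-device API kept by this PR.
# The plugin name "npu" is hypothetical; use whatever name your plugin registers.
import paddle

if paddle.device.is_compiled_with_custom_device("npu"):
    paddle.device.set_device("npu:0")   # resolves to CustomPlace("npu", 0)
else:
    paddle.device.set_device("cpu")     # fall back when no plugin is present

print(paddle.device.get_device())
```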
pyproject.toml (2 changes: 0 additions & 2 deletions)
@@ -14,7 +14,6 @@ extend_skip_glob = [
"python/paddle/fluid/[!t]**",
"python/paddle/fluid/tra**",
"python/paddle/utils/gast/**",
"python/paddle/fluid/tests/unittests/npu/**",
]

[tool.ruff]
@@ -23,7 +22,6 @@ exclude = [
"./python/paddle/fluid/[!t]**",
"./python/paddle/fluid/tra**",
"./python/paddle/utils/gast/**",
"./python/paddle/fluid/tests/unittests/npu/**",
]
target-version = "py37"
select = [
python/paddle/__init__.py (3 changes: 0 additions & 3 deletions)
@@ -334,7 +334,6 @@
from .framework import CPUPlace # noqa: F401
from .framework import IPUPlace # noqa: F401
from .framework import CUDAPlace # noqa: F401
from .framework import NPUPlace # noqa: F401
from .framework import CUDAPinnedPlace # noqa: F401
from .framework import CustomPlace # noqa: F401

@@ -363,7 +362,6 @@
from .device import set_device # noqa: F401
from .device import get_device # noqa: F401
from .device import is_compiled_with_xpu # noqa: F401
from .device import is_compiled_with_npu # noqa: F401
from .device import is_compiled_with_ipu # noqa: F401
from .device import is_compiled_with_cinn # noqa: F401
from .device import is_compiled_with_cuda # noqa: F401
@@ -512,7 +510,6 @@
'histogram',
'multiplex',
'CUDAPlace',
'NPUPlace',
'empty',
'shape',
'real',
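A short sketch of the user-visible effect of dropping the `NPUPlace` re-export, assuming a stock build with no NPU plugin installed; `CustomPlace` remains the generic entry point:

```python
# Hedged sketch: top-level NPUPlace is no longer re-exported after this change.
import paddle

cpu = paddle.CPUPlace()          # unchanged
# paddle.NPUPlace(0)             # no longer exported; would raise AttributeError

# CustomPlace stays available and covers plugin-provided devices:
if paddle.device.is_compiled_with_custom_device("npu"):
    place = paddle.CustomPlace("npu", 0)
else:
    place = cpu
```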
python/paddle/amp/auto_cast.py (5 changes: 0 additions & 5 deletions)
@@ -344,18 +344,13 @@ def amp_guard(
if enable and not (
tracer._expected_place.is_gpu_place()
or tracer._expected_place.is_xpu_place()
or tracer._expected_place.is_npu_place()
or tracer._expected_place.is_custom_place()
):
warnings.warn(
'amp_guard can only be enabled on CUDAPlace, XPUPlace, MLUPlace, NPUPlace, and CustomPlace, current place is %s, so it makes no effect.'
% tracer._expected_place
)
enable = False
# For npu:
if tracer._expected_place.is_npu_place() and (dtype == 'bfloat16'):
warnings.warn('NPUPlace only support float16 amp.')
enable = False
# For xpu:
if tracer._expected_place.is_xpu_place() and (dtype == 'bfloat16'):
warnings.warn('XPUPlace only support float16 amp.')
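After this change `amp_guard` stays enabled only on GPU, XPU, and custom places; anywhere else it warns and disables itself. A small usage sketch through the public `auto_cast` API (model and shapes are illustrative only):

```python
# Hedged sketch: auto_cast behaviour after the NPU branch removal.
import paddle

model = paddle.nn.Linear(4, 4)
x = paddle.rand([2, 4])

# On CUDAPlace/XPUPlace/CustomPlace this runs in float16; on CPUPlace it warns
# and silently falls back to float32.
with paddle.amp.auto_cast(enable=True, dtype='float16'):
    y = model(x)
```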
python/paddle/amp/grad_scaler.py (101 changes: 31 additions & 70 deletions)
@@ -105,11 +105,10 @@ def __init__(
if enable and not (
tracer._expected_place.is_gpu_place()
or tracer._expected_place.is_xpu_place()
or tracer._expected_place.is_npu_place()
or tracer._expected_place.is_custom_place()
):
warnings.warn(
'AmpScaler can only be enabled on CUDAPlace, XPUPlace, MLUPlace, NPUPlace and CustomPlace, current place is %s, so it makes no effect.'
'AmpScaler can only be enabled on CUDAPlace, XPUPlace, MLUPlace and CustomPlace, current place is %s, so it makes no effect.'
% tracer._expected_place
)
enable = False
@@ -326,74 +325,36 @@ def _unscale(self, optimizer):
if param.dtype == core.VarDesc.VarType.FP32
]
self._found_inf = self._temp_found_inf_value_false
if core.is_compiled_with_npu():
float_status = _legacy_C_ops.alloc_float_status()
_legacy_C_ops.clear_float_status(float_status, float_status)

if len(param_grads_fp16):
_legacy_C_ops.check_finite_and_unscale(
param_grads_fp16,
self._scale,
float_status,
param_grads_fp16,
self._temp_found_inf_fp16,
)
self._found_inf = _C_ops.bitwise_or(
self._found_inf, self._temp_found_inf_fp16
)
if len(param_grads_bf16):
_legacy_C_ops.check_finite_and_unscale(
param_grads_bf16,
self._scale,
float_status,
param_grads_bf16,
self._temp_found_inf_bf16,
)
self._found_inf = _C_ops.bitwise_or(
self._found_inf, self._temp_found_inf_bf16
)
if len(param_grads_fp32):
_legacy_C_ops.check_finite_and_unscale(
param_grads_fp32,
self._scale,
float_status,
param_grads_fp32,
self._temp_found_inf_fp32,
)
self._found_inf = _C_ops.bitwise_or(
self._found_inf, self._temp_found_inf_fp32
)
else:
if len(param_grads_fp16):
_legacy_C_ops.check_finite_and_unscale(
param_grads_fp16,
self._scale,
param_grads_fp16,
self._temp_found_inf_fp16,
)
self._found_inf = _C_ops.bitwise_or(
self._found_inf, self._temp_found_inf_fp16
)
if len(param_grads_bf16):
_legacy_C_ops.check_finite_and_unscale(
param_grads_bf16,
self._scale,
param_grads_bf16,
self._temp_found_inf_bf16,
)
self._found_inf = _C_ops.bitwise_or(
self._found_inf, self._temp_found_inf_bf16
)
if len(param_grads_fp32):
_legacy_C_ops.check_finite_and_unscale(
param_grads_fp32,
self._scale,
param_grads_fp32,
self._temp_found_inf_fp32,
)
self._found_inf = _C_ops.bitwise_or(
self._found_inf, self._temp_found_inf_fp32
)
if len(param_grads_fp16):
_legacy_C_ops.check_finite_and_unscale(
param_grads_fp16,
self._scale,
param_grads_fp16,
self._temp_found_inf_fp16,
)
self._found_inf = _C_ops.bitwise_or(
self._found_inf, self._temp_found_inf_fp16
)
if len(param_grads_bf16):
_legacy_C_ops.check_finite_and_unscale(
param_grads_bf16,
self._scale,
param_grads_bf16,
self._temp_found_inf_bf16,
)
self._found_inf = _C_ops.bitwise_or(
self._found_inf, self._temp_found_inf_bf16
)
if len(param_grads_fp32):
_legacy_C_ops.check_finite_and_unscale(
param_grads_fp32,
self._scale,
param_grads_fp32,
self._temp_found_inf_fp32,
)
self._found_inf = _C_ops.bitwise_or(
self._found_inf, self._temp_found_inf_fp32
)

optimizer_state["state"] = OptimizerState.UNSCALED

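The `_unscale` rewrite keeps a single code path: gradients are grouped by dtype (fp16, bf16, fp32), each group goes through `check_finite_and_unscale`, and the per-group inf/nan flags are OR-ed into `_found_inf`. A hedged sketch of the public scaler API that exercises this path; the model and data are placeholders:

```python
# Hedged sketch: GradScaler.step() triggers the unified _unscale path shown above.
import paddle

model = paddle.nn.Linear(4, 4)
opt = paddle.optimizer.SGD(learning_rate=0.1, parameters=model.parameters())
scaler = paddle.amp.GradScaler(init_loss_scaling=1024)

x = paddle.rand([2, 4])
with paddle.amp.auto_cast(dtype='float16'):
    loss = model(x).mean()

scaler.scale(loss).backward()   # backward on the scaled loss
scaler.step(opt)                # unscales per dtype group, checks for inf/nan
scaler.update()                 # adjusts the loss scaling factor
```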
python/paddle/device/__init__.py (64 changes: 4 additions & 60 deletions)
@@ -36,7 +36,6 @@
'is_compiled_with_cinn',
'is_compiled_with_cuda',
'is_compiled_with_rocm',
'is_compiled_with_npu',
'is_compiled_with_custom_device',
'get_all_device_type',
'get_all_custom_device_type',
@@ -53,24 +52,6 @@
_cudnn_version = None


# TODO: WITH_ASCEND_CL may changed to WITH_NPU or others in the future
# for consistent.
def is_compiled_with_npu():
"""
Whether paddle was built with WITH_ASCEND_CL=ON to support Ascend NPU.

Return:
bool, ``True`` if NPU is supported, otherwise ``False``.

Examples:
.. code-block:: python

import paddle
support_npu = paddle.device.is_compiled_with_npu()
"""
return core.is_compiled_with_npu()


def is_compiled_with_custom_device(device_type):
"""
Whether paddle was built with Paddle_CUSTOM_DEVICE .
@@ -210,15 +191,6 @@ def _convert_to_place(device):
selected_xpus = os.getenv("FLAGS_selected_xpus", "0").split(",")
device_id = int(selected_xpus[0])
place = core.XPUPlace(device_id)
elif lower_device == 'npu':
if not core.is_compiled_with_npu():
raise ValueError(
"The device should not be 'npu', "
"since PaddlePaddle is not compiled with NPU"
)
selected_npus = os.getenv("FLAGS_selected_npus", "0").split(",")
device_id = int(selected_npus[0])
place = core.NPUPlace(device_id)
elif lower_device == 'ipu':
if not core.is_compiled_with_ipu():
raise ValueError(
@@ -229,7 +201,6 @@ def _convert_to_place(device):
else:
avaliable_gpu_device = re.match(r'gpu:\d+', lower_device)
avaliable_xpu_device = re.match(r'xpu:\d+', lower_device)
avaliable_npu_device = re.match(r'npu:\d+', lower_device)
if avaliable_gpu_device:
if not core.is_compiled_with_cuda():
raise ValueError(
@@ -250,31 +221,7 @@ def _convert_to_place(device):
device_id = device_info_list[1]
device_id = int(device_id)
place = core.XPUPlace(device_id)
if avaliable_npu_device:
if not core.is_compiled_with_npu():
device_info_list = device.split(':', 1)
device_type = device_info_list[0]
if device_type in core.get_all_custom_device_type():
device_id = device_info_list[1]
device_id = int(device_id)
place = core.CustomPlace(device_type, device_id)
return place
else:
raise ValueError(
"The device should not be {}, since PaddlePaddle is "
"not compiled with NPU or compiled with custom device".format(
avaliable_npu_device
)
)
device_info_list = device.split(':', 1)
device_id = device_info_list[1]
device_id = int(device_id)
place = core.NPUPlace(device_id)
if (
not avaliable_gpu_device
and not avaliable_xpu_device
and not avaliable_npu_device
):
if not avaliable_gpu_device and not avaliable_xpu_device:
device_info_list = device.split(':', 1)
device_type = device_info_list[0]
if device_type in core.get_all_custom_device_type():
@@ -346,9 +293,6 @@ def get_device():
elif isinstance(place, core.XPUPlace):
device_id = place.get_device_id()
device = 'xpu:' + str(device_id)
elif isinstance(place, core.NPUPlace):
device_id = place.get_device_id()
device = 'npu:' + str(device_id)
elif isinstance(place, core.IPUPlace):
num_devices = core.get_ipu_device_count()
device = f"ipus:{{0-{num_devices - 1}}}"
@@ -469,7 +413,7 @@ class Event:
Parameters:
device(str|paddle.CUDAPlace(n)|paddle.CustomPlace(n)): Which device the stream runn on. If device is None, the device is the current device. Default: None.
It can be ``gpu``, ``gpu:x``,``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec,
where ``x`` is the index of the GPUs, XPUs or NPUs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n).
where ``x`` is the index of the GPUs, XPUs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n).
enable_timing (bool, optional): indicates if the event should measure time, default is False
blocking (bool, optional): if True, ``wait`` will be blocking, default is False
interprocess (bool): if True, the event can be shared between processes, default is False
@@ -614,7 +558,7 @@ class Stream:
Parameters:
device(str|paddle.CUDAPlace(n)|paddle.CustomPlace(n)): Which device the stream runn on. If device is None, the device is the current device. Default: None.
It can be ``gpu``, ``gpu:x``,``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec,
where ``x`` is the index of the GPUs, XPUs or NPUs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n).
where ``x`` is the index of the GPUs, XPUs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n).
priority(int, optional): priority of the CUDA stream. Can be either
1 (high priority) or 2 (low priority). By default, streams have
priority 2.
@@ -936,7 +880,7 @@ def synchronize(device=None):
Parameters:
device(str|paddle.CUDAPlace(n)|paddle.XPUPlace(n)|paddle.CustomPlace(n)): The device which want to wait for. If device is None, the device is the current device. Default: None.
It can be ``gpu``, ``gpu:x``, ``xpu``, ``xpu:x``, ``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec,
where ``x`` is the index of the GPUs, XPUs or NPUs. And it can be paddle.CUDAPlace(n) or paddle.XPUPlace(n) or paddle.CustomPlace(n).
where ``x`` is the index of the GPUs, XPUs. And it can be paddle.CUDAPlace(n) or paddle.XPUPlace(n) or paddle.CustomPlace(n).
Examples:
.. code-block:: python
# required: custom_device
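With the dedicated NPU branches removed, `set_device` now recognizes `cpu`, `gpu[:x]`, `xpu[:x]`, `ipu`, and custom-device strings; an `npu:x` string is treated like any other plugin name and resolves only if a matching CustomDevice plugin is registered. A minimal sketch of the remaining behaviour; the device strings are examples, not a statement about any particular build:

```python
# Hedged sketch of device selection after this diff.
import paddle

paddle.device.set_device('cpu')            # always available
print(paddle.device.get_device())          # -> 'cpu'

if paddle.device.is_compiled_with_cuda():
    paddle.device.set_device('gpu:0')      # CUDAPlace(0)

# 'npu:0' no longer has a special case: it only works when a CustomDevice
# plugin named 'npu' is registered; otherwise set_device raises ValueError.
```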
python/paddle/distributed/collective.py (5 changes: 0 additions & 5 deletions)
@@ -288,11 +288,6 @@ def new_group(ranks=None, backend=None, timeout=_default_timeout):
core.NCCLParallelContext(strategy, place).init_with_ring_id(
ring_id
)
elif core.is_compiled_with_npu():
place = core.NPUPlace(genv.device_id)
core.HCCLParallelContext(strategy, place).init_with_ring_id(
ring_id
)
elif core.is_compiled_with_xpu():
place = core.XPUPlace(genv.device_id)
core.BKCLParallelContext(strategy, place).init_with_ring_id(
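In `new_group`, the parallel context is now picked purely from the build, NCCL for CUDA and BKCL for XPU as the surrounding context shows; the HCCL/NPU branch is gone. A hedged sketch of the call site, assuming a multi-process launch via `paddle.distributed.launch` and two illustrative ranks:

```python
# Hedged sketch: new_group() after the HCCL branch removal. Must be run under a
# distributed launcher, e.g. `python -m paddle.distributed.launch --gpus 0,1 demo.py`.
import paddle.distributed as dist

dist.init_parallel_env()
group = dist.new_group(ranks=[0, 1])   # backend follows the build: NCCL (CUDA) or BKCL (XPU)
```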