From e37215d4a82f4710cd07bbf1436a78e78d6de5bf Mon Sep 17 00:00:00 2001 From: jializheng Date: Sat, 26 Aug 2023 19:42:16 +0800 Subject: [PATCH 01/13] add npu support dtypes --- accelerator/npu_accelerator.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/accelerator/npu_accelerator.py b/accelerator/npu_accelerator.py index d62f22620b67..5678a0266386 100644 --- a/accelerator/npu_accelerator.py +++ b/accelerator/npu_accelerator.py @@ -134,6 +134,9 @@ def is_bf16_supported(self): def is_fp16_supported(self): return True + def supported_dtypes(self): + return [torch.float, torch.half, torch.bfloat16] + # Misc def amp(self): if hasattr(torch.npu, 'amp'): From ac78f3a7c68ccee54cdb0e5c48d9ebc4aa292425 Mon Sep 17 00:00:00 2001 From: jializheng Date: Fri, 15 Sep 2023 11:52:00 +0800 Subject: [PATCH 02/13] add npu fused_adam support --- accelerator/npu_accelerator.py | 45 ++++++++++++++-------- op_builder/npu/__init__.py | 2 +- op_builder/npu/fused_adam.py | 70 ++++++++++++++++++++++++++++++++++ 3 files changed, 101 insertions(+), 16 deletions(-) create mode 100644 op_builder/npu/fused_adam.py diff --git a/accelerator/npu_accelerator.py b/accelerator/npu_accelerator.py index 5678a0266386..ce291249098f 100644 --- a/accelerator/npu_accelerator.py +++ b/accelerator/npu_accelerator.py @@ -2,16 +2,27 @@ # SPDX-License-Identifier: Apache-2.0 # DeepSpeed Team +import importlib +import inspect -import torch from .abstract_accelerator import DeepSpeedAccelerator +# During setup stage torch may not be installed, pass on no torch will +# allow op builder related API to be executed. +try: + import torch.npu +except ImportError: + pass class NPU_Accelerator(DeepSpeedAccelerator): - def __init__(self): + super().__init__() self._name = 'npu' self._communication_backend_name = 'hccl' + # dict that holds class name <--> class type mapping i.e. + # 'AsyncIOBuilder': + # this dict will be filled at init stage + self.class_dict = None def is_synchronized_device(self): return False @@ -207,25 +218,29 @@ def op_builder_dir(self): except ImportError: return "deepspeed.ops.op_builder.npu" + def _lazy_init_class_dict(self): + if self.class_dict: + return + + op_builder_module = importlib.import_module(self.op_builder_dir()) + + # get op builder class from op_builder/npu/__init__.py + self.class_dict = {} + for class_name, class_obj in inspect.getmembers(op_builder_module, inspect.isclass): + self.class_dict[class_name] = class_obj + # create an instance of op builder and return, name specified by class_name def create_op_builder(self, class_name): builder_class = self.get_op_builder(class_name) - if builder_class != None: - return builder_class() - return None + return None if builder_class is None else builder_class() # return an op builder class, name specified by class_name def get_op_builder(self, class_name): - try: - # is op_builder from deepspeed or a 3p version? this should only succeed if it's deepspeed - # if successful this also means we're doing a local install and not JIT compile path - from op_builder import __deepspeed__ # noqa: F401 # type: ignore - from op_builder.npu import NotImplementedBuilder - except ImportError: - from deepspeed.ops.op_builder.npu import NotImplementedBuilder - - # return a NPUNotImplementedBuilder to avoid get NoneType[Name] in unit tests - return NotImplementedBuilder + self._lazy_init_class_dict() + if class_name in self.class_dict: + return self.class_dict[class_name] + else: + return self.class_dict['NotImplementedBuilder'] if 'NotImplementedBuilder' in self.class_dict else None def build_extension(self): from torch.utils.cpp_extension import BuildExtension diff --git a/op_builder/npu/__init__.py b/op_builder/npu/__init__.py index 3efcc859aaa8..0d9e76b3903f 100644 --- a/op_builder/npu/__init__.py +++ b/op_builder/npu/__init__.py @@ -5,5 +5,5 @@ '''Copyright The Microsoft DeepSpeed Team''' # NPU related operators will be added in the future. - +from .fused_adam import FusedAdamBuilder from .no_impl import NotImplementedBuilder diff --git a/op_builder/npu/fused_adam.py b/op_builder/npu/fused_adam.py new file mode 100644 index 000000000000..98e0e1a9381a --- /dev/null +++ b/op_builder/npu/fused_adam.py @@ -0,0 +1,70 @@ +from .builder import NPUOpBuilder + +try: + import torch_npu +except ImportError as e: + pass + + +class NPUFusedAdam: + @staticmethod + def multi_tensor_adam(chunk_size, noop_flag_buffer, tensor_lists, lr, beta1, beta2, epsilon, step, adam_w_mode, + bias_correction, weight_decay, *args): + bias_correction1 = beta1 ** step + bias_correction2 = beta2 ** step + + # iteration group['params'] + for i in range(len(tensor_lists[0])): + grad_flat = tensor_lists[0][i] + param_flat = tensor_lists[1][i] + m_flat = tensor_lists[2][i] + v_flat = tensor_lists[3][i] + + if adam_w_mode: + param_flat.data, m_flat, v_flat = torch_npu.npu_apply_adam_w( + bias_correction1, + bias_correction2, + lr, + weight_decay, + beta1, + beta2, + epsilon, + grad_flat, + None, # max_grad_norm + False, # amsgrad + False, # maximize + out=(param_flat.data, m_flat, v_flat) + ) + else: + param_flat.data, m_flat, v_flat = torch_npu.npu_apply_adam( + bias_correction1, + bias_correction2, + lr, + beta1, + beta2, + epsilon, + grad_flat, + False, # use_locking + False, # use_nesterov + out=(param_flat.data, m_flat, v_flat) + ) + + +class FusedAdamBuilder(NPUOpBuilder): + BUILD_VAR = "DS_BUILD_FUSED_ADAM" + NAME = "fused_adam" + + def __init__(self): + super().__init__(name=self.NAME) + + def absolute_name(self): + return f'deepspeed.ops.adam.{self.NAME}_op' + + def sources(self): + return [] + + def include_paths(self): + return [] + + def load(self, verbose=True): + return NPUFusedAdam From c87ef3202e246709c982398b304469159e4c3985 Mon Sep 17 00:00:00 2001 From: jializheng Date: Thu, 28 Sep 2023 10:37:45 +0800 Subject: [PATCH 03/13] add license --- op_builder/npu/fused_adam.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/op_builder/npu/fused_adam.py b/op_builder/npu/fused_adam.py index 98e0e1a9381a..a0b0d0f6e29a 100644 --- a/op_builder/npu/fused_adam.py +++ b/op_builder/npu/fused_adam.py @@ -1,3 +1,8 @@ +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + from .builder import NPUOpBuilder try: From 258eac1befbdb2622fe306329950eb02cd3a8b7f Mon Sep 17 00:00:00 2001 From: CurryRice233 Date: Sat, 14 Oct 2023 14:41:04 +0800 Subject: [PATCH 04/13] Update accelerator/npu_accelerator.py Co-authored-by: Hz, Ji --- accelerator/npu_accelerator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/accelerator/npu_accelerator.py b/accelerator/npu_accelerator.py index 1f483dd76bce..618178de7dbd 100644 --- a/accelerator/npu_accelerator.py +++ b/accelerator/npu_accelerator.py @@ -16,6 +16,7 @@ class NPU_Accelerator(DeepSpeedAccelerator): def __init__(self): + super().__init__() self._name = 'npu' self._communication_backend_name = 'hccl' From 620b85e76a32fed23226f0dbeec04074cd8a7e71 Mon Sep 17 00:00:00 2001 From: CurryRice233 Date: Sat, 14 Oct 2023 14:41:20 +0800 Subject: [PATCH 05/13] Update op_builder/npu/fused_adam.py Co-authored-by: Hz, Ji --- op_builder/npu/fused_adam.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/op_builder/npu/fused_adam.py b/op_builder/npu/fused_adam.py index a0b0d0f6e29a..ca1d290db1b3 100644 --- a/op_builder/npu/fused_adam.py +++ b/op_builder/npu/fused_adam.py @@ -16,7 +16,7 @@ class NPUFusedAdam: def multi_tensor_adam(chunk_size, noop_flag_buffer, tensor_lists, lr, beta1, beta2, epsilon, step, adam_w_mode, bias_correction, weight_decay, *args): bias_correction1 = beta1 ** step - bias_correction2 = beta2 ** step + bias_correction2 = beta2**step # iteration group['params'] for i in range(len(tensor_lists[0])): From dd94fcc40d055b31f09e0b06ac84282beb9ebcba Mon Sep 17 00:00:00 2001 From: CurryRice233 Date: Sat, 14 Oct 2023 14:43:21 +0800 Subject: [PATCH 06/13] Update op_builder/npu/fused_adam.py Co-authored-by: Hz, Ji --- op_builder/npu/fused_adam.py | 1 + 1 file changed, 1 insertion(+) diff --git a/op_builder/npu/fused_adam.py b/op_builder/npu/fused_adam.py index ca1d290db1b3..04c8b797ae10 100644 --- a/op_builder/npu/fused_adam.py +++ b/op_builder/npu/fused_adam.py @@ -12,6 +12,7 @@ class NPUFusedAdam: + @staticmethod def multi_tensor_adam(chunk_size, noop_flag_buffer, tensor_lists, lr, beta1, beta2, epsilon, step, adam_w_mode, bias_correction, weight_decay, *args): From b41f20cfef8d3b1cea75352510ceccddf014352f Mon Sep 17 00:00:00 2001 From: CurryRice233 Date: Sat, 14 Oct 2023 14:43:32 +0800 Subject: [PATCH 07/13] Update op_builder/npu/fused_adam.py Co-authored-by: Hz, Ji --- op_builder/npu/fused_adam.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/op_builder/npu/fused_adam.py b/op_builder/npu/fused_adam.py index 04c8b797ae10..451f1f91719f 100644 --- a/op_builder/npu/fused_adam.py +++ b/op_builder/npu/fused_adam.py @@ -16,7 +16,7 @@ class NPUFusedAdam: @staticmethod def multi_tensor_adam(chunk_size, noop_flag_buffer, tensor_lists, lr, beta1, beta2, epsilon, step, adam_w_mode, bias_correction, weight_decay, *args): - bias_correction1 = beta1 ** step + bias_correction1 = beta1**step bias_correction2 = beta2**step # iteration group['params'] From 0487035c298d3519acc95afb287e79459d853805 Mon Sep 17 00:00:00 2001 From: CurryRice233 Date: Sat, 14 Oct 2023 14:43:46 +0800 Subject: [PATCH 08/13] Update op_builder/npu/fused_adam.py Co-authored-by: Hz, Ji --- op_builder/npu/fused_adam.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/op_builder/npu/fused_adam.py b/op_builder/npu/fused_adam.py index 451f1f91719f..c15fc649eb45 100644 --- a/op_builder/npu/fused_adam.py +++ b/op_builder/npu/fused_adam.py @@ -39,7 +39,7 @@ def multi_tensor_adam(chunk_size, noop_flag_buffer, tensor_lists, lr, beta1, bet None, # max_grad_norm False, # amsgrad False, # maximize - out=(param_flat.data, m_flat, v_flat) + out=(param_flat.data, m_flat, v_flat)) ) else: param_flat.data, m_flat, v_flat = torch_npu.npu_apply_adam( From 52a2a8c69016e7f4b87edeb26e799bb86eaedfe5 Mon Sep 17 00:00:00 2001 From: CurryRice233 Date: Sat, 14 Oct 2023 14:43:55 +0800 Subject: [PATCH 09/13] Update op_builder/npu/fused_adam.py Co-authored-by: Hz, Ji --- op_builder/npu/fused_adam.py | 1 - 1 file changed, 1 deletion(-) diff --git a/op_builder/npu/fused_adam.py b/op_builder/npu/fused_adam.py index c15fc649eb45..8c7180587912 100644 --- a/op_builder/npu/fused_adam.py +++ b/op_builder/npu/fused_adam.py @@ -40,7 +40,6 @@ def multi_tensor_adam(chunk_size, noop_flag_buffer, tensor_lists, lr, beta1, bet False, # amsgrad False, # maximize out=(param_flat.data, m_flat, v_flat)) - ) else: param_flat.data, m_flat, v_flat = torch_npu.npu_apply_adam( bias_correction1, From d808f8622f0e8b4dc0142cdb3f4dfc62a9fdaafb Mon Sep 17 00:00:00 2001 From: CurryRice233 Date: Sat, 14 Oct 2023 14:44:16 +0800 Subject: [PATCH 10/13] Update op_builder/npu/fused_adam.py Co-authored-by: Hz, Ji --- op_builder/npu/fused_adam.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/op_builder/npu/fused_adam.py b/op_builder/npu/fused_adam.py index 8c7180587912..1bb33c62ff79 100644 --- a/op_builder/npu/fused_adam.py +++ b/op_builder/npu/fused_adam.py @@ -51,7 +51,7 @@ def multi_tensor_adam(chunk_size, noop_flag_buffer, tensor_lists, lr, beta1, bet grad_flat, False, # use_locking False, # use_nesterov - out=(param_flat.data, m_flat, v_flat) + out=(param_flat.data, m_flat, v_flat)) ) From fb77cd0504ecb4c5ec356ee46d31de78aa7470d3 Mon Sep 17 00:00:00 2001 From: CurryRice233 Date: Sat, 14 Oct 2023 14:45:25 +0800 Subject: [PATCH 11/13] Update op_builder/npu/fused_adam.py Co-authored-by: Hz, Ji --- op_builder/npu/fused_adam.py | 1 - 1 file changed, 1 deletion(-) diff --git a/op_builder/npu/fused_adam.py b/op_builder/npu/fused_adam.py index 1bb33c62ff79..fc1bc83c7cc7 100644 --- a/op_builder/npu/fused_adam.py +++ b/op_builder/npu/fused_adam.py @@ -52,7 +52,6 @@ def multi_tensor_adam(chunk_size, noop_flag_buffer, tensor_lists, lr, beta1, bet False, # use_locking False, # use_nesterov out=(param_flat.data, m_flat, v_flat)) - ) class FusedAdamBuilder(NPUOpBuilder): From 7b26ce40463a1adb6e2a33cc4f1bedd55adcbaba Mon Sep 17 00:00:00 2001 From: CurryRice233 Date: Mon, 16 Oct 2023 09:02:41 +0800 Subject: [PATCH 12/13] Update accelerator/npu_accelerator.py Co-authored-by: Hz, Ji --- accelerator/npu_accelerator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/accelerator/npu_accelerator.py b/accelerator/npu_accelerator.py index 618178de7dbd..1f483dd76bce 100644 --- a/accelerator/npu_accelerator.py +++ b/accelerator/npu_accelerator.py @@ -16,7 +16,6 @@ class NPU_Accelerator(DeepSpeedAccelerator): def __init__(self): - super().__init__() self._name = 'npu' self._communication_backend_name = 'hccl' From da7edd6515c8a47b955527c2481d4e87e6a1fce6 Mon Sep 17 00:00:00 2001 From: CurryRice233 Date: Mon, 16 Oct 2023 09:03:08 +0800 Subject: [PATCH 13/13] Update accelerator/npu_accelerator.py Co-authored-by: Hz, Ji --- accelerator/npu_accelerator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/accelerator/npu_accelerator.py b/accelerator/npu_accelerator.py index 1f483dd76bce..0d82a96e4456 100644 --- a/accelerator/npu_accelerator.py +++ b/accelerator/npu_accelerator.py @@ -15,6 +15,7 @@ class NPU_Accelerator(DeepSpeedAccelerator): + def __init__(self): super().__init__() self._name = 'npu'