From 510b75d0c04a3d6e79596b1b5514a674c01ea759 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Carlos=20Mochol=C3=AD?=
Date: Fri, 3 Mar 2023 02:24:03 +0100
Subject: [PATCH 1/2] Update mypy job to torch 2.0

---
 .github/workflows/code-checks.yml              |  2 +-
 requirements/typing.txt                        |  2 +-
 .../fabric/plugins/collectives/collective.py   |  4 ++--
 .../plugins/collectives/torch_collective.py    | 14 +++++++-------
 src/lightning/fabric/strategies/ddp.py         |  2 +-
 src/lightning/fabric/strategies/fsdp.py        |  2 +-
 src/lightning/fabric/strategies/parallel.py    |  5 ++++-
 src/lightning/fabric/utilities/cloud_io.py     |  9 ++++++---
 src/lightning/fabric/utilities/distributed.py  |  2 +-
 src/lightning/fabric/utilities/types.py        |  9 ++++++---
 src/lightning/pytorch/overrides/distributed.py |  4 ++--
 src/lightning/pytorch/strategies/ddp.py        |  4 +---
 src/lightning/pytorch/strategies/parallel.py   |  5 ++++-
 src/lightning/pytorch/utilities/compile.py     | 18 ++++++++++++------
 src/lightning/pytorch/utilities/distributed.py |  2 +-
 15 files changed, 50 insertions(+), 34 deletions(-)

diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml
index 61c5d19c0d8eb..e2bcef94adeea 100644
--- a/.github/workflows/code-checks.yml
+++ b/.github/workflows/code-checks.yml
@@ -45,7 +45,7 @@ jobs:
         FREEZE_REQUIREMENTS: 1
       run: |
         # todo: adjust requirements for both code-bases
-        pip install -e '.[extra,ui,cloud]' -r requirements/typing.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html
+        pip install -e '.[extra,ui,cloud]' -r requirements/typing.txt -f https://download.pytorch.org/whl/test/cpu/torch_test.html" --pre
         pip list
 
     - name: Check typing
diff --git a/requirements/typing.txt b/requirements/typing.txt
index d33582199bbe8..01b3aaa775d30 100644
--- a/requirements/typing.txt
+++ b/requirements/typing.txt
@@ -1,5 +1,5 @@
 mypy==0.982
-torch==1.12.0
+torch==2.0.0
 
 types-Markdown
 types-PyYAML
diff --git a/src/lightning/fabric/plugins/collectives/collective.py b/src/lightning/fabric/plugins/collectives/collective.py
index 364a81ea4c291..1d03b9bb62b5d 100644
--- a/src/lightning/fabric/plugins/collectives/collective.py
+++ b/src/lightning/fabric/plugins/collectives/collective.py
@@ -70,11 +70,11 @@ def all_to_all(self, output_tensor_list: List[Tensor], input_tensor_list: List[T
         ...
 
     @abstractmethod
-    def send(self, tensor: Tensor, dst: int, tag: Optional[int] = 0) -> None:
+    def send(self, tensor: Tensor, dst: int, tag: int = 0) -> None:
         ...
 
     @abstractmethod
-    def recv(self, tensor: Tensor, src: Optional[int] = None, tag: Optional[int] = 0) -> Tensor:
+    def recv(self, tensor: Tensor, src: Optional[int] = None, tag: int = 0) -> Tensor:
         ...
 
     @abstractmethod
diff --git a/src/lightning/fabric/plugins/collectives/torch_collective.py b/src/lightning/fabric/plugins/collectives/torch_collective.py
index cf7a0f0cade1e..4159735d6706c 100644
--- a/src/lightning/fabric/plugins/collectives/torch_collective.py
+++ b/src/lightning/fabric/plugins/collectives/torch_collective.py
@@ -28,11 +28,11 @@ def __init__(self) -> None:
     @property
     def rank(self) -> int:
         # local rank
-        return dist.get_rank(self.group)
+        return dist.get_rank(self.group)  # type: ignore[arg-type]
 
     @property
     def world_size(self) -> int:
-        return dist.get_world_size(self.group)
+        return dist.get_world_size(self.group)  # type: ignore[arg-type]
 
     def broadcast(self, tensor: Tensor, src: int) -> Tensor:
         dist.broadcast(tensor, src, group=self.group)
@@ -71,11 +71,11 @@ def all_to_all(self, output_tensor_list: List[Tensor], input_tensor_list: List[T
         dist.all_to_all(output_tensor_list, input_tensor_list, group=self.group)
         return output_tensor_list
 
-    def send(self, tensor: Tensor, dst: int, tag: Optional[int] = 0) -> None:
-        dist.send(tensor, dst, tag=tag, group=self.group)
+    def send(self, tensor: Tensor, dst: int, tag: int = 0) -> None:
+        dist.send(tensor, dst, tag=tag, group=self.group)  # type: ignore[arg-type]
 
-    def recv(self, tensor: Tensor, src: Optional[int] = None, tag: Optional[int] = 0) -> Tensor:
-        dist.recv(tensor, src, tag=tag, group=self.group)
+    def recv(self, tensor: Tensor, src: Optional[int] = None, tag: int = 0) -> Tensor:
+        dist.recv(tensor, src, tag=tag, group=self.group)  # type: ignore[arg-type]
         return tensor
 
     def all_gather_object(self, object_list: List[Any], obj: Any) -> List[Any]:
@@ -168,7 +168,7 @@ def new_group(cls, **kwargs: Any) -> CollectibleGroup:
     def destroy_group(cls, group: CollectibleGroup) -> None:
         # can be called by all processes in the default group, group will be `object()` if they are not part of the
         # current group
-        dist.destroy_process_group(group)
+        dist.destroy_process_group(group)  # type: ignore[arg-type]
 
     @classmethod
     def _convert_to_native_op(cls, op: Union[str, ReduceOp, RedOpType]) -> Union[ReduceOp, RedOpType]:
diff --git a/src/lightning/fabric/strategies/ddp.py b/src/lightning/fabric/strategies/ddp.py
index 54cac12e7a60c..24b69eefa8509 100644
--- a/src/lightning/fabric/strategies/ddp.py
+++ b/src/lightning/fabric/strategies/ddp.py
@@ -209,5 +209,5 @@ def no_backward_sync(self, module: Module) -> Generator:
                 f" `{self.__class__.__name__}.no_backward_sync` is wrapped in `DistributedDataParallel`."
                 f" Got: {module.__class__.__name__}."
             )
-        with module.no_sync():  # type: ignore[operator]
+        with module.no_sync():
             yield
diff --git a/src/lightning/fabric/strategies/fsdp.py b/src/lightning/fabric/strategies/fsdp.py
index 7e94fdd989910..c54a98f0fad1d 100644
--- a/src/lightning/fabric/strategies/fsdp.py
+++ b/src/lightning/fabric/strategies/fsdp.py
@@ -279,7 +279,7 @@ def clip_gradients_norm(  # type: ignore[override]
         """Clip gradients by norm."""
         rank_zero_warn("Gradient Clipping by Norm is currently experimental for FSDP. Proceed with Caution!")
         self.precision.unscale_gradients(optimizer)
-        return module.clip_grad_norm_(max_norm=max_norm, norm_type=norm_type)  # type: ignore[return-value]
+        return module.clip_grad_norm_(max_norm=max_norm, norm_type=norm_type)
 
     def clip_gradients_value(  # type: ignore[override]
         self, module: "FullyShardedDataParallel", optimizer: Optimizer, clip_val: Union[float, int]
diff --git a/src/lightning/fabric/strategies/parallel.py b/src/lightning/fabric/strategies/parallel.py
index d7dd1ea07ce65..ef4c69746ae9e 100644
--- a/src/lightning/fabric/strategies/parallel.py
+++ b/src/lightning/fabric/strategies/parallel.py
@@ -94,7 +94,10 @@ def reduce_boolean_decision(self, decision: bool, all: bool = True) -> bool:
             bool: The reduced boolean decision.
         """
         decision = torch.tensor(int(decision), device=self.root_device)
-        decision = self.all_reduce(decision, reduce_op=ReduceOp.SUM)
+        decision = self.all_reduce(
+            decision,
+            reduce_op=ReduceOp.SUM,  # type: ignore[arg-type]
+        )
         decision = bool(decision == self.world_size) if all else bool(decision)
         return decision
 
diff --git a/src/lightning/fabric/utilities/cloud_io.py b/src/lightning/fabric/utilities/cloud_io.py
index a8657e2369301..ecabbbdb6bb74 100644
--- a/src/lightning/fabric/utilities/cloud_io.py
+++ b/src/lightning/fabric/utilities/cloud_io.py
@@ -37,15 +37,18 @@ def _load(
     """
     if not isinstance(path_or_url, (str, Path)):
         # any sort of BytesIO or similar
-        return torch.load(path_or_url, map_location=map_location)
+        return torch.load(
+            path_or_url,
+            map_location=map_location,  # type: ignore[arg-type] # upstream annotation is not correct
+        )
     if str(path_or_url).startswith("http"):
         return torch.hub.load_state_dict_from_url(
             str(path_or_url),
-            map_location=map_location,  # type: ignore[arg-type] # upstream annotation is not correct
+            map_location=map_location,  # type: ignore[arg-type]
         )
     fs = get_filesystem(path_or_url)
     with fs.open(path_or_url, "rb") as f:
-        return torch.load(f, map_location=map_location)
+        return torch.load(f, map_location=map_location)  # type: ignore[arg-type]
 
 
 def get_filesystem(path: _PATH, **kwargs: Any) -> AbstractFileSystem:
diff --git a/src/lightning/fabric/utilities/distributed.py b/src/lightning/fabric/utilities/distributed.py
index 2b574e020b4d9..f41f541ebf565 100644
--- a/src/lightning/fabric/utilities/distributed.py
+++ b/src/lightning/fabric/utilities/distributed.py
@@ -130,7 +130,7 @@ def _sync_ddp(result: Tensor, group: Optional[Any] = None, reduce_op: Optional[U
     op: Optional[ReduceOp]
     if isinstance(reduce_op, str):
         if reduce_op.lower() in ("avg", "mean"):
-            op = ReduceOp.SUM
+            op = ReduceOp.SUM  # type: ignore[assignment]
             divide_by_world_size = True
         else:
             op = getattr(ReduceOp, reduce_op.upper())
diff --git a/src/lightning/fabric/utilities/types.py b/src/lightning/fabric/utilities/types.py
index 5fc73fab5a865..9909fe5554be0 100644
--- a/src/lightning/fabric/utilities/types.py
+++ b/src/lightning/fabric/utilities/types.py
@@ -17,6 +17,7 @@
 import torch
 from torch import Tensor
 from torch.optim import Optimizer
+from typing_extensions import TypeAlias
 
 from lightning.fabric.utilities.imports import _TORCH_GREATER_EQUAL_1_13, _TORCH_GREATER_EQUAL_2_0
 
@@ -29,7 +30,7 @@
 if torch.distributed.is_available():
     from torch.distributed import ProcessGroup, ReduceOp
 
-    RedOpType = ReduceOp.RedOpType if _TORCH_GREATER_EQUAL_1_13 else object
+    RedOpType: TypeAlias = ReduceOp.RedOpType if _TORCH_GREATER_EQUAL_1_13 else object  # type: ignore[misc]
 else:
     ProcessGroup = Any  # type: ignore[assignment,misc]
     ReduceOp = object  # type: ignore[assignment,misc]  # we are using isinstance check once
@@ -73,8 +74,10 @@ def step(self, epoch: Optional[int] = None) -> None:
         ...
 
 
-_TORCH_LRSCHEDULER = (
-    torch.optim.lr_scheduler.LRScheduler if _TORCH_GREATER_EQUAL_2_0 else torch.optim.lr_scheduler._LRScheduler
+_TORCH_LRSCHEDULER: TypeAlias = (
+    torch.optim.lr_scheduler.LRScheduler  # type: ignore[misc]
+    if _TORCH_GREATER_EQUAL_2_0
+    else torch.optim.lr_scheduler._LRScheduler
 )
 
 
diff --git a/src/lightning/pytorch/overrides/distributed.py b/src/lightning/pytorch/overrides/distributed.py
index b41ed71b29c2e..75657b060ff2e 100644
--- a/src/lightning/pytorch/overrides/distributed.py
+++ b/src/lightning/pytorch/overrides/distributed.py
@@ -41,7 +41,7 @@ def _find_tensors(
 def prepare_for_backward(model: DistributedDataParallel, output: Any) -> None:
     # `prepare_for_backward` is `DistributedDataParallel` specific.
     if torch.is_grad_enabled() and model.require_backward_grad_sync:
-        model.require_forward_param_sync = True  # type: ignore[assignment]
+        model.require_forward_param_sync = True
         # We'll return the output object verbatim since it is a freeform
         # object. We need to find any tensors in this object, though,
         # because we need to figure out which parameters were used during
@@ -52,7 +52,7 @@ def prepare_for_backward(model: DistributedDataParallel, output: Any) -> None:
         reducer._rebuild_buckets()  # avoids "INTERNAL ASSERT FAILED" with `find_unused_parameters=False`
         reducer.prepare_for_backward(args)
     else:
-        model.require_forward_param_sync = False  # type: ignore[assignment]
+        model.require_forward_param_sync = False
 
 
 class UnrepeatedDistributedSampler(DistributedSampler):
diff --git a/src/lightning/pytorch/strategies/ddp.py b/src/lightning/pytorch/strategies/ddp.py
index 4c61da4bd1f88..e7c98ece6d1ce 100644
--- a/src/lightning/pytorch/strategies/ddp.py
+++ b/src/lightning/pytorch/strategies/ddp.py
@@ -408,9 +408,7 @@ def teardown(self) -> None:
 
         pl_module = self.lightning_module
         if isinstance(self.model, DistributedDataParallel):
-            if not self.model.static_graph and self.model._get_ddp_logging_data().get(  # type: ignore[operator]
-                "can_set_static_graph"
-            ):
+            if not self.model.static_graph and self.model._get_ddp_logging_data().get("can_set_static_graph"):
                 rank_zero_info(
                     "Your model can run with static graph optimizations. For future training runs, we suggest you"
                     f" pass `Trainer(..., strategy={self.__class__.__name__}(static_graph=True))` to enable them."
diff --git a/src/lightning/pytorch/strategies/parallel.py b/src/lightning/pytorch/strategies/parallel.py
index 0e48e4044daee..265e3fb79266a 100644
--- a/src/lightning/pytorch/strategies/parallel.py
+++ b/src/lightning/pytorch/strategies/parallel.py
@@ -99,7 +99,10 @@ def reduce_boolean_decision(self, decision: bool, all: bool = True) -> bool:
             bool: The reduced boolean decision.
""" decision = torch.tensor(int(decision), device=self.root_device) - decision = self.reduce(decision, reduce_op=ReduceOp.SUM) + decision = self.reduce( + decision, + reduce_op=ReduceOp.SUM, # type: ignore[arg-type] + ) decision = bool(decision == self.world_size) if all else bool(decision) return decision diff --git a/src/lightning/pytorch/utilities/compile.py b/src/lightning/pytorch/utilities/compile.py index 3cc6dc7566c1e..77eb01133fbe2 100644 --- a/src/lightning/pytorch/utilities/compile.py +++ b/src/lightning/pytorch/utilities/compile.py @@ -78,6 +78,10 @@ def to_uncompiled(model: Union["pl.LightningModule", "torch._dynamo.OptimizedMod if isinstance(model, OptimizedModule): model = model._orig_mod + if not isinstance(model, pl.LightningModule): + raise TypeError( + f"Unexpected error, the wrapped model should be a LightningModule, found {type(model).__name__}" + ) elif isinstance(model, pl.LightningModule): if model._compiler_ctx is None: @@ -88,12 +92,14 @@ def to_uncompiled(model: Union["pl.LightningModule", "torch._dynamo.OptimizedMod else: raise ValueError("`model` must either be an instance of OptimizedModule or LightningModule") - model.forward = model._compiler_ctx["original_forward"] - model.training_step = model._compiler_ctx["original_training_step"] - model.validation_step = model._compiler_ctx["original_validation_step"] - model.test_step = model._compiler_ctx["original_test_step"] - model.predict_step = model._compiler_ctx["original_predict_step"] - model._compiler_ctx = None + ctx = model._compiler_ctx + if ctx is not None: + model.forward = ctx["original_forward"] # type: ignore[assignment] + model.training_step = ctx["original_training_step"] # type: ignore[assignment] + model.validation_step = ctx["original_validation_step"] # type: ignore[assignment] + model.test_step = ctx["original_test_step"] # type: ignore[assignment] + model.predict_step = ctx["original_predict_step"] # type: ignore[assignment] + model._compiler_ctx = None return model diff --git a/src/lightning/pytorch/utilities/distributed.py b/src/lightning/pytorch/utilities/distributed.py index b16393273e05e..c21f0c8c4c767 100644 --- a/src/lightning/pytorch/utilities/distributed.py +++ b/src/lightning/pytorch/utilities/distributed.py @@ -119,7 +119,7 @@ def register_ddp_comm_hook( ddp_comm_hook = ddp_comm_wrapper(ddp_comm_hook) rank_zero_debug(f"Registering DDP comm hook: {ddp_comm_hook.__qualname__}.") - model.register_comm_hook(state=ddp_comm_state, hook=ddp_comm_hook) # type: ignore[operator] + model.register_comm_hook(state=ddp_comm_state, hook=ddp_comm_hook) def _broadcast_object_list(obj: Any, rank: int) -> Any: From ce33838a6181f50a3c738661b6d685a19ccd0d94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Fri, 3 Mar 2023 02:27:42 +0100 Subject: [PATCH 2/2] -f --- .github/workflows/code-checks.yml | 2 +- requirements/typing.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml index e2bcef94adeea..3cc066fb380d9 100644 --- a/.github/workflows/code-checks.yml +++ b/.github/workflows/code-checks.yml @@ -45,7 +45,7 @@ jobs: FREEZE_REQUIREMENTS: 1 run: | # todo: adjust requirements for both code-bases - pip install -e '.[extra,ui,cloud]' -r requirements/typing.txt -f https://download.pytorch.org/whl/test/cpu/torch_test.html" --pre + pip install -e '.[extra,ui,cloud]' -r requirements/typing.txt pip list - name: Check typing diff --git a/requirements/typing.txt b/requirements/typing.txt index 
01b3aaa775d30..e44f97d0e33ee 100644 --- a/requirements/typing.txt +++ b/requirements/typing.txt @@ -1,4 +1,5 @@ mypy==0.982 +-f https://download.pytorch.org/whl/test/cpu/torch_test.html --pre torch==2.0.0 types-Markdown