Repo checker (#9450)
* Remove default mypy option

* Implement mypy ignore-without-code option

* Enable mypy redundant-expr

* Fix erroneous tuple types

* Remove ruff target-version, redundant with project

* Use extend selections for ruff

* Fix B009 and B010 with ruff

* Fix test parametrization

* Fix FutureWarning

* Make zips strict unless doing so causes errors,
  in which case set them explicitly to False

* Add a commit message for pre-commit autoupdate
Armavica authored Sep 10, 2024
1 parent cc74d3a commit 6baceca
Showing 78 changed files with 421 additions and 313 deletions.
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
@@ -1,6 +1,7 @@
# https://pre-commit.com/
ci:
autoupdate_schedule: monthly
autoupdate_commit_msg: 'Update pre-commit hooks'
exclude: 'xarray/datatree_.*'
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/dataset_io.py
@@ -724,7 +724,7 @@ class PerformanceBackend(xr.backends.BackendEntrypoint):
def open_dataset(
self,
filename_or_obj: str | os.PathLike | None,
drop_variables: tuple[str] = None,
drop_variables: tuple[str, ...] = None,
*,
mask_and_scale=True,
decode_times=True,
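
Note on the annotation fix above: in Python typing, tuple[str] means a tuple of exactly one string, whereas tuple[str, ...] means a tuple of any length whose elements are all strings, which is what this parameter is meant to accept. A minimal sketch, independent of the xarray code, of how mypy reads the two forms:

def exactly_one(names: tuple[str]) -> None: ...       # a 1-tuple, nothing else
def any_length(names: tuple[str, ...]) -> None: ...   # zero or more strings

exactly_one(("a",))          # accepted
any_length(("a", "b", "c"))  # accepted
exactly_one(("a", "b"))      # mypy error: tuple[str, str] is not tuple[str]
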
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/groupby.py
@@ -174,7 +174,7 @@ def setup(self, use_cftime, use_flox):
# GH9426 - deep-copying CFTime object arrays is weirdly slow
asda = xr.DataArray(time)
labeled_time = []
for year, month in zip(asda.dt.year, asda.dt.month):
for year, month in zip(asda.dt.year, asda.dt.month, strict=True):
labeled_time.append(cftime.datetime(year, month, 1))

self.da = xr.DataArray(
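
The strict=True added here (and throughout this PR, per the "Make zips strict" commit) uses the zip() keyword introduced in Python 3.10: instead of silently stopping at the shortest input, zip raises ValueError when the iterables have different lengths. Where lengths can legitimately differ, the PR passes strict=False explicitly (see the netCDF4 chunk check further down). A small standalone illustration:

years = [2000, 2001, 2002]
months = [1, 2]  # one element short

list(zip(years, months))                # [(2000, 1), (2001, 2)] -- silently truncated
list(zip(years, months, strict=False))  # same result, but the intent is now explicit
list(zip(years, months, strict=True))   # ValueError: argument 2 is shorter than argument 1
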
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/rolling.py
@@ -64,7 +64,7 @@ def time_rolling_long(self, func, pandas, use_bottleneck):
def time_rolling_np(self, window_, min_periods, use_bottleneck):
with xr.set_options(use_bottleneck=use_bottleneck):
self.ds.rolling(x=window_, center=False, min_periods=min_periods).reduce(
getattr(np, "nansum")
np.nansum
).load()

@parameterized(
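
The getattr(np, "nansum") -> np.nansum change above is ruff's B009 fix: calling getattr with a literal attribute name is just an indirect spelling of ordinary attribute access, and B010 is the matching rule for setattr. A short sketch, not taken from xarray, of what the two rules flag:

import numpy as np

func = getattr(np, "nansum")   # flagged by B009: the attribute name is a constant
func = np.nansum               # preferred

attr = "nanmean"
func = getattr(np, attr)       # fine: the attribute name is only known at runtime

class Options:
    pass

opts = Options()
setattr(opts, "verbose", True)  # flagged by B010
opts.verbose = True             # preferred
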
2 changes: 1 addition & 1 deletion doc/user-guide/testing.rst
@@ -193,7 +193,7 @@ different type:

.. ipython:: python
def sparse_random_arrays(shape: tuple[int]) -> sparse._coo.core.COO:
def sparse_random_arrays(shape: tuple[int, ...]) -> sparse._coo.core.COO:
"""Strategy which generates random sparse.COO arrays"""
if shape is None:
shape = npst.array_shapes()
2 changes: 1 addition & 1 deletion properties/test_pandas_roundtrip.py
@@ -80,7 +80,7 @@ def test_roundtrip_dataarray(data, arr) -> None:
tuple
)
)
coords = {name: np.arange(n) for (name, n) in zip(names, arr.shape)}
coords = {name: np.arange(n) for (name, n) in zip(names, arr.shape, strict=True)}
original = xr.DataArray(arr, dims=names, coords=coords)
roundtripped = xr.DataArray(original.to_pandas())
xr.testing.assert_identical(original, roundtripped)
8 changes: 3 additions & 5 deletions pyproject.toml
@@ -84,14 +84,13 @@ source = ["xarray"]
exclude_lines = ["pragma: no cover", "if TYPE_CHECKING"]

[tool.mypy]
enable_error_code = "redundant-self"
enable_error_code = ["ignore-without-code", "redundant-self", "redundant-expr"]
exclude = [
'build',
'xarray/util/generate_.*\.py',
'xarray/datatree_/doc/.*\.py',
]
files = "xarray"
show_error_codes = true
show_error_context = true
warn_redundant_casts = true
warn_unused_configs = true
@@ -240,7 +239,6 @@ extend-exclude = [
"doc",
"_typed_ops.pyi",
]
target-version = "py310"

[tool.ruff.lint]
# E402: module level import not at top of file
@@ -249,13 +247,13 @@ target-version = "py310"
extend-safe-fixes = [
"TID252", # absolute imports
]
ignore = [
extend-ignore = [
"E402",
"E501",
"E731",
"UP007",
]
select = [
extend-select = [
"F", # Pyflakes
"E", # Pycodestyle
"W",
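
The [tool.mypy] edit above replaces the single redundant-self code with a list that also enables ignore-without-code (every # type: ignore must name the error code it suppresses) and redundant-expr (conditions mypy can prove always true or false are reported); show_error_codes is dropped because it is now mypy's default, and ruff's target-version is dropped as redundant with the project metadata, per the commit messages. A small sketch, not xarray code, of what the two newly enabled codes report:

from typing import Literal

def check(combine: Literal["nested", "by_coords"], concat_dim: str | None) -> None:
    if combine == "nested":
        ...
    elif combine == "by_coords" and concat_dim is not None:
        # redundant-expr: the left operand of "and" is always true here, since the
        # first branch already ruled out "nested" -- the same reasoning behind the
        # open_mfdataset simplification further down.
        raise ValueError("concat_dim has no effect with combine='by_coords'")

# ignore-without-code: a bare ignore is itself reported ...
x: int = "oops"  # type: ignore
# ... while an ignore that names the suppressed code is accepted.
y: int = "oops"  # type: ignore[assignment]
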
16 changes: 10 additions & 6 deletions xarray/backends/api.py
@@ -51,7 +51,7 @@
try:
from dask.delayed import Delayed
except ImportError:
Delayed = None # type: ignore
Delayed = None # type: ignore[assignment, misc]
from io import BufferedIOBase

from xarray.backends.common import BackendEntrypoint
@@ -1113,7 +1113,7 @@ def open_mfdataset(
list(combined_ids_paths.keys()),
list(combined_ids_paths.values()),
)
elif combine == "by_coords" and concat_dim is not None:
elif concat_dim is not None:
raise ValueError(
"When combine='by_coords', passing a value for `concat_dim` has no "
"effect. To manually combine along a specific dimension you should "
@@ -1432,7 +1432,7 @@ def to_netcdf(
store.sync()
return target.getvalue()
finally:
if not multifile and compute:
if not multifile and compute: # type: ignore[redundant-expr]
store.close()

if not compute:
@@ -1585,8 +1585,9 @@ def save_mfdataset(
multifile=True,
**kwargs,
)
for ds, path, group in zip(datasets, paths, groups)
]
for ds, path, group in zip(datasets, paths, groups, strict=True)
],
strict=True,
)

try:
@@ -1600,7 +1601,10 @@ def save_mfdataset(
import dask

return dask.delayed(
[dask.delayed(_finalize_store)(w, s) for w, s in zip(writes, stores)]
[
dask.delayed(_finalize_store)(w, s)
for w, s in zip(writes, stores, strict=True)
]
)


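
The save_mfdataset hunk above adds strict=True at two levels of a zip(*...) transpose: the inner zip pairs each dataset with its path and group, while the outer zip(* ...) splits the (writer, store) pairs returned by to_netcdf(..., multifile=True) back into separate sequences. A generic sketch of the transpose idiom, with illustrative names rather than the real xarray objects:

# Each call in the comprehension would return a (writer, store)-style pair.
pairs = [("writer-1", "store-1"), ("writer-2", "store-2")]

# zip(*pairs) regroups the first elements together and the second elements together;
# strict=True guards against a malformed entry of a different length.
writers, stores = zip(*pairs, strict=True)
print(writers)  # ('writer-1', 'writer-2')
print(stores)   # ('store-1', 'store-2')
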
2 changes: 1 addition & 1 deletion xarray/backends/common.py
@@ -431,7 +431,7 @@ def set_dimensions(self, variables, unlimited_dims=None):
for v in unlimited_dims: # put unlimited_dims first
dims[v] = None
for v in variables.values():
dims.update(dict(zip(v.dims, v.shape)))
dims.update(dict(zip(v.dims, v.shape, strict=True)))

for dim, length in dims.items():
if dim in existing_dims and length != existing_dims[dim]:
2 changes: 1 addition & 1 deletion xarray/backends/file_manager.py
@@ -276,7 +276,7 @@ def __getstate__(self):
def __setstate__(self, state) -> None:
"""Restore from a pickle."""
opener, args, mode, kwargs, lock, manager_id = state
self.__init__( # type: ignore
self.__init__( # type: ignore[misc]
opener, *args, mode=mode, kwargs=kwargs, lock=lock, manager_id=manager_id
)

4 changes: 3 additions & 1 deletion xarray/backends/h5netcdf_.py
@@ -208,7 +208,9 @@ def open_store_variable(self, name, var):
"shuffle": var.shuffle,
}
if var.chunks:
encoding["preferred_chunks"] = dict(zip(var.dimensions, var.chunks))
encoding["preferred_chunks"] = dict(
zip(var.dimensions, var.chunks, strict=True)
)
# Convert h5py-style compression options to NetCDF4-Python
# style, if possible
if var.compression == "gzip":
8 changes: 6 additions & 2 deletions xarray/backends/netCDF4_.py
@@ -278,7 +278,9 @@ def _extract_nc4_variable_encoding(
chunksizes = encoding["chunksizes"]
chunks_too_big = any(
c > d and dim not in unlimited_dims
for c, d, dim in zip(chunksizes, variable.shape, variable.dims)
for c, d, dim in zip(
chunksizes, variable.shape, variable.dims, strict=False
)
)
has_original_shape = "original_shape" in encoding
changed_shape = (
@@ -446,7 +448,9 @@ def open_store_variable(self, name: str, var):
else:
encoding["contiguous"] = False
encoding["chunksizes"] = tuple(chunking)
encoding["preferred_chunks"] = dict(zip(var.dimensions, chunking))
encoding["preferred_chunks"] = dict(
zip(var.dimensions, chunking, strict=True)
)
# TODO: figure out how to round-trip "endian-ness" without raising
# warnings from netCDF4
# encoding['endian'] = var.endian()
2 changes: 1 addition & 1 deletion xarray/backends/plugins.py
@@ -199,7 +199,7 @@ def get_backend(engine: str | type[BackendEntrypoint]) -> BackendEntrypoint:
"https://docs.xarray.dev/en/stable/getting-started-guide/installing.html"
)
backend = engines[engine]
elif isinstance(engine, type) and issubclass(engine, BackendEntrypoint):
elif issubclass(engine, BackendEntrypoint):
backend = engine()
else:
raise TypeError(
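
The plugins.py simplification above is another redundant-expr consequence: get_backend takes engine: str | type[BackendEntrypoint] (visible in the hunk header), and the str case is handled by the first branch, so by the time the elif runs mypy has narrowed engine to type[BackendEntrypoint] and an isinstance(engine, type) guard is provably always true. A reduced sketch of that narrowing, with a stand-in class rather than the real entrypoint machinery:

class BackendEntrypoint:  # stand-in for xarray.backends.common.BackendEntrypoint
    pass

registered: dict[str, BackendEntrypoint] = {"demo": BackendEntrypoint()}

def get_backend_sketch(engine: str | type[BackendEntrypoint]) -> BackendEntrypoint:
    if isinstance(engine, str):
        return registered[engine]
    # engine is narrowed to type[BackendEntrypoint] here, so prefixing the next
    # test with "isinstance(engine, type) and" would be flagged as always true.
    elif issubclass(engine, BackendEntrypoint):
        return engine()
    else:
        raise TypeError("engine must be a string or a BackendEntrypoint subclass")
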
8 changes: 4 additions & 4 deletions xarray/backends/zarr.py
@@ -186,7 +186,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks):
# TODO: incorporate synchronizer to allow writes from multiple dask
# threads
if var_chunks and enc_chunks_tuple:
for zchunk, dchunks in zip(enc_chunks_tuple, var_chunks):
for zchunk, dchunks in zip(enc_chunks_tuple, var_chunks, strict=True):
for dchunk in dchunks[:-1]:
if dchunk % zchunk:
base_error = (
@@ -548,13 +548,13 @@ def open_store_variable(self, name, zarr_array=None):

encoding = {
"chunks": zarr_array.chunks,
"preferred_chunks": dict(zip(dimensions, zarr_array.chunks)),
"preferred_chunks": dict(zip(dimensions, zarr_array.chunks, strict=True)),
"compressor": zarr_array.compressor,
"filters": zarr_array.filters,
}
# _FillValue needs to be in attributes, not encoding, so it will get
# picked up by decode_cf
if getattr(zarr_array, "fill_value") is not None:
if zarr_array.fill_value is not None:
attributes["_FillValue"] = zarr_array.fill_value

return Variable(dimensions, data, attributes, encoding)
@@ -576,7 +576,7 @@ def get_dimensions(self):
dimensions = {}
for k, v in self.zarr_group.arrays():
dim_names, _ = _get_zarr_dims_and_attrs(v, DIMENSION_KEY, try_nczarr)
for d, s in zip(dim_names, v.shape):
for d, s in zip(dim_names, v.shape, strict=True):
if d in dimensions and dimensions[d] != s:
raise ValueError(
f"found conflicting lengths for dimension {d} "
2 changes: 1 addition & 1 deletion xarray/coding/calendar_ops.py
@@ -198,7 +198,7 @@ def convert_calendar(
_convert_to_new_calendar_with_new_day_of_year(
date, newdoy, calendar, use_cftime
)
for date, newdoy in zip(time.variable._data.array, new_doy)
for date, newdoy in zip(time.variable._data.array, new_doy, strict=True)
],
dims=(dim,),
name=dim,
14 changes: 7 additions & 7 deletions xarray/coding/times.py
@@ -204,7 +204,7 @@ def _unpack_time_units_and_ref_date(units: str) -> tuple[str, pd.Timestamp]:


def _decode_cf_datetime_dtype(
data, units: str, calendar: str, use_cftime: bool | None
data, units: str, calendar: str | None, use_cftime: bool | None
) -> np.dtype:
# Verify that at least the first and last date can be decoded
# successfully. Otherwise, tracebacks end up swallowed by
@@ -704,7 +704,7 @@ def _cast_to_dtype_if_safe(num: np.ndarray, dtype: np.dtype) -> np.ndarray:


def encode_cf_datetime(
dates: T_DuckArray, # type: ignore
dates: T_DuckArray, # type: ignore[misc]
units: str | None = None,
calendar: str | None = None,
dtype: np.dtype | None = None,
@@ -726,7 +726,7 @@


def _eagerly_encode_cf_datetime(
dates: T_DuckArray, # type: ignore
dates: T_DuckArray, # type: ignore[misc]
units: str | None = None,
calendar: str | None = None,
dtype: np.dtype | None = None,
@@ -809,7 +809,7 @@ def _eagerly_encode_cf_datetime(


def _encode_cf_datetime_within_map_blocks(
dates: T_DuckArray, # type: ignore
dates: T_DuckArray, # type: ignore[misc]
units: str,
calendar: str,
dtype: np.dtype,
@@ -859,7 +859,7 @@ def _lazily_encode_cf_datetime(


def encode_cf_timedelta(
timedeltas: T_DuckArray, # type: ignore
timedeltas: T_DuckArray, # type: ignore[misc]
units: str | None = None,
dtype: np.dtype | None = None,
) -> tuple[T_DuckArray, str]:
@@ -871,7 +871,7 @@ def encode_cf_timedelta(


def _eagerly_encode_cf_timedelta(
timedeltas: T_DuckArray, # type: ignore
timedeltas: T_DuckArray, # type: ignore[misc]
units: str | None = None,
dtype: np.dtype | None = None,
allow_units_modification: bool = True,
@@ -923,7 +923,7 @@ def _eagerly_encode_cf_timedelta(


def _encode_cf_timedelta_within_map_blocks(
timedeltas: T_DuckArray, # type:ignore
timedeltas: T_DuckArray, # type: ignore[misc]
units: str,
dtype: np.dtype,
) -> T_DuckArray:
7 changes: 4 additions & 3 deletions xarray/core/alignment.py
@@ -405,14 +405,15 @@ def align_indexes(self) -> None:
zip(
[joined_index] + matching_indexes,
[joined_index_vars] + matching_index_vars,
strict=True,
)
)
need_reindex = self._need_reindex(dims, cmp_indexes)
else:
if len(matching_indexes) > 1:
need_reindex = self._need_reindex(
dims,
list(zip(matching_indexes, matching_index_vars)),
list(zip(matching_indexes, matching_index_vars, strict=True)),
)
else:
need_reindex = False
@@ -557,7 +558,7 @@ def reindex_all(self) -> None:
self.results = tuple(
self._reindex_one(obj, matching_indexes)
for obj, matching_indexes in zip(
self.objects, self.objects_matching_indexes
self.objects, self.objects_matching_indexes, strict=True
)
)

@@ -952,7 +953,7 @@ def is_alignable(obj):
fill_value=fill_value,
)

for position, key, aligned_obj in zip(positions, keys, aligned):
for position, key, aligned_obj in zip(positions, keys, aligned, strict=True):
if key is no_key:
out[position] = aligned_obj
else:
7 changes: 4 additions & 3 deletions xarray/core/combine.py
@@ -139,7 +139,8 @@ def _infer_concat_order_from_coords(datasets):
# Append positions along extra dimension to structure which
# encodes the multi-dimensional concatenation order
tile_ids = [
tile_id + (position,) for tile_id, position in zip(tile_ids, order)
tile_id + (position,)
for tile_id, position in zip(tile_ids, order, strict=True)
]

if len(datasets) > 1 and not concat_dims:
@@ -148,7 +149,7 @@ def _infer_concat_order_from_coords(datasets):
"order the datasets for concatenation"
)

combined_ids = dict(zip(tile_ids, datasets))
combined_ids = dict(zip(tile_ids, datasets, strict=True))

return combined_ids, concat_dims

@@ -349,7 +350,7 @@ def _nested_combine(
combined_ids = _infer_concat_order_from_positions(datasets)
else:
# Already sorted so just use the ids already passed
combined_ids = dict(zip(ids, datasets))
combined_ids = dict(zip(ids, datasets, strict=True))

# Check that the inferred shape is combinable
_check_shape_tile_ids(combined_ids)
2 changes: 1 addition & 1 deletion xarray/core/common.py
@@ -254,7 +254,7 @@ def sizes(self: Any) -> Mapping[Hashable, int]:
--------
Dataset.sizes
"""
return Frozen(dict(zip(self.dims, self.shape)))
return Frozen(dict(zip(self.dims, self.shape, strict=True)))


class AttrAccessMixin: