From 630ac54e724f243a9e322fc646f0fd9e7b950d33 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 28 Apr 2021 22:30:30 -0600 Subject: [PATCH 01/16] backends code cleanup --- xarray/backends/api.py | 30 +++++++---------- xarray/backends/cfgrib_.py | 3 +- xarray/backends/h5netcdf_.py | 14 +++----- xarray/backends/netCDF4_.py | 62 +++++++++++++++--------------------- xarray/backends/netcdf3.py | 2 -- xarray/backends/plugins.py | 15 ++++----- xarray/backends/pydap_.py | 5 ++- xarray/backends/scipy_.py | 20 ++++++------ xarray/backends/zarr.py | 9 ++---- 9 files changed, 65 insertions(+), 95 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 01079025434..cab6f18b77d 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -101,12 +101,11 @@ def _get_default_engine_netcdf(): def _get_default_engine(path: str, allow_remote: bool = False): if allow_remote and is_remote_uri(path): - engine = _get_default_engine_remote_uri() + return _get_default_engine_remote_uri() elif path.endswith(".gz"): - engine = _get_default_engine_gz() + return _get_default_engine_gz() else: - engine = _get_default_engine_netcdf() - return engine + return _get_default_engine_netcdf() def _validate_dataset_names(dataset): @@ -294,8 +293,7 @@ def _chunk_ds( name_prefix=name_prefix, token=token, ) - ds = backend_ds._replace(variables) - return ds + return backend_ds._replace(variables) def _dataset_from_backend_dataset( @@ -307,12 +305,11 @@ def _dataset_from_backend_dataset( overwrite_encoded_chunks, **extra_tokens, ): - if not (isinstance(chunks, (int, dict)) or chunks is None): - if chunks != "auto": - raise ValueError( - "chunks must be an int, dict, 'auto', or None. " - "Instead found %s. " % chunks - ) + if not isinstance(chunks, (int, dict)) and chunks is not None and chunks != "auto": + raise ValueError( + "chunks must be an int, dict, 'auto', or None. " + "Instead found %s. 
" % chunks + ) _protect_dataset_variables_inplace(backend_ds, cache) if chunks is None: @@ -330,9 +327,8 @@ def _dataset_from_backend_dataset( ds.set_close(backend_ds._close) # Ensure source filename always stored in dataset object (GH issue #2550) - if "source" not in ds.encoding: - if isinstance(filename_or_obj, str): - ds.encoding["source"] = filename_or_obj + if "source" not in ds.encoding and isinstance(filename_or_obj, str): + ds.encoding["source"] = filename_or_obj return ds @@ -503,7 +499,7 @@ def open_dataset( **decoders, **kwargs, ) - ds = _dataset_from_backend_dataset( + return _dataset_from_backend_dataset( backend_ds, filename_or_obj, engine, @@ -515,8 +511,6 @@ def open_dataset( **kwargs, ) - return ds - def open_dataarray( filename_or_obj, diff --git a/xarray/backends/cfgrib_.py b/xarray/backends/cfgrib_.py index 24a075aa811..9e5546f052a 100644 --- a/xarray/backends/cfgrib_.py +++ b/xarray/backends/cfgrib_.py @@ -90,8 +90,7 @@ def get_dimensions(self): def get_encoding(self): dims = self.get_dimensions() - encoding = {"unlimited_dims": {k for k, v in dims.items() if v is None}} - return encoding + return {"unlimited_dims": {k for k, v in dims.items() if v is None}} class CfgribfBackendEntrypoint(BackendEntrypoint): diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 84e89f80dae..4e7e6dd8d0b 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -37,8 +37,7 @@ class H5NetCDFArrayWrapper(BaseNetCDF4Array): def get_array(self, needs_lock=True): ds = self.datastore._acquire(needs_lock) - variable = ds.variables[self.variable_name] - return variable + return ds.variables[self.variable_name] def __getitem__(self, key): return indexing.explicit_indexing_adapter( @@ -102,7 +101,7 @@ def __init__(self, manager, group=None, mode=None, lock=HDF5_LOCK, autoclose=Fal if group is None: root, group = find_root_and_group(manager) else: - if not type(manager) is h5netcdf.File: + if type(manager) is not h5netcdf.File: raise ValueError( "must supply a h5netcdf.File if the group " "argument is provided" @@ -233,11 +232,9 @@ def get_dimensions(self): return self.ds.dimensions def get_encoding(self): - encoding = {} - encoding["unlimited_dims"] = { - k for k, v in self.ds.dimensions.items() if v is None + return { + "unlimited_dims": {k for k, v in self.ds.dimensions.items() if v is None} } - return encoding def set_dimension(self, name, length, is_unlimited=False): if is_unlimited: @@ -380,7 +377,7 @@ def open_dataset( store_entrypoint = StoreBackendEntrypoint() - ds = store_entrypoint.open_dataset( + return store_entrypoint.open_dataset( store, mask_and_scale=mask_and_scale, decode_times=decode_times, @@ -390,7 +387,6 @@ def open_dataset( use_cftime=use_cftime, decode_timedelta=decode_timedelta, ) - return ds if has_h5netcdf: diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index a60c940c3c4..e6b5ce3289b 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -129,18 +129,16 @@ def _check_encoding_dtype_is_vlen_string(dtype): def _get_datatype(var, nc_format="NETCDF4", raise_on_invalid_encoding=False): if nc_format == "NETCDF4": - datatype = _nc4_dtype(var) - else: - if "dtype" in var.encoding: - encoded_dtype = var.encoding["dtype"] - _check_encoding_dtype_is_vlen_string(encoded_dtype) - if raise_on_invalid_encoding: - raise ValueError( - "encoding dtype=str for vlen strings is only supported " - "with format='NETCDF4'." 
- ) - datatype = var.dtype - return datatype + return _nc4_dtype(var) + if "dtype" in var.encoding: + encoded_dtype = var.encoding["dtype"] + _check_encoding_dtype_is_vlen_string(encoded_dtype) + if raise_on_invalid_encoding: + raise ValueError( + "encoding dtype=str for vlen strings is only supported " + "with format='NETCDF4'." + ) + return var.dtype def _nc4_dtype(var): @@ -161,10 +159,7 @@ def _netcdf4_create_group(dataset, name): def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group): - if group in {None, "", "/"}: - # use the root group - return ds - else: + if group not in {None, "", "/"}: # make sure it's a string if not isinstance(group, str): raise ValueError("group must be a string or None") @@ -179,7 +174,9 @@ def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group): else: # wrap error to provide slightly more helpful message raise OSError("group not found: %s" % key, e) - return ds + + # use the root group + return ds def _ensure_fill_value_valid(data, attributes): @@ -203,7 +200,7 @@ def _force_native_endianness(var): # if endian exists, remove it from the encoding. var.encoding.pop("endian", None) # check to see if encoding has a value for endian its 'native' - if not var.encoding.get("endian", "native") == "native": + if var.encoding.get("endian", "native") != "native": raise NotImplementedError( "Attempt to write non-native endian type, " "this is not supported by the netCDF4 " @@ -282,10 +279,7 @@ def _extract_nc4_variable_encoding( def _is_list_of_strings(value): - if np.asarray(value).dtype.kind in ["U", "S"] and np.asarray(value).size > 1: - return True - else: - return False + return np.asarray(value).dtype.kind in ["U", "S"] and np.asarray(value).size > 1 class NetCDF4DataStore(WritableCFDataStore): @@ -313,7 +307,7 @@ def __init__( if group is None: root, group = find_root_and_group(manager) else: - if not type(manager) is netCDF4.Dataset: + if type(manager) is not netCDF4.Dataset: raise ValueError( "must supply a root netCDF4.Dataset if the group " "argument is provided" @@ -359,10 +353,7 @@ def open( if lock is None: if mode == "r": - if is_remote_uri(filename): - lock = NETCDFC_LOCK - else: - lock = NETCDF4_PYTHON_LOCK + lock = NETCDFC_LOCK if is_remote_uri(filename) else NETCDF4_PYTHON_LOCK else: if format is None or format.startswith("NETCDF4"): base_lock = NETCDF4_PYTHON_LOCK @@ -417,25 +408,22 @@ def open_store_variable(self, name, var): return Variable(dimensions, data, attributes, encoding) def get_variables(self): - dsvars = FrozenDict( + return FrozenDict( (k, self.open_store_variable(k, v)) for k, v in self.ds.variables.items() ) - return dsvars def get_attrs(self): - attrs = FrozenDict((k, self.ds.getncattr(k)) for k in self.ds.ncattrs()) - return attrs + return FrozenDict((k, self.ds.getncattr(k)) for k in self.ds.ncattrs()) def get_dimensions(self): - dims = FrozenDict((k, len(v)) for k, v in self.ds.dimensions.items()) - return dims + return FrozenDict((k, len(v)) for k, v in self.ds.dimensions.items()) def get_encoding(self): - encoding = {} - encoding["unlimited_dims"] = { - k for k, v in self.ds.dimensions.items() if v.isunlimited() + return { + "unlimited_dims": { + k for k, v in self.ds.dimensions.items() if v.isunlimited() + } } - return encoding def set_dimension(self, name, length, is_unlimited=False): dim_length = length if not is_unlimited else None diff --git a/xarray/backends/netcdf3.py b/xarray/backends/netcdf3.py index 001af0bf8e1..5fdd0534d57 100644 --- a/xarray/backends/netcdf3.py +++ 
b/xarray/backends/netcdf3.py @@ -125,8 +125,6 @@ def is_valid_nc3_name(s): """ if not isinstance(s, str): return False - if not isinstance(s, str): - s = s.decode("utf-8") num_bytes = len(s.encode("utf-8")) return ( (unicodedata.normalize("NFC", s) == s) diff --git a/xarray/backends/plugins.py b/xarray/backends/plugins.py index 23e83b0021e..dddb8ff3d23 100644 --- a/xarray/backends/plugins.py +++ b/xarray/backends/plugins.py @@ -70,10 +70,12 @@ def set_missing_parameters(backend_entrypoints): def sort_backends(backend_entrypoints): - ordered_backends_entrypoints = {} - for be_name in STANDARD_BACKENDS_ORDER: - if backend_entrypoints.get(be_name, None) is not None: - ordered_backends_entrypoints[be_name] = backend_entrypoints.pop(be_name) + ordered_backends_entrypoints = { + be_name: backend_entrypoints.pop(be_name) + for be_name in STANDARD_BACKENDS_ORDER + if backend_entrypoints.get(be_name, None) is not None + } + ordered_backends_entrypoints.update( {name: backend_entrypoints[name] for name in sorted(backend_entrypoints)} ) @@ -87,10 +89,7 @@ def build_engines(pkg_entrypoints): backend_entrypoints.update(external_backend_entrypoints) backend_entrypoints = sort_backends(backend_entrypoints) set_missing_parameters(backend_entrypoints) - engines = {} - for name, backend in backend_entrypoints.items(): - engines[name] = backend() - return engines + return {name: backend() for name, backend in backend_entrypoints.items()} @functools.lru_cache(maxsize=1) diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py index 148f32cf982..273bb372340 100644 --- a/xarray/backends/pydap_.py +++ b/xarray/backends/pydap_.py @@ -45,7 +45,7 @@ def _getitem(self, key): result = robust_getitem(array, key, catch=ValueError) # in some cases, pydap doesn't squeeze axes automatically like numpy axis = tuple(n for n, k in enumerate(key) if isinstance(k, integer_types)) - if result.ndim + len(axis) != array.ndim and len(axis) > 0: + if result.ndim + len(axis) != array.ndim and axis: result = np.squeeze(result, axis) return result @@ -130,7 +130,7 @@ def open_dataset( store_entrypoint = StoreBackendEntrypoint() with close_on_error(store): - ds = store_entrypoint.open_dataset( + return store_entrypoint.open_dataset( store, mask_and_scale=mask_and_scale, decode_times=decode_times, @@ -140,7 +140,6 @@ def open_dataset( use_cftime=use_cftime, decode_timedelta=decode_timedelta, ) - return ds if has_pydap: diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index c27716ea44d..5899ec53241 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -174,11 +174,9 @@ def get_dimensions(self): return Frozen(self.ds.dimensions) def get_encoding(self): - encoding = {} - encoding["unlimited_dims"] = { - k for k, v in self.ds.dimensions.items() if v is None + return { + "unlimited_dims": {k for k, v in self.ds.dimensions.items() if v is None} } - return encoding def set_dimension(self, name, length, is_unlimited=False): if name in self.ds.dimensions: @@ -204,12 +202,14 @@ def encode_variable(self, variable): def prepare_variable( self, name, variable, check_encoding=False, unlimited_dims=None ): - if check_encoding and variable.encoding: - if variable.encoding != {"_FillValue": None}: - raise ValueError( - "unexpected encoding for scipy backend: %r" - % list(variable.encoding) - ) + if ( + check_encoding + and variable.encoding + and variable.encoding != {"_FillValue": None} + ): + raise ValueError( + "unexpected encoding for scipy backend: %r" % list(variable.encoding) + ) data = 
variable.data # nb. this still creates a numpy array in all memory, even though we diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index a4086eacece..f04e143d6a4 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -379,8 +379,7 @@ def get_variables(self): ) def get_attrs(self): - attributes = dict(self.ds.attrs.asdict()) - return attributes + return dict(self.ds.attrs.asdict()) def get_dimensions(self): dimensions = {} @@ -458,7 +457,7 @@ def store( variables_without_encoding, attributes ) - if len(existing_variables) > 0: + if existing_variables: # there are variables to append # their encoding must be the same as in the store ds = open_zarr(self.ds.store, group=self.ds.path, chunks=None) @@ -684,7 +683,7 @@ def open_zarr( "storage_options": storage_options, } - ds = open_dataset( + return open_dataset( filename_or_obj=store, group=group, decode_cf=decode_cf, @@ -700,8 +699,6 @@ def open_zarr( use_cftime=use_cftime, ) - return ds - class ZarrBackendEntrypoint(BackendEntrypoint): def open_dataset( From d68ede475774b8ab54853da5ddcbae93dd0be50e Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 28 Apr 2021 22:35:25 -0600 Subject: [PATCH 02/16] Coding module cleanup --- xarray/coding/cftime_offsets.py | 14 ++------- xarray/coding/cftimeindex.py | 26 ++++++++-------- xarray/coding/frequencies.py | 15 +++++---- xarray/coding/strings.py | 54 ++++++++++++++++----------------- xarray/coding/times.py | 3 +- xarray/coding/variables.py | 18 +++++------ 6 files changed, 60 insertions(+), 70 deletions(-) diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index c25d5296c41..c031bffb2cd 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -178,8 +178,7 @@ def _get_day_of_month(other, day_option): if day_option == "start": return 1 elif day_option == "end": - days_in_month = _days_in_month(other) - return days_in_month + return _days_in_month(other) elif day_option is None: # Note: unlike `_shift_month`, _get_day_of_month does not # allow day_option = None @@ -291,10 +290,7 @@ def roll_qtrday(other, n, month, day_option, modby=3): def _validate_month(month, default_month): - if month is None: - result_month = default_month - else: - result_month = month + result_month = default_month if month is None else month if not isinstance(result_month, int): raise TypeError( "'self.month' must be an integer value between 1 " @@ -687,11 +683,7 @@ def to_offset(freq): freq = freq_data["freq"] multiples = freq_data["multiple"] - if multiples is None: - multiples = 1 - else: - multiples = int(multiples) - + multiples = 1 if multiples is None else int(multiples) return _FREQUENCIES[freq](n=multiples) diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 15f75955e00..a43724a6f31 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -255,7 +255,7 @@ def format_times( indent = first_row_offset if row == 0 else offset row_end = last_row_end if row == n_rows - 1 else intermediate_row_end times_for_row = index[row * n_per_row : (row + 1) * n_per_row] - representation = representation + format_row( + representation += format_row( times_for_row, indent=indent, separator=separator, row_end=row_end ) @@ -268,8 +268,9 @@ def format_attrs(index, separator=", "): "dtype": f"'{index.dtype}'", "length": f"{len(index)}", "calendar": f"'{index.calendar}'", + "freq": f"'{index.freq}'" if len(index) >= 3 else None, } - attrs["freq"] = f"'{index.freq}'" if len(index) >= 3 else None + 
attrs_str = [f"{k}={v}" for k, v in attrs.items()] attrs_str = f"{separator}".join(attrs_str) return attrs_str @@ -350,14 +351,13 @@ def __repr__(self): attrs_str = format_attrs(self) # oneliner only if smaller than display_width full_repr_str = f"{klass_name}([{datastr}], {attrs_str})" - if len(full_repr_str) <= display_width: - return full_repr_str - else: + if len(full_repr_str) > display_width: # if attrs_str too long, one per line if len(attrs_str) >= display_width - offset: attrs_str = attrs_str.replace(",", f",\n{' '*(offset-2)}") full_repr_str = f"{klass_name}([{datastr}],\n{' '*(offset-1)}{attrs_str})" - return full_repr_str + + return full_repr_str def _partial_date_slice(self, resolution, parsed): """Adapted from @@ -470,15 +470,15 @@ def get_loc(self, key, method=None, tolerance=None): def _maybe_cast_slice_bound(self, label, side, kind): """Adapted from pandas.tseries.index.DatetimeIndex._maybe_cast_slice_bound""" - if isinstance(label, str): - parsed, resolution = _parse_iso8601_with_reso(self.date_type, label) - start, end = _parsed_string_to_bounds(self.date_type, resolution, parsed) - if self.is_monotonic_decreasing and len(self) > 1: - return end if side == "left" else start - return start if side == "left" else end - else: + if not isinstance(label, str): return label + parsed, resolution = _parse_iso8601_with_reso(self.date_type, label) + start, end = _parsed_string_to_bounds(self.date_type, resolution, parsed) + if self.is_monotonic_decreasing and len(self) > 1: + return end if side == "left" else start + return start if side == "left" else end + # TODO: Add ability to use integer range outside of iloc? # e.g. series[1:5]. def get_value(self, series, key): diff --git a/xarray/coding/frequencies.py b/xarray/coding/frequencies.py index c83c766f071..f1695051f95 100644 --- a/xarray/coding/frequencies.py +++ b/xarray/coding/frequencies.py @@ -187,7 +187,7 @@ def _get_quartely_rule(self): if len(self.month_deltas) > 1: return None - if not self.month_deltas[0] % 3 == 0: + if self.month_deltas[0] % 3 != 0: return None return {"cs": "QS", "ce": "Q"}.get(month_anchor_check(self.index)) @@ -232,13 +232,13 @@ def _is_multiple(us, mult: int): def _maybe_add_count(base: str, count: float): """If count is greater than 1, add it to the base offset string""" - if count != 1: - assert count == int(count) - count = int(count) - return f"{count}{base}" - else: + if count == 1: return base + assert count == int(count) + count = int(count) + return f"{count}{base}" + def month_anchor_check(dates): """Return the monthly offset string. 
@@ -259,8 +259,7 @@ def month_anchor_check(dates): if calendar_end: cal = date.day == date.daysinmonth - if calendar_end: - calendar_end &= cal + calendar_end &= cal elif not calendar_start: break diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index e16e983fd8a..9395e42f8df 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -130,19 +130,19 @@ def bytes_to_char(arr): if arr.dtype.kind != "S": raise ValueError("argument must have a fixed-width bytes dtype") - if is_duck_dask_array(arr): - import dask.array as da - - return da.map_blocks( - _numpy_bytes_to_char, - arr, - dtype="S1", - chunks=arr.chunks + ((arr.dtype.itemsize,)), - new_axis=[arr.ndim], - ) - else: + if not is_duck_dask_array(arr): return _numpy_bytes_to_char(arr) + import dask.array as da + + return da.map_blocks( + _numpy_bytes_to_char, + arr, + dtype="S1", + chunks=arr.chunks + ((arr.dtype.itemsize,)), + new_axis=[arr.ndim], + ) + def _numpy_bytes_to_char(arr): """Like netCDF4.stringtochar, but faster and more flexible.""" @@ -166,25 +166,25 @@ def char_to_bytes(arr): # can't make an S0 dtype return np.zeros(arr.shape[:-1], dtype=np.string_) - if is_duck_dask_array(arr): - import dask.array as da + if not is_duck_dask_array(arr): + return StackedBytesArray(arr) - if len(arr.chunks[-1]) > 1: - raise ValueError( - "cannot stacked dask character array with " - "multiple chunks in the last dimension: {}".format(arr) - ) + import dask.array as da - dtype = np.dtype("S" + str(arr.shape[-1])) - return da.map_blocks( - _numpy_char_to_bytes, - arr, - dtype=dtype, - chunks=arr.chunks[:-1], - drop_axis=[arr.ndim - 1], + if len(arr.chunks[-1]) > 1: + raise ValueError( + "cannot stacked dask character array with " + "multiple chunks in the last dimension: {}".format(arr) ) - else: - return StackedBytesArray(arr) + + dtype = np.dtype("S" + str(arr.shape[-1])) + return da.map_blocks( + _numpy_char_to_bytes, + arr, + dtype=dtype, + chunks=arr.chunks[:-1], + drop_axis=[arr.ndim - 1], + ) def _numpy_char_to_bytes(arr): diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 54400414ebc..fde38de1a31 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -370,8 +370,7 @@ def infer_timedelta_units(deltas): """ deltas = to_timedelta_unboxed(np.asarray(deltas).ravel()) unique_timedeltas = np.unique(deltas[pd.notnull(deltas)]) - units = _infer_time_units_from_diff(unique_timedeltas) - return units + return _infer_time_units_from_diff(unique_timedeltas) def cftime_to_nptime(times): diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 938752c4efc..78b227bf396 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -91,13 +91,13 @@ def lazy_elemwise_func(array, func, dtype): ------- Either a dask.array.Array or _ElementwiseFunctionArray. 
""" - if is_duck_dask_array(array): - import dask.array as da - - return da.map_blocks(func, array, dtype=dtype) - else: + if not is_duck_dask_array(array): return _ElementwiseFunctionArray(array, func, dtype) + import dask.array as da + + return da.map_blocks(func, array, dtype=dtype) + def unpack_for_encoding(var): return var.dims, var.data, var.attrs.copy(), var.encoding.copy() @@ -255,10 +255,10 @@ def encode(self, variable, name=None): if "scale_factor" in encoding or "add_offset" in encoding: dtype = _choose_float_dtype(data.dtype, "add_offset" in encoding) data = data.astype(dtype=dtype, copy=True) - if "add_offset" in encoding: - data -= pop_to(encoding, attrs, "add_offset", name=name) - if "scale_factor" in encoding: - data /= pop_to(encoding, attrs, "scale_factor", name=name) + if "add_offset" in encoding: + data -= pop_to(encoding, attrs, "add_offset", name=name) + if "scale_factor" in encoding: + data /= pop_to(encoding, attrs, "scale_factor", name=name) return Variable(dims, data, attrs, encoding) From a4f0346ea28b8519d1953c20ff466933317825b3 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 28 Apr 2021 23:00:32 -0600 Subject: [PATCH 03/16] Core module cleanup --- xarray/core/accessor_dt.py | 64 +++++++++-------- xarray/core/accessor_str.py | 14 ++-- xarray/core/alignment.py | 27 +++----- xarray/core/combine.py | 5 +- xarray/core/coordinates.py | 113 +++++++++++++++---------------- xarray/core/dask_array_compat.py | 4 +- xarray/core/dataarray.py | 21 +++--- xarray/core/dtypes.py | 5 +- xarray/core/duck_array_ops.py | 8 +-- xarray/core/formatting_html.py | 8 +-- xarray/core/groupby.py | 26 +++---- xarray/core/merge.py | 23 +++---- xarray/core/npcompat.py | 3 +- xarray/core/resample_cftime.py | 18 +---- xarray/core/rolling.py | 46 +++++-------- xarray/core/utils.py | 30 ++++---- xarray/core/variable.py | 24 ++----- 17 files changed, 179 insertions(+), 260 deletions(-) diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index 1d4ef755fa0..da3099aa72b 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -81,20 +81,20 @@ def _get_date_field(values, name, dtype): else: access_method = _access_through_cftimeindex - if is_duck_dask_array(values): - from dask.array import map_blocks + if not is_duck_dask_array(values): + return access_method(values, name) - new_axis = chunks = None - # isocalendar adds adds an axis - if name == "isocalendar": - chunks = (3,) + values.chunksize - new_axis = 0 + from dask.array import map_blocks - return map_blocks( - access_method, values, name, dtype=dtype, new_axis=new_axis, chunks=chunks - ) - else: - return access_method(values, name) + new_axis = chunks = None + # isocalendar adds adds an axis + if name == "isocalendar": + chunks = (3,) + values.chunksize + new_axis = 0 + + return map_blocks( + access_method, values, name, dtype=dtype, new_axis=new_axis, chunks=chunks + ) def _round_through_series_or_index(values, name, freq): @@ -134,16 +134,16 @@ def _round_field(values, name, freq): Array-like of datetime fields accessed for each element in values """ - if is_duck_dask_array(values): - from dask.array import map_blocks - - dtype = np.datetime64 if is_np_datetime_like(values.dtype) else np.dtype("O") - return map_blocks( - _round_through_series_or_index, values, name, freq=freq, dtype=dtype - ) - else: + if not is_duck_dask_array(values): return _round_through_series_or_index(values, name, freq) + from dask.array import map_blocks + + dtype = np.datetime64 if is_np_datetime_like(values.dtype) 
else np.dtype("O") + return map_blocks( + _round_through_series_or_index, values, name, freq=freq, dtype=dtype + ) + def _strftime_through_cftimeindex(values, date_format): """Coerce an array of cftime-like values to a CFTimeIndex @@ -171,13 +171,13 @@ def _strftime(values, date_format): access_method = _strftime_through_series else: access_method = _strftime_through_cftimeindex - if is_duck_dask_array(values): - from dask.array import map_blocks - - return map_blocks(access_method, values, date_format) - else: + if not is_duck_dask_array(values): return access_method(values, date_format) + from dask.array import map_blocks + + return map_blocks(access_method, values, date_format) + class Properties: def __init__(self, obj): @@ -344,12 +344,12 @@ def isocalendar(self): values = _get_date_field(self._obj.data, "isocalendar", np.int64) obj_type = type(self._obj) - data_vars = {} - for i, name in enumerate(["year", "week", "weekday"]): - data_vars[name] = obj_type( + data_vars = { + name: obj_type( values[i], name=name, coords=self._obj.coords, dims=self._obj.dims ) - + for i, name in enumerate(["year", "week", "weekday"]) + } return Dataset(data_vars) year = Properties._tslib_field_accessor( @@ -386,13 +386,11 @@ def weekofyear(self): ) if LooseVersion(pd.__version__) < "1.1.0": - weekofyear = Properties._tslib_field_accessor( + return Properties._tslib_field_accessor( "weekofyear", "The week ordinal of the year", np.int64 ).fget(self) else: - weekofyear = self.isocalendar().week - - return weekofyear + return self.isocalendar().week week = weekofyear dayofweek = Properties._tslib_field_accessor( diff --git a/xarray/core/accessor_str.py b/xarray/core/accessor_str.py index f0e416b52e6..8e587fffe7e 100644 --- a/xarray/core/accessor_str.py +++ b/xarray/core/accessor_str.py @@ -81,7 +81,7 @@ def _contains_obj_type(*, pat: Any, checker: Any) -> bool: return True # If it is not an object array it can't contain compiled re - if not getattr(pat, "dtype", "no") == np.object_: + if getattr(pat, "dtype", "no") != np.object_: return False return _apply_str_ufunc(func=checker, obj=pat).all() @@ -95,7 +95,7 @@ def _contains_str_like(pat: Any) -> bool: if not hasattr(pat, "dtype"): return False - return pat.dtype.kind == "U" or pat.dtype.kind == "S" + return pat.dtype.kind in ["U", "S"] def _contains_compiled_re(pat: Any) -> bool: @@ -271,13 +271,13 @@ def _re_compile( if getattr(pat, "dtype", None) != np.object_: pat = self._stringify(pat) func = lambda x: re.compile(x, flags=flags) - if isinstance(pat, np.ndarray): - # apply_ufunc doesn't work for numpy arrays with output object dtypes - func = np.vectorize(func) - return func(pat) - else: + if not isinstance(pat, np.ndarray): return _apply_str_ufunc(func=func, obj=pat, dtype=np.object_) + # apply_ufunc doesn't work for numpy arrays with output object dtypes + func = np.vectorize(func) + return func(pat) + def len(self) -> Any: """ Compute the length of each string in the array. 
diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 98cbadcb25c..abcd4248a86 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -62,10 +62,10 @@ def _override_indexes(objects, all_indexes, exclude): objects = list(objects) for idx, obj in enumerate(objects[1:]): - new_indexes = {} - for dim in obj.indexes: - if dim not in exclude: - new_indexes[dim] = all_indexes[dim][0] + new_indexes = { + dim: all_indexes[dim][0] for dim in obj.indexes if dim not in exclude + } + objects[idx + 1] = obj._overwrite_indexes(new_indexes) return objects @@ -336,15 +336,12 @@ def align( % (dim, unlabeled_sizes, labeled_size) ) - for dim in unlabeled_dim_sizes: - if dim not in all_indexes: - sizes = unlabeled_dim_sizes[dim] - if len(sizes) > 1: - raise ValueError( - "arguments without labels along dimension %r cannot be " - "aligned because they have different dimension sizes: %r" - % (dim, sizes) - ) + for dim, sizes in unlabeled_dim_sizes.items(): + if dim not in all_indexes and len(sizes) > 1: + raise ValueError( + "arguments without labels along dimension %r cannot be " + "aligned because they have different dimension sizes: %r" % (dim, sizes) + ) result = [] for obj in objects: @@ -747,8 +744,6 @@ def broadcast(*args, exclude=None): args = align(*args, join="outer", copy=False, exclude=exclude) dims_map, common_coords = _get_broadcast_dims_map_common_coords(args, exclude) - result = [] - for arg in args: - result.append(_broadcast_helper(arg, exclude, dims_map, common_coords)) + result = [_broadcast_helper(arg, exclude, dims_map, common_coords) for arg in args] return tuple(result) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 375931e1f9c..bbd34a1d67f 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -11,8 +11,7 @@ def _infer_concat_order_from_positions(datasets): - combined_ids = dict(_infer_tile_ids_from_nested_list(datasets, ())) - return combined_ids + return dict(_infer_tile_ids_from_nested_list(datasets, ())) def _infer_tile_ids_from_nested_list(entry, current_pos): @@ -140,7 +139,7 @@ def _check_dimension_depth_tile_ids(combined_tile_ids): nesting_depths = [len(tile_id) for tile_id in tile_ids] if not nesting_depths: nesting_depths = [0] - if not set(nesting_depths) == {nesting_depths[0]}: + if set(nesting_depths) != {nesting_depths[0]}: raise ValueError( "The supplied objects do not form a hypercube because" " sub-lists do not have consistent depths" diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 16eecef6efe..4b92cd7e79b 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -107,52 +107,53 @@ def to_index(self, ordered_dims: Sequence[Hashable] = None) -> pd.Index: (dim,) = ordered_dims return self._data.get_index(dim) # type: ignore[attr-defined] else: - indexes = [ - self._data.get_index(k) for k in ordered_dims # type: ignore[attr-defined] - ] + return self._extracted_from_to_index_17(ordered_dims) - # compute the sizes of the repeat and tile for the cartesian product - # (taken from pandas.core.reshape.util) - index_lengths = np.fromiter( - (len(index) for index in indexes), dtype=np.intp - ) - cumprod_lengths = np.cumproduct(index_lengths) + def _extracted_from_to_index_17(self, ordered_dims): + indexes = [ + self._data.get_index(k) for k in ordered_dims # type: ignore[attr-defined] + ] - if cumprod_lengths[-1] != 0: - # sizes of the repeats - repeat_counts = cumprod_lengths[-1] / cumprod_lengths + # compute the sizes of the repeat and tile for the cartesian product + 
# (taken from pandas.core.reshape.util) + index_lengths = np.fromiter((len(index) for index in indexes), dtype=np.intp) + cumprod_lengths = np.cumproduct(index_lengths) + + if cumprod_lengths[-1] == 0: + # if any factor is empty, the cartesian product is empty + repeat_counts = np.zeros_like(cumprod_lengths) + + else: + # sizes of the repeats + repeat_counts = cumprod_lengths[-1] / cumprod_lengths + # sizes of the tiles + tile_counts = np.roll(cumprod_lengths, 1) + tile_counts[0] = 1 + + # loop over the indexes + # for each MultiIndex or Index compute the cartesian product of the codes + + code_list = [] + level_list = [] + names = [] + + for i, index in enumerate(indexes): + if isinstance(index, pd.MultiIndex): + codes, levels = index.codes, index.levels else: - # if any factor is empty, the cartesian product is empty - repeat_counts = np.zeros_like(cumprod_lengths) - - # sizes of the tiles - tile_counts = np.roll(cumprod_lengths, 1) - tile_counts[0] = 1 - - # loop over the indexes - # for each MultiIndex or Index compute the cartesian product of the codes - - code_list = [] - level_list = [] - names = [] - - for i, index in enumerate(indexes): - if isinstance(index, pd.MultiIndex): - codes, levels = index.codes, index.levels - else: - code, level = pd.factorize(index) - codes = [code] - levels = [level] - - # compute the cartesian product - code_list += [ - np.tile(np.repeat(code, repeat_counts[i]), tile_counts[i]) - for code in codes - ] - level_list += levels - names += index.names - - return pd.MultiIndex(level_list, code_list, names=names) + code, level = pd.factorize(index) + codes = [code] + levels = [level] + + # compute the cartesian product + code_list += [ + np.tile(np.repeat(code, repeat_counts[i]), tile_counts[i]) + for code in codes + ] + level_list += levels + names += index.names + + return pd.MultiIndex(level_list, code_list, names=names) def update(self, other: Mapping[Hashable, Any]) -> None: other_vars = getattr(other, "variables", other) @@ -220,10 +221,9 @@ def merge(self, other: "Coordinates") -> "Dataset": coords, indexes = merge_coordinates_without_align([self, other]) coord_names = set(coords) - merged = Dataset._construct_direct( + return Dataset._construct_direct( variables=coords, coord_names=coord_names, indexes=indexes ) - return merged class DatasetCoordinates(Coordinates): @@ -358,13 +358,13 @@ def to_dataset(self) -> "Dataset": return Dataset._construct_direct(coords, set(coords)) def __delitem__(self, key: Hashable) -> None: - if key in self: - del self._data._coords[key] - if self._data._indexes is not None and key in self._data._indexes: - del self._data._indexes[key] - else: + if key not in self: raise KeyError(f"{key!r} is not a coordinate variable.") + del self._data._coords[key] + if self._data._indexes is not None and key in self._data._indexes: + del self._data._indexes[key] + def _ipython_key_completions_(self): """Provide method for the key-autocompletions in IPython.""" return self._data._ipython_key_completions_() @@ -380,14 +380,13 @@ def assert_coordinate_consistent( """ for k in obj.dims: # make sure there are no conflict in dimension coordinates - if k in coords and k in obj.coords: - if not coords[k].equals(obj[k].variable): - raise IndexError( - "dimension coordinate {!r} conflicts between " - "indexed and indexing objects:\n{}\nvs.\n{}".format( - k, obj[k], coords[k] - ) + if k in coords and k in obj.coords and not coords[k].equals(obj[k].variable): + raise IndexError( + "dimension coordinate {!r} conflicts between " + "indexed and 
indexing objects:\n{}\nvs.\n{}".format( + k, obj[k], coords[k] ) + ) def remap_label_indexers( diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py index db70d16fecc..73169004f1a 100644 --- a/xarray/core/dask_array_compat.py +++ b/xarray/core/dask_array_compat.py @@ -81,7 +81,7 @@ def nanmedian(a, axis=None, keepdims=False): a = a.rechunk({ax: -1 if ax in axis else "auto" for ax in range(a.ndim)}) - result = da.map_blocks( + return da.map_blocks( np.nanmedian, a, axis=axis, @@ -92,8 +92,6 @@ def nanmedian(a, axis=None, keepdims=False): else None, ) - return result - if LooseVersion(dask_version) > LooseVersion("2.30.0"): ensure_minimum_chunksize = da.overlap.ensure_minimum_chunksize diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index c622f50335d..90d9ff750f3 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -537,8 +537,7 @@ def _to_dataset_whole( indexes = self._indexes coord_names = set(self._coords) - dataset = Dataset._construct_direct(variables, coord_names, indexes=indexes) - return dataset + return Dataset._construct_direct(variables, coord_names, indexes=indexes) def to_dataset( self, @@ -667,9 +666,8 @@ def dims(self, value): def _item_key_to_dict(self, key: Any) -> Mapping[Hashable, Any]: if utils.is_dict_like(key): return key - else: - key = indexing.expanded_indexer(key, self.ndim) - return dict(zip(self.dims, key)) + key = indexing.expanded_indexer(key, self.ndim) + return dict(zip(self.dims, key)) @property def _level_coords(self) -> Dict[Hashable, Hashable]: @@ -807,13 +805,12 @@ def reset_coords( dataset = self.coords.to_dataset().reset_coords(names, drop) if drop: return self._replace(coords=dataset._variables) - else: - if self.name is None: - raise ValueError( - "cannot reset_coords with drop=False on an unnamed DataArrray" - ) - dataset[self.name] = self.variable - return dataset + if self.name is None: + raise ValueError( + "cannot reset_coords with drop=False on an unnamed DataArrray" + ) + dataset[self.name] = self.variable + return dataset def __dask_tokenize__(self): from dask.base import normalize_token diff --git a/xarray/core/dtypes.py b/xarray/core/dtypes.py index 51499c3a687..5f9349051b7 100644 --- a/xarray/core/dtypes.py +++ b/xarray/core/dtypes.py @@ -63,10 +63,7 @@ def maybe_promote(dtype): # Check np.timedelta64 before np.integer fill_value = np.timedelta64("NaT") elif np.issubdtype(dtype, np.integer): - if dtype.itemsize <= 2: - dtype = np.float32 - else: - dtype = np.float64 + dtype = np.float32 if dtype.itemsize <= 2 else np.float64 fill_value = np.nan elif np.issubdtype(dtype, np.complexfloating): fill_value = np.nan + np.nan * 1j diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 7c4923db296..a3e1786d43f 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -187,7 +187,7 @@ def asarray(data, xp=np): def as_shared_dtype(scalars_or_arrays): """Cast a arrays to a shared dtype using xarray's type promotion rules.""" - if any([isinstance(x, cupy_array_type) for x in scalars_or_arrays]): + if any(isinstance(x, cupy_array_type) for x in scalars_or_arrays): import cupy as cp arrays = [asarray(x, xp=cp) for x in scalars_or_arrays] @@ -440,11 +440,7 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): # TODO: make this function dask-compatible? 
# Set offset to minimum if not given if offset is None: - if array.dtype.kind in "Mm": - offset = _datetime_nanmin(array) - else: - offset = min(array) - + offset = _datetime_nanmin(array) if array.dtype.kind in "Mm" else min(array) # Compute timedelta object. # For np.datetime64, this can silently yield garbage due to overflow. # One option is to enforce 1970-01-01 as the universal offset. diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py index 9730a0a1745..4bdc8e71b88 100644 --- a/xarray/core/formatting_html.py +++ b/xarray/core/formatting_html.py @@ -25,9 +25,8 @@ def short_data_repr_html(array): internal_data = getattr(array, "variable", array)._data if hasattr(internal_data, "_repr_html_"): return internal_data._repr_html_() - else: - text = escape(short_data_repr(array)) - return f"
<pre>{text}</pre>
" + text = escape(short_data_repr(array)) + return f"
<pre>{text}</pre>
" def format_dims(dims, coord_names): @@ -77,8 +76,7 @@ def summarize_coord(name, var): if is_index: coord = var.variable.to_index_variable() if coord.level_names is not None: - coords = {} - coords[name] = _summarize_coord_multiindex(name, coord) + coords = {name: _summarize_coord_multiindex(name, coord)} for lname in coord.level_names: var = coord.get_level_variable(lname) coords[lname] = summarize_variable(lname, var) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index e2678896c0e..3149606caab 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -141,8 +141,7 @@ def _inverse_permutation_indices(positions): return None positions = [np.arange(sl.start, sl.stop, sl.step) for sl in positions] - indices = nputils.inverse_permutation(np.concatenate(positions)) - return indices + return nputils.inverse_permutation(np.concatenate(positions)) class _DummyGroup: @@ -200,9 +199,8 @@ def _ensure_1d(group, obj): def _unique_and_monotonic(group): if isinstance(group, _DummyGroup): return True - else: - index = safe_cast_to_index(group) - return index.is_unique and index.is_monotonic + index = safe_cast_to_index(group) + return index.is_unique and index.is_monotonic def _apply_loffset(grouper, result): @@ -475,8 +473,7 @@ def _infer_concat_args(self, applied_example): def _binary_op(self, other, f, reflexive=False): g = f if not reflexive else lambda x, y: f(y, x) applied = self._yield_binary_applied(g, other) - combined = self._combine(applied) - return combined + return self._combine(applied) def _yield_binary_applied(self, func, other): dummy = None @@ -548,8 +545,7 @@ def fillna(self, value): Dataset.fillna DataArray.fillna """ - out = ops.fillna(self, value) - return out + return ops.fillna(self, value) def quantile( self, q, dim=None, interpolation="linear", keep_attrs=None, skipna=True @@ -646,7 +642,7 @@ def quantile( if dim is None: dim = self._group_dim - out = self.map( + return self.map( self._obj.__class__.quantile, shortcut=False, q=q, @@ -656,8 +652,6 @@ def quantile( skipna=skipna, ) - return out - def where(self, cond, other=dtypes.NA): """Return elements from `self` or `other` depending on `cond`. @@ -737,8 +731,7 @@ def _concat_shortcut(self, applied, dim, positions=None): # compiled language) stacked = Variable.concat(applied, dim, shortcut=True) reordered = _maybe_reorder(stacked, dim, positions) - result = self._obj._replace_maybe_drop_dims(reordered) - return result + return self._obj._replace_maybe_drop_dims(reordered) def _restore_dim_order(self, stacked): def lookup_order(dimension): @@ -795,10 +788,7 @@ def map(self, func, shortcut=False, args=(), **kwargs): applied : DataArray or DataArray The result of splitting, applying and combining this array. 
""" - if shortcut: - grouped = self._iter_grouped_shortcut() - else: - grouped = self._iter_grouped() + grouped = self._iter_grouped_shortcut() if shortcut else self._iter_grouped() applied = (maybe_wrap_array(arr, func(arr, *args, **kwargs)) for arr in grouped) return self._combine(applied, shortcut=shortcut) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index ec95563bda9..c250c80c3ea 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -129,13 +129,13 @@ def unique_variable( if equals is not True: break - if equals is None: - # now compare values with minimum number of computes - out = out.compute() - for var in variables[1:]: - equals = getattr(out, compat)(var) - if not equals: - break + if equals is None: + # now compare values with minimum number of computes + out = out.compute() + for var in variables[1:]: + equals = getattr(out, compat)(var) + if not equals: + break if not equals: raise MergeError( @@ -633,11 +633,7 @@ def merge_core( ) attrs = merge_attrs( - [ - var.attrs - for var in coerced - if isinstance(var, Dataset) or isinstance(var, DataArray) - ], + [var.attrs for var in coerced if isinstance(var, (Dataset, DataArray))], combine_attrs, ) @@ -882,8 +878,7 @@ def merge( combine_attrs=combine_attrs, fill_value=fill_value, ) - merged = Dataset._construct_direct(**merge_result._asdict()) - return merged + return Dataset._construct_direct(**merge_result._asdict()) def dataset_merge_method( diff --git a/xarray/core/npcompat.py b/xarray/core/npcompat.py index 803c7c3ccfe..7dab09bf847 100644 --- a/xarray/core/npcompat.py +++ b/xarray/core/npcompat.py @@ -73,8 +73,7 @@ def moveaxis(a, source, destination): for dest, src in sorted(zip(destination, source)): order.insert(dest, src) - result = transpose(order) - return result + return transpose(order) # Type annotations stubs diff --git a/xarray/core/resample_cftime.py b/xarray/core/resample_cftime.py index 882664cbb60..1870cb4f594 100644 --- a/xarray/core/resample_cftime.py +++ b/xarray/core/resample_cftime.py @@ -163,11 +163,7 @@ def _get_time_bins(index, freq, closed, label, base): datetime_bins, freq, closed, index, labels ) - if label == "right": - labels = labels[1:] - else: - labels = labels[:-1] - + labels = labels[1:] if label == "right" else labels[:-1] # TODO: when CFTimeIndex supports missing values, if the reference index # contains missing values, insert the appropriate NaN value at the # beginning of the datetime_bins and labels indexes. @@ -262,11 +258,7 @@ def _get_range_edges(first, last, offset, closed="left", base=0): first = normalize_date(first) last = normalize_date(last) - if closed == "left": - first = offset.rollback(first) - else: - first = first - offset - + first = offset.rollback(first) if closed == "left" else first - offset last = last + offset return first, last @@ -321,11 +313,7 @@ def _adjust_dates_anchored(first, last, offset, closed="right", base=0): else: lresult = last else: - if foffset.total_seconds() > 0: - fresult = first - foffset - else: - fresult = first - + fresult = first - foffset if foffset.total_seconds() > 0 else first if loffset.total_seconds() > 0: lresult = last + (offset.as_timedelta() - loffset) else: diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 6b65c2cd536..ff73578c42f 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -6,7 +6,6 @@ from . 
import dtypes, duck_array_ops, utils from .arithmetic import CoarsenArithmetic -from .dask_array_ops import dask_rolling_wrapper from .options import _get_keep_attrs from .pycompat import is_duck_dask_array @@ -173,11 +172,10 @@ def _mapping_to_list( if utils.is_dict_like(arg): if allow_default: return [arg.get(d, default) for d in self.dim] - else: - for d in self.dim: - if d not in arg: - raise KeyError(f"argument has no key {d}.") - return [arg[d] for d in self.dim] + for d in self.dim: + if d not in arg: + raise KeyError(f"argument has no key {d}.") + return [arg[d] for d in self.dim] elif allow_allsame: # for single argument return [arg] * len(self.dim) elif len(self.dim) == 1: @@ -435,11 +433,7 @@ def reduce(self, func, keep_attrs=None, **kwargs): # save memory with reductions GH4325 fillna = kwargs.pop("fillna", dtypes.NA) - if fillna is not dtypes.NA: - obj = self.obj.fillna(fillna) - else: - obj = self.obj - + obj = self.obj.fillna(fillna) if fillna is not dtypes.NA else self.obj windows = self._construct( obj, rolling_dim, keep_attrs=keep_attrs, fill_value=fillna ) @@ -504,9 +498,6 @@ def _bottleneck_reduce(self, func, keep_attrs, **kwargs): if is_duck_dask_array(padded.data): raise AssertionError("should not be reachable") - values = dask_rolling_wrapper( - func, padded.data, window=self.window[0], min_count=min_count, axis=axis - ) else: values = func( padded.data, window=self.window[0], min_count=min_count, axis=axis @@ -549,20 +540,17 @@ def _numpy_or_bottleneck_reduce( return self._bottleneck_reduce( bottleneck_move_func, keep_attrs=keep_attrs, **kwargs ) - else: - if rolling_agg_func: - return rolling_agg_func( - self, keep_attrs=self._get_keep_attrs(keep_attrs) - ) - if fillna is not None: - if fillna is dtypes.INF: - fillna = dtypes.get_pos_infinity(self.obj.dtype, max_for_int=True) - elif fillna is dtypes.NINF: - fillna = dtypes.get_neg_infinity(self.obj.dtype, min_for_int=True) - kwargs.setdefault("skipna", False) - kwargs.setdefault("fillna", fillna) + if rolling_agg_func: + return rolling_agg_func(self, keep_attrs=self._get_keep_attrs(keep_attrs)) + if fillna is not None: + if fillna is dtypes.INF: + fillna = dtypes.get_pos_infinity(self.obj.dtype, max_for_int=True) + elif fillna is dtypes.NINF: + fillna = dtypes.get_neg_infinity(self.obj.dtype, min_for_int=True) + kwargs.setdefault("skipna", False) + kwargs.setdefault("fillna", fillna) - return self.reduce(array_agg_func, keep_attrs=keep_attrs, **kwargs) + return self.reduce(array_agg_func, keep_attrs=keep_attrs, **kwargs) class DatasetRolling(Rolling): @@ -612,7 +600,7 @@ def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None dims.append(d) center[d] = self.center[i] - if len(dims) > 0: + if dims: w = {d: windows[d] for d in dims} self.rollings[key] = DataArrayRolling(da, w, min_periods, center) @@ -735,7 +723,7 @@ def construct( for key, da in self.obj.data_vars.items(): # keeps rollings only for the dataset depending on self.dim dims = [d for d in self.dim if d in da.dims] - if len(dims) > 0: + if dims: wi = {d: window_dim[i] for i, d in enumerate(self.dim) if d in da.dims} st = {d: stride[i] for i, d in enumerate(self.dim) if d in da.dims} diff --git a/xarray/core/utils.py b/xarray/core/utils.py index d3b4cd39c53..d92ce8a9584 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -254,7 +254,7 @@ def is_full_slice(value: Any) -> bool: def is_list_like(value: Any) -> bool: - return isinstance(value, list) or isinstance(value, tuple) + return isinstance(value, (list, tuple)) def 
is_duck_array(value: Any) -> bool: @@ -274,22 +274,19 @@ def either_dict_or_kwargs( kw_kwargs: Mapping[str, T], func_name: str, ) -> Mapping[Hashable, T]: - if pos_kwargs is not None: - if not is_dict_like(pos_kwargs): - raise ValueError( - "the first argument to .%s must be a dictionary" % func_name - ) - if kw_kwargs: - raise ValueError( - "cannot specify both keyword and positional " - "arguments to .%s" % func_name - ) - return pos_kwargs - else: + if pos_kwargs is None: # Need an explicit cast to appease mypy due to invariance; see # https://github.com/python/mypy/issues/6228 return cast(Mapping[Hashable, T], kw_kwargs) + if not is_dict_like(pos_kwargs): + raise ValueError("the first argument to .%s must be a dictionary" % func_name) + if kw_kwargs: + raise ValueError( + "cannot specify both keyword and positional " "arguments to .%s" % func_name + ) + return pos_kwargs + def is_scalar(value: Any, include_0d: bool = True) -> bool: """Whether to treat a value as a scalar. @@ -358,10 +355,7 @@ def dict_equiv( for k in first: if k not in second or not compat(first[k], second[k]): return False - for k in second: - if k not in first: - return False - return True + return all(k in first for k in second) def compat_dict_intersection( @@ -863,7 +857,7 @@ def drop_missing_dims( """ if missing_dims == "raise": - supplied_dims_set = set(val for val in supplied_dims if val is not ...) + supplied_dims_set = {val for val in supplied_dims if val is not ...} invalid = supplied_dims_set - set(dims) if invalid: raise ValueError( diff --git a/xarray/core/variable.py b/xarray/core/variable.py index e4813d72cde..6d6c6e8ab6d 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -241,21 +241,14 @@ def as_compatible_data(data, fastpath=False): else: data = np.asarray(data) - if not isinstance(data, np.ndarray): - if hasattr(data, "__array_function__"): - return data + if not isinstance(data, np.ndarray) and hasattr(data, "__array_function__"): + return data # validate whether the data is valid data types. 
data = np.asarray(data) - if isinstance(data, np.ndarray): - if data.dtype.kind == "O": - data = _possibly_convert_objects(data) - elif data.dtype.kind == "M": - data = _possibly_convert_objects(data) - elif data.dtype.kind == "m": - data = _possibly_convert_objects(data) - + if isinstance(data, np.ndarray) and data.dtype.kind in ["O", "M", "m"]: + data = _possibly_convert_objects(data) return _maybe_wrap_data(data) @@ -273,10 +266,7 @@ def _as_array_or_item(data): TODO: remove this (replace with np.asarray) once these issues are fixed """ - if isinstance(data, cupy_array_type): - data = data.get() - else: - data = np.asarray(data) + data = data.get() if isinstance(data, cupy_array_type) else np.asarray(data) if data.ndim == 0: if data.dtype.kind == "M": data = np.datetime64(data, "ns") @@ -662,9 +652,7 @@ def _broadcast_indexes_basic(self, key): def _validate_indexers(self, key): """Make sanity checks""" for dim, k in zip(self.dims, key): - if isinstance(k, BASIC_INDEXING_TYPES): - pass - else: + if not isinstance(k, BASIC_INDEXING_TYPES): if not isinstance(k, Variable): k = np.asarray(k) if k.ndim > 1: From 46c24eee66f27c37a52271e22a3d16b92c3d3454 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 28 Apr 2021 23:08:26 -0600 Subject: [PATCH 04/16] Misc code cleanup --- xarray/plot/utils.py | 9 ++++----- xarray/tutorial.py | 4 ++-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index a83bc28e273..0235510d2fb 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -44,14 +44,13 @@ def _determine_extend(calc_data, vmin, vmax): extend_min = calc_data.min() < vmin extend_max = calc_data.max() > vmax if extend_min and extend_max: - extend = "both" + return "both" elif extend_min: - extend = "min" + return "min" elif extend_max: - extend = "max" + return "max" else: - extend = "neither" - return extend + return "neither" def _build_discrete_cmap(cmap, levels, extend, filled): diff --git a/xarray/tutorial.py b/xarray/tutorial.py index 80c5e22513d..62762d29216 100644 --- a/xarray/tutorial.py +++ b/xarray/tutorial.py @@ -87,10 +87,10 @@ def open_dataset( if name in external_urls: url = external_urls[name] else: - # process the name - default_extension = ".nc" path = pathlib.Path(name) if not path.suffix: + # process the name + default_extension = ".nc" path = path.with_suffix(default_extension) url = f"{base_url}/raw/{version}/{path.name}" From 5d5f7a43b2b242aea1198ef0d235345fbc35e799 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 28 Apr 2021 23:25:40 -0600 Subject: [PATCH 05/16] Rename extracted method --- xarray/core/coordinates.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 4b92cd7e79b..c5b627eee62 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -107,9 +107,9 @@ def to_index(self, ordered_dims: Sequence[Hashable] = None) -> pd.Index: (dim,) = ordered_dims return self._data.get_index(dim) # type: ignore[attr-defined] else: - return self._extracted_from_to_index_17(ordered_dims) + return self._compute_multi_index(ordered_dims) - def _extracted_from_to_index_17(self, ordered_dims): + def _compute_multi_index(self, ordered_dims): indexes = [ self._data.get_index(k) for k in ordered_dims # type: ignore[attr-defined] ] From 8c818df9d6ae40603296b93eaff58180369a1531 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 29 Apr 2021 11:52:36 -0600 Subject: [PATCH 06/16] Apply suggestions from 
code review Co-authored-by: keewis Co-authored-by: Mathias Hauser --- xarray/backends/api.py | 4 ++-- xarray/backends/netCDF4_.py | 3 ++- xarray/backends/plugins.py | 2 +- xarray/backends/scipy_.py | 2 +- xarray/coding/strings.py | 2 +- xarray/core/alignment.py | 4 ++-- xarray/core/coordinates.py | 6 ++---- xarray/core/utils.py | 4 ++-- xarray/core/variable.py | 2 +- 9 files changed, 14 insertions(+), 15 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index cab6f18b77d..9032544440c 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -305,10 +305,10 @@ def _dataset_from_backend_dataset( overwrite_encoded_chunks, **extra_tokens, ): - if not isinstance(chunks, (int, dict)) and chunks is not None and chunks != "auto": + if not isinstance(chunks, (int, dict)) and chunks not in {None, "auto"}: raise ValueError( "chunks must be an int, dict, 'auto', or None. " - "Instead found %s. " % chunks + f"Instead found {chunks}." ) _protect_dataset_variables_inplace(backend_ds, cache) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index e6b5ce3289b..b2758932bf3 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -279,7 +279,8 @@ def _extract_nc4_variable_encoding( def _is_list_of_strings(value): - return np.asarray(value).dtype.kind in ["U", "S"] and np.asarray(value).size > 1 + arr = np.asarray(value) + return arr.dtype.kind in ["U", "S"] and arr.size > 1 class NetCDF4DataStore(WritableCFDataStore): diff --git a/xarray/backends/plugins.py b/xarray/backends/plugins.py index dddb8ff3d23..123f9efb9c7 100644 --- a/xarray/backends/plugins.py +++ b/xarray/backends/plugins.py @@ -73,7 +73,7 @@ def sort_backends(backend_entrypoints): ordered_backends_entrypoints = { be_name: backend_entrypoints.pop(be_name) for be_name in STANDARD_BACKENDS_ORDER - if backend_entrypoints.get(be_name, None) is not None + if be_name in backend_entrypoints } ordered_backends_entrypoints.update( diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 5899ec53241..861c1c64ab7 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -208,7 +208,7 @@ def prepare_variable( and variable.encoding != {"_FillValue": None} ): raise ValueError( - "unexpected encoding for scipy backend: %r" % list(variable.encoding) + f"unexpected encoding for scipy backend: {list(variable.encoding)}" ) data = variable.data diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index 9395e42f8df..f110bcbd5b8 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -173,7 +173,7 @@ def char_to_bytes(arr): if len(arr.chunks[-1]) > 1: raise ValueError( - "cannot stacked dask character array with " + "cannot stack dask character array with " "multiple chunks in the last dimension: {}".format(arr) ) diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index abcd4248a86..60c06478e19 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -339,8 +339,8 @@ def align( for dim, sizes in unlabeled_dim_sizes.items(): if dim not in all_indexes and len(sizes) > 1: raise ValueError( - "arguments without labels along dimension %r cannot be " - "aligned because they have different dimension sizes: %r" % (dim, sizes) + f"arguments without labels along dimension {dim!r} cannot be " + f"aligned because they have different dimension sizes: {sizes!r}" ) result = [] diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index c5b627eee62..d397364a5e0 100644 --- a/xarray/core/coordinates.py +++ 
b/xarray/core/coordinates.py @@ -382,10 +382,8 @@ def assert_coordinate_consistent( # make sure there are no conflict in dimension coordinates if k in coords and k in obj.coords and not coords[k].equals(obj[k].variable): raise IndexError( - "dimension coordinate {!r} conflicts between " - "indexed and indexing objects:\n{}\nvs.\n{}".format( - k, obj[k], coords[k] - ) + f"dimension coordinate {k!r} conflicts between " + f"indexed and indexing objects:\n{obj[k]}\nvs.\n{coords[k]}" ) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index d92ce8a9584..b8fe2a2b89d 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -280,10 +280,10 @@ def either_dict_or_kwargs( return cast(Mapping[Hashable, T], kw_kwargs) if not is_dict_like(pos_kwargs): - raise ValueError("the first argument to .%s must be a dictionary" % func_name) + raise ValueError(f"the first argument to .{func_name} must be a dictionary") if kw_kwargs: raise ValueError( - "cannot specify both keyword and positional " "arguments to .%s" % func_name + f"cannot specify both keyword and positional arguments to .{func_name}" ) return pos_kwargs diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 6d6c6e8ab6d..f063528b700 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -247,7 +247,7 @@ def as_compatible_data(data, fastpath=False): # validate whether the data is valid data types. data = np.asarray(data) - if isinstance(data, np.ndarray) and data.dtype.kind in ["O", "M", "m"]: + if isinstance(data, np.ndarray) and data.dtype.kind in "OMm": data = _possibly_convert_objects(data) return _maybe_wrap_data(data) From 12141d1e874c1eb01d67b069885f118803619d90 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 29 Apr 2021 11:57:20 -0600 Subject: [PATCH 07/16] formatting only --- xarray/backends/api.py | 3 +-- xarray/backends/scipy_.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 9032544440c..46904e83439 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -307,8 +307,7 @@ def _dataset_from_backend_dataset( ): if not isinstance(chunks, (int, dict)) and chunks not in {None, "auto"}: raise ValueError( - "chunks must be an int, dict, 'auto', or None. " - f"Instead found {chunks}." + "chunks must be an int, dict, 'auto', or None. " f"Instead found {chunks}." 
) _protect_dataset_variables_inplace(backend_ds, cache) diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 861c1c64ab7..8f06e0cd8af 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -208,7 +208,7 @@ def prepare_variable( and variable.encoding != {"_FillValue": None} ): raise ValueError( - f"unexpected encoding for scipy backend: {list(variable.encoding)}" + f"unexpected encoding for scipy backend: {list(variable.encoding)}" ) data = variable.data From 99a54de5017dafd78c11b78c45c03bc7339533f2 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 29 Apr 2021 13:49:40 -0600 Subject: [PATCH 08/16] Use f-Strings --- xarray/backends/api.py | 19 ++++---- xarray/backends/common.py | 7 ++- xarray/backends/h5netcdf_.py | 4 +- xarray/backends/locks.py | 2 +- xarray/backends/netCDF4_.py | 14 +++--- xarray/backends/rasterio_.py | 2 +- xarray/backends/scipy_.py | 4 +- xarray/backends/zarr.py | 14 +++--- xarray/coding/strings.py | 2 +- xarray/coding/times.py | 4 +- xarray/coding/variables.py | 10 ++--- xarray/conventions.py | 4 +- xarray/core/alignment.py | 26 +++++------ xarray/core/common.py | 17 +++----- xarray/core/computation.py | 10 ++--- xarray/core/concat.py | 26 +++++------ xarray/core/dataarray.py | 34 +++++++-------- xarray/core/dataset.py | 80 ++++++++++++++++------------------ xarray/core/extensions.py | 7 ++- xarray/core/groupby.py | 12 ++--- xarray/core/merge.py | 24 +++++----- xarray/core/missing.py | 2 +- xarray/core/npcompat.py | 4 +- xarray/core/ops.py | 4 +- xarray/core/options.py | 3 +- xarray/core/parallel.py | 2 +- xarray/core/resample_cftime.py | 2 +- xarray/core/utils.py | 4 +- xarray/core/variable.py | 45 +++++++++---------- xarray/plot/utils.py | 17 ++++---- xarray/tests/test_groupby.py | 4 +- xarray/ufuncs.py | 7 ++- xarray/util/print_versions.py | 18 ++++---- 33 files changed, 205 insertions(+), 229 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 46904e83439..849b035bd7d 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -280,7 +280,7 @@ def _chunk_ds( mtime = _get_mtime(filename_or_obj) token = tokenize(filename_or_obj, mtime, engine, chunks, **extra_tokens) - name_prefix = "open_dataset-%s" % token + name_prefix = f"open_dataset-{token}" variables = {} for name, var in backend_ds.variables.items(): @@ -993,8 +993,8 @@ def to_netcdf( elif engine != "scipy": raise ValueError( "invalid engine for creating bytes with " - "to_netcdf: %r. Only the default engine " - "or engine='scipy' is supported" % engine + f"to_netcdf: {engine!r}. 
Only the default engine " + "or engine='scipy' is supported" ) if not compute: raise NotImplementedError( @@ -1015,7 +1015,7 @@ def to_netcdf( try: store_open = WRITEABLE_STORES[engine] except KeyError: - raise ValueError("unrecognized engine for to_netcdf: %r" % engine) + raise ValueError(f"unrecognized engine for to_netcdf: {engine!r}") if format is not None: format = format.upper() @@ -1027,9 +1027,8 @@ def to_netcdf( autoclose = have_chunks and scheduler in ["distributed", "multiprocessing"] if autoclose and engine == "scipy": raise NotImplementedError( - "Writing netCDF files with the %s backend " - "is not currently supported with dask's %s " - "scheduler" % (engine, scheduler) + f"Writing netCDF files with the {engine} backend " + f"is not currently supported with dask's {scheduler} scheduler" ) target = path_or_file if path_or_file is not None else BytesIO() @@ -1039,7 +1038,7 @@ def to_netcdf( kwargs["invalid_netcdf"] = invalid_netcdf else: raise ValueError( - "unrecognized option 'invalid_netcdf' for engine %s" % engine + f"unrecognized option 'invalid_netcdf' for engine {engine}" ) store = store_open(target, mode, format, group, **kwargs) @@ -1181,7 +1180,7 @@ def save_mfdataset( Data variables: a (time) float64 0.0 0.02128 0.04255 0.06383 ... 0.9574 0.9787 1.0 >>> years, datasets = zip(*ds.groupby("time.year")) - >>> paths = ["%s.nc" % y for y in years] + >>> paths = [f"{y}.nc" for y in years] >>> xr.save_mfdataset(datasets, paths) """ if mode == "w" and len(set(paths)) < len(paths): @@ -1193,7 +1192,7 @@ def save_mfdataset( if not isinstance(obj, Dataset): raise TypeError( "save_mfdataset only supports writing Dataset " - "objects, received type %s" % type(obj) + f"objects, received type {type(obj)}" ) if groups is None: diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 026c7e5c7db..64a245ddead 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -69,9 +69,8 @@ def robust_getitem(array, key, catch=Exception, max_retries=6, initial_delay=500 base_delay = initial_delay * 2 ** n next_delay = base_delay + np.random.randint(base_delay) msg = ( - "getitem failed, waiting %s ms before trying again " - "(%s tries remaining). Full traceback: %s" - % (next_delay, max_retries - n, traceback.format_exc()) + f"getitem failed, waiting {next_delay} ms before trying again " + f"({max_retries - n} tries remaining). Full traceback: {traceback.format_exc()}" ) logger.debug(msg) time.sleep(1e-3 * next_delay) @@ -336,7 +335,7 @@ def set_dimensions(self, variables, unlimited_dims=None): if dim in existing_dims and length != existing_dims[dim]: raise ValueError( "Unable to update size for existing dimension" - "%r (%d != %d)" % (dim, length, existing_dims[dim]) + f"{dim!r} ({length} != {existing_dims[dim]})" ) elif dim not in existing_dims: is_unlimited = dim in unlimited_dims diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 4e7e6dd8d0b..2ecc607fbb1 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -263,9 +263,9 @@ def prepare_variable( "h5netcdf does not yet support setting a fill value for " "variable-length strings " "(https://github.com/shoyer/h5netcdf/issues/37). " - "Either remove '_FillValue' from encoding on variable %r " + f"Either remove '_FillValue' from encoding on variable {name!r} " "or set {'dtype': 'S1'} in encoding to use the fixed width " - "NC_CHAR type." % name + "NC_CHAR type." 
) if dtype is str: diff --git a/xarray/backends/locks.py b/xarray/backends/locks.py index 5303ea49381..59417336f5f 100644 --- a/xarray/backends/locks.py +++ b/xarray/backends/locks.py @@ -167,7 +167,7 @@ def locked(self): return any(lock.locked for lock in self.locks) def __repr__(self): - return "CombinedLock(%r)" % list(self.locks) + return f"CombinedLock({list(self.locks)!r})" class DummyLock: diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index b2758932bf3..fcd7ca72ae4 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -122,8 +122,8 @@ def _encode_nc4_variable(var): def _check_encoding_dtype_is_vlen_string(dtype): if dtype is not str: raise AssertionError( # pragma: no cover - "unexpected dtype encoding %r. This shouldn't happen: please " - "file a bug report at github.com/pydata/xarray" % dtype + f"unexpected dtype encoding {dtype!r}. This shouldn't happen: please " + "file a bug report at github.com/pydata/xarray" ) @@ -173,7 +173,7 @@ def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group): ds = create_group(ds, key) else: # wrap error to provide slightly more helpful message - raise OSError("group not found: %s" % key, e) + raise OSError(f"group not found: {key}", e) # use the root group return ds @@ -267,8 +267,8 @@ def _extract_nc4_variable_encoding( invalid = [k for k in encoding if k not in valid_encodings] if invalid: raise ValueError( - "unexpected encoding parameters for %r backend: %r. Valid " - "encodings are: %r" % (backend, invalid, valid_encodings) + f"unexpected encoding parameters for {backend!r} backend: {invalid!r}. Valid " + f"encodings are: {valid_encodings!r}" ) else: for k in list(encoding): @@ -462,9 +462,9 @@ def prepare_variable( "netCDF4 does not yet support setting a fill value for " "variable-length strings " "(https://github.com/Unidata/netcdf4-python/issues/730). " - "Either remove '_FillValue' from encoding on variable %r " + f"Either remove '_FillValue' from encoding on variable {name!r} " "or set {'dtype': 'S1'} in encoding to use the fixed width " - "NC_CHAR type." % name + "NC_CHAR type." 
) encoding = _extract_nc4_variable_encoding( diff --git a/xarray/backends/rasterio_.py b/xarray/backends/rasterio_.py index f5d9b7bf900..49a5a9ec7ae 100644 --- a/xarray/backends/rasterio_.py +++ b/xarray/backends/rasterio_.py @@ -389,7 +389,7 @@ def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None, loc # the filename is probably an s3 bucket rather than a regular file mtime = None token = tokenize(filename, mtime, chunks) - name_prefix = "open_rasterio-%s" % token + name_prefix = f"open_rasterio-{token}" result = result.chunk(chunks, name_prefix=name_prefix, token=token) # Make the file closeable diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 8f06e0cd8af..9c33b172639 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -128,7 +128,7 @@ def __init__( elif format == "NETCDF3_CLASSIC": version = 1 else: - raise ValueError("invalid format for scipy.io.netcdf backend: %r" % format) + raise ValueError(f"invalid format for scipy.io.netcdf backend: {format!r}") if lock is None and mode != "r" and isinstance(filename_or_obj, str): lock = get_write_lock(filename_or_obj) @@ -181,7 +181,7 @@ def get_encoding(self): def set_dimension(self, name, length, is_unlimited=False): if name in self.ds.dimensions: raise ValueError( - "%s does not support modifying dimensions" % type(self).__name__ + f"{type(self).__name__} does not support modifying dimensions" ) dim_length = length if not is_unlimited else None self.ds.createDimension(name, dim_length) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index f04e143d6a4..22fe345e33f 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -204,8 +204,8 @@ def _get_zarr_dims_and_attrs(zarr_obj, dimension_key): dimensions = zarr_obj.attrs[dimension_key] except KeyError: raise KeyError( - "Zarr object is missing the attribute `%s`, which is " - "required for xarray to determine variable dimensions." % (dimension_key) + f"Zarr object is missing the attribute `{dimension_key}`, which is " + "required for xarray to determine variable dimensions." ) attributes = HiddenKeyDict(zarr_obj.attrs, [dimension_key]) return dimensions, attributes @@ -235,7 +235,7 @@ def extract_zarr_variable_encoding( invalid = [k for k in encoding if k not in valid_encodings] if invalid: raise ValueError( - "unexpected encoding parameters for zarr backend: %r" % invalid + f"unexpected encoding parameters for zarr backend: {invalid!r}" ) else: for k in list(encoding): @@ -388,16 +388,16 @@ def get_dimensions(self): for d, s in zip(v.attrs[DIMENSION_KEY], v.shape): if d in dimensions and dimensions[d] != s: raise ValueError( - "found conflicting lengths for dimension %s " - "(%d != %d)" % (d, s, dimensions[d]) + f"found conflicting lengths for dimension {d} " + f"({s} != {dimensions[d]})" ) dimensions[d] = s except KeyError: raise KeyError( - "Zarr object is missing the attribute `%s`, " + f"Zarr object is missing the attribute `{DIMENSION_KEY}`, " "which is required for xarray to determine " - "variable dimensions." % (DIMENSION_KEY) + "variable dimensions." 
) return dimensions diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index f110bcbd5b8..d52ba13caaa 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -111,7 +111,7 @@ def encode(self, variable, name=None): if "char_dim_name" in encoding.keys(): char_dim_name = encoding.pop("char_dim_name") else: - char_dim_name = "string%s" % data.shape[-1] + char_dim_name = f"string{data.shape[-1]}" dims = dims + (char_dim_name,) return Variable(dims, data, attrs, encoding) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index fde38de1a31..9f5d1f87aee 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -75,7 +75,7 @@ def _is_standard_calendar(calendar): def _netcdf_to_numpy_timeunit(units): units = units.lower() if not units.endswith("s"): - units = "%ss" % units + units = f"{units}s" return { "nanoseconds": "ns", "microseconds": "us", @@ -147,7 +147,7 @@ def _decode_cf_datetime_dtype(data, units, calendar, use_cftime): result = decode_cf_datetime(example_value, units, calendar, use_cftime) except Exception: calendar_msg = ( - "the default calendar" if calendar is None else "calendar %r" % calendar + "the default calendar" if calendar is None else f"calendar {calendar!r}" ) msg = ( f"unable to decode time units {units!r} with {calendar_msg!r}. Try " diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 78b227bf396..231b531b1a6 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -294,7 +294,7 @@ def encode(self, variable, name=None): # integer data should be treated as unsigned" if encoding.get("_Unsigned", "false") == "true": pop_to(encoding, attrs, "_Unsigned") - signed_dtype = np.dtype("i%s" % data.dtype.itemsize) + signed_dtype = np.dtype(f"i{data.dtype.itemsize}") if "_FillValue" in attrs: new_fill = signed_dtype.type(attrs["_FillValue"]) attrs["_FillValue"] = new_fill @@ -310,7 +310,7 @@ def decode(self, variable, name=None): if data.dtype.kind == "i": if unsigned == "true": - unsigned_dtype = np.dtype("u%s" % data.dtype.itemsize) + unsigned_dtype = np.dtype(f"u{data.dtype.itemsize}") transform = partial(np.asarray, dtype=unsigned_dtype) data = lazy_elemwise_func(data, transform, unsigned_dtype) if "_FillValue" in attrs: @@ -318,7 +318,7 @@ def decode(self, variable, name=None): attrs["_FillValue"] = new_fill elif data.dtype.kind == "u": if unsigned == "false": - signed_dtype = np.dtype("i%s" % data.dtype.itemsize) + signed_dtype = np.dtype(f"i{data.dtype.itemsize}") transform = partial(np.asarray, dtype=signed_dtype) data = lazy_elemwise_func(data, transform, signed_dtype) if "_FillValue" in attrs: @@ -326,8 +326,8 @@ def decode(self, variable, name=None): attrs["_FillValue"] = new_fill else: warnings.warn( - "variable %r has _Unsigned attribute but is not " - "of integer type. Ignoring attribute." % name, + f"variable {name!r} has _Unsigned attribute but is not " + "of integer type. 
Ignoring attribute.", SerializationWarning, stacklevel=3, ) diff --git a/xarray/conventions.py b/xarray/conventions.py index aece572fda3..901d19bd99b 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -110,9 +110,9 @@ def maybe_encode_nonstring_dtype(var, name=None): and "missing_value" not in var.attrs ): warnings.warn( - "saving variable %s with floating " + f"saving variable {name} with floating " "point data as an integer dtype without " - "any _FillValue to use for NaNs" % name, + "any _FillValue to use for NaNs", SerializationWarning, stacklevel=10, ) diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 60c06478e19..344e3054237 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -47,7 +47,7 @@ def _get_joiner(join): # We rewrite all indexes and then use join='left' return operator.itemgetter(0) else: - raise ValueError("invalid value for join: %s" % join) + raise ValueError(f"invalid value for join: {join}") def _override_indexes(objects, all_indexes, exclude): @@ -56,8 +56,8 @@ def _override_indexes(objects, all_indexes, exclude): lengths = {index.size for index in dim_indexes} if len(lengths) != 1: raise ValueError( - "Indexes along dimension %r don't have the same length." - " Cannot use join='override'." % dim + f"Indexes along dimension {dim!r} don't have the same length." + " Cannot use join='override'." ) objects = list(objects) @@ -330,10 +330,9 @@ def align( labeled_size = index.size if len(unlabeled_sizes | {labeled_size}) > 1: raise ValueError( - "arguments without labels along dimension %r cannot be " - "aligned because they have different dimension size(s) %r " - "than the size of the aligned dimension labels: %r" - % (dim, unlabeled_sizes, labeled_size) + f"arguments without labels along dimension {dim!r} cannot be " + f"aligned because they have different dimension size(s) {unlabeled_sizes!r} " + f"than the size of the aligned dimension labels: {labeled_size!r}" ) for dim, sizes in unlabeled_dim_sizes.items(): @@ -475,8 +474,7 @@ def reindex_like_indexers( if other_size != target_size: raise ValueError( "different size for unlabeled " - "dimension on argument %r: %r vs %r" - % (dim, other_size, target_size) + f"dimension on argument {dim!r}: {other_size!r} vs {target_size!r}" ) return indexers @@ -563,8 +561,8 @@ def reindex_variables( if not index.is_unique: raise ValueError( - "cannot reindex or align along dimension %r because the " - "index has duplicate values" % dim + f"cannot reindex or align along dimension {dim!r} because the " + "index has duplicate values" ) int_indexer = get_indexer_nd(index, target, method, tolerance) @@ -591,9 +589,9 @@ def reindex_variables( new_size = indexers[dim].size if existing_size != new_size: raise ValueError( - "cannot reindex or align along dimension %r without an " - "index because its size %r is different from the size of " - "the new index %r" % (dim, existing_size, new_size) + f"cannot reindex or align along dimension {dim!r} without an " + f"index because its size {existing_size!r} is different from the size of " + f"the new index {new_size!r}" ) for name, var in variables.items(): diff --git a/xarray/core/common.py b/xarray/core/common.py index ce6157e6386..9af9a5b77d4 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -209,11 +209,11 @@ def __init_subclass__(cls, **kwargs): if not hasattr(object.__new__(cls), "__dict__"): pass elif cls.__module__.startswith("xarray."): - raise AttributeError("%s must explicitly define __slots__" % cls.__name__) + raise 
AttributeError(f"{cls.__name__} must explicitly define __slots__") else: cls.__setattr__ = cls._setattr_dict warnings.warn( - "xarray subclass %s should explicitly define __slots__" % cls.__name__, + f"xarray subclass {cls.__name__} should explicitly define __slots__", FutureWarning, stacklevel=2, ) @@ -251,10 +251,9 @@ def _setattr_dict(self, name: str, value: Any) -> None: if name in self.__dict__: # Custom, non-slotted attr, or improperly assigned variable? warnings.warn( - "Setting attribute %r on a %r object. Explicitly define __slots__ " + f"Setting attribute {name!r} on a {type(self).__name__!r} object. Explicitly define __slots__ " "to suppress this warning for legitimate custom attributes and " - "raise an error when attempting variables assignments." - % (name, type(self).__name__), + "raise an error when attempting variables assignments.", FutureWarning, stacklevel=2, ) @@ -274,9 +273,8 @@ def __setattr__(self, name: str, value: Any) -> None: ): raise raise AttributeError( - "cannot set attribute %r on a %r object. Use __setitem__ style" + f"cannot set attribute {name!r} on a {type(self).__name__!r} object. Use __setitem__ style" "assignment (e.g., `ds['name'] = ...`) instead of assigning variables." - % (name, type(self).__name__) ) from e def __dir__(self) -> List[str]: @@ -655,7 +653,7 @@ def pipe( func, target = func if target in kwargs: raise ValueError( - "%s is both the pipe target and a keyword argument" % target + f"{target} is both the pipe target and a keyword argument" ) kwargs[target] = self return func(*args, **kwargs) @@ -1270,8 +1268,7 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): if not isinstance(cond, (Dataset, DataArray)): raise TypeError( - "cond argument is %r but must be a %r or %r" - % (cond, Dataset, DataArray) + f"cond argument is {cond!r} but must be a {Dataset!r} or {DataArray!r}" ) # align so we can use integer indexing diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 36b0584019d..8e5e8ea054e 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -307,7 +307,7 @@ def assert_and_return_exact_match(all_keys): if keys != first_keys: raise ValueError( "exact match required for all data variable names, " - "but %r != %r" % (keys, first_keys) + f"but {keys!r} != {first_keys!r}" ) return first_keys @@ -516,7 +516,7 @@ def unified_dim_sizes( if len(set(var.dims)) < len(var.dims): raise ValueError( "broadcasting cannot handle duplicate " - "dimensions on a variable: %r" % list(var.dims) + f"dimensions on a variable: {list(var.dims)}" ) for dim, size in zip(var.dims, var.shape): if dim not in exclude_dims: @@ -526,7 +526,7 @@ def unified_dim_sizes( raise ValueError( "operands cannot be broadcast together " "with mismatched lengths for dimension " - "%r: %s vs %s" % (dim, dim_sizes[dim], size) + f"{dim}: {dim_sizes[dim]} vs {size}" ) return dim_sizes @@ -563,8 +563,8 @@ def broadcast_compat_data( if unexpected_dims: raise ValueError( "operand to apply_ufunc encountered unexpected " - "dimensions %r on an input variable: these are core " - "dimensions on other input or output variables" % unexpected_dims + f"dimensions {unexpected_dims!r} on an input variable: these are core " + "dimensions on other input or output variables" ) # for consistency with numpy, keep broadcast dimensions to the left diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 7a958eb1404..fa0b9c0741d 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -223,8 +223,7 @@ def concat( if compat not in 
_VALID_COMPAT: raise ValueError( - "compat=%r invalid: must be 'broadcast_equals', 'equals', 'identical', 'no_conflicts' or 'override'" - % compat + f"compat={compat!r} invalid: must be 'broadcast_equals', 'equals', 'identical', 'no_conflicts' or 'override'" ) if isinstance(first_obj, DataArray): @@ -234,7 +233,7 @@ def concat( else: raise TypeError( "can only concatenate xarray Dataset and DataArray " - "objects, got %s" % type(first_obj) + f"objects, got {type(first_obj)}" ) return f( objs, dim, data_vars, coords, compat, positions, fill_value, join, combine_attrs @@ -293,18 +292,16 @@ def process_subset_opt(opt, subset): if opt == "different": if compat == "override": raise ValueError( - "Cannot specify both %s='different' and compat='override'." - % subset + f"Cannot specify both {subset}='different' and compat='override'." ) # all nonindexes that are not the same in each dataset for k in getattr(datasets[0], subset): if k not in concat_over: equals[k] = None - variables = [] - for ds in datasets: - if k in ds.variables: - variables.append(ds.variables[k]) + variables = [ + ds.variables[k] for ds in datasets if k in ds.variables + ] if len(variables) == 1: # coords="different" doesn't make sense when only one object @@ -367,12 +364,12 @@ def process_subset_opt(opt, subset): if subset == "coords": raise ValueError( "some variables in coords are not coordinates on " - "the first dataset: %s" % (invalid_vars,) + f"the first dataset: {invalid_vars}" ) else: raise ValueError( "some variables in data_vars are not data variables " - "on the first dataset: %s" % (invalid_vars,) + f"on the first dataset: {invalid_vars}" ) concat_over.update(opt) @@ -439,7 +436,7 @@ def _dataset_concat( both_data_and_coords = coord_names & data_names if both_data_and_coords: raise ValueError( - "%r is a coordinate in some datasets but not others." % both_data_and_coords + f"{both_data_and_coords!r} is a coordinate in some datasets but not others." ) # we don't want the concat dimension in the result dataset yet dim_coords.pop(dim, None) @@ -507,7 +504,7 @@ def ensure_common_dims(vars): try: vars = ensure_common_dims([ds[k].variable for ds in datasets]) except KeyError: - raise ValueError("%r is not present in all datasets." % k) + raise ValueError(f"{k!r} is not present in all datasets.") combined = concat_vars(vars, dim, positions) assert isinstance(combined, Variable) result_vars[k] = combined @@ -519,8 +516,7 @@ def ensure_common_dims(vars): absent_coord_names = coord_names - set(result.variables) if absent_coord_names: raise ValueError( - "Variables %r are coordinates in some datasets but not others." - % absent_coord_names + f"Variables {absent_coord_names!r} are coordinates in some datasets but not others." 
) result = result.set_coords(coord_names) result.encoding = result_encoding diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 90d9ff750f3..a6b1960922c 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -98,16 +98,16 @@ def _infer_coords_and_dims( and len(coords) != len(shape) ): raise ValueError( - "coords is not dict-like, but it has %s items, " - "which does not match the %s dimensions of the " - "data" % (len(coords), len(shape)) + f"coords is not dict-like, but it has {len(coords)} items, " + f"which does not match the {len(shape)} dimensions of the " + "data" ) if isinstance(dims, str): dims = (dims,) if dims is None: - dims = ["dim_%s" % n for n in range(len(shape))] + dims = [f"dim_{n}" for n in range(len(shape))] if coords is not None and len(coords) == len(shape): # try to infer dimensions from coords if utils.is_dict_like(coords): @@ -125,12 +125,12 @@ def _infer_coords_and_dims( elif len(dims) != len(shape): raise ValueError( "different number of dimensions on data " - "and dims: %s vs %s" % (len(shape), len(dims)) + f"and dims: {len(shape)} vs {len(dims)}" ) else: for d in dims: if not isinstance(d, str): - raise TypeError("dimension %s is not a string" % d) + raise TypeError(f"dimension {d} is not a string") new_coords: Dict[Any, Variable] = {} @@ -147,24 +147,24 @@ def _infer_coords_and_dims( for k, v in new_coords.items(): if any(d not in dims for d in v.dims): raise ValueError( - "coordinate %s has dimensions %s, but these " + f"coordinate {k} has dimensions {v.dims}, but these " "are not a subset of the DataArray " - "dimensions %s" % (k, v.dims, dims) + f"dimensions {dims}" ) for d, s in zip(v.dims, v.shape): if s != sizes[d]: raise ValueError( - "conflicting sizes for dimension %r: " - "length %s on the data but length %s on " - "coordinate %r" % (d, sizes[d], s, k) + f"conflicting sizes for dimension {d!r}: " + f"length {sizes[d]} on the data but length {s} on " + f"coordinate {k!r}" ) if k in sizes and v.shape != (sizes[k],): raise ValueError( - "coordinate %r is a DataArray dimension, but " - "it has shape %r rather than expected shape %r " - "matching the dimension size" % (k, v.shape, (sizes[k],)) + f"coordinate {k!r} is a DataArray dimension, but " + f"it has shape {v.shape!r} rather than expected shape {sizes[k]!r} " + "matching the dimension size" ) assert_unique_multiindex_level_names(new_coords) @@ -1988,7 +1988,7 @@ def reorder_levels( coord = self._coords[dim] index = coord.to_index() if not isinstance(index, pd.MultiIndex): - raise ValueError("coordinate %r has no MultiIndex" % dim) + raise ValueError(f"coordinate {dim!r} has no MultiIndex") replace_coords[dim] = IndexVariable(coord.dims, index.reorder_levels(order)) coords = self._coords.copy() coords.update(replace_coords) @@ -2632,8 +2632,8 @@ def to_pandas(self) -> Union["DataArray", pd.Series, pd.DataFrame]: constructor = constructors[self.ndim] except KeyError: raise ValueError( - "cannot convert arrays with %s dimensions into " - "pandas objects" % self.ndim + f"cannot convert arrays with {self.ndim} dimensions into " + "pandas objects" ) indexes = [self.get_index(dim) for dim in self.dims] return constructor(self.values, *indexes) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index ca8d8a28e3a..2e71d07bb0e 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -194,16 +194,15 @@ def calculate_dimensions(variables: Mapping[Hashable, Variable]) -> Dict[Hashabl for dim, size in zip(var.dims, var.shape): if dim in scalar_vars: raise 
ValueError( - "dimension %r already exists as a scalar variable" % dim + f"dimension {dim!r} already exists as a scalar variable" ) if dim not in dims: dims[dim] = size last_used[dim] = k elif dims[dim] != size: raise ValueError( - "conflicting sizes for dimension %r: " - "length %s on %r and length %s on %r" - % (dim, size, k, dims[dim], last_used[dim]) + f"conflicting sizes for dimension {dim!r}: " + f"length {size} on {k!r} and length {dims[dim]} on {last_used!r}" ) return dims @@ -243,8 +242,7 @@ def merge_indexes( and var.dims != current_index_variable.dims ): raise ValueError( - "dimension mismatch between %r %s and %r %s" - % (dim, current_index_variable.dims, n, var.dims) + f"dimension mismatch between {dim!r} {current_index_variable.dims} and {n!r} {var.dims}" ) if current_index_variable is not None and append: @@ -254,7 +252,7 @@ def merge_indexes( codes.extend(current_index.codes) levels.extend(current_index.levels) else: - names.append("%s_level_0" % dim) + names.append(f"{dim}_level_0") cat = pd.Categorical(current_index.values, ordered=True) codes.append(cat.codes) levels.append(cat.categories) @@ -731,8 +729,7 @@ def __init__( both_data_and_coords = set(data_vars) & set(coords) if both_data_and_coords: raise ValueError( - "variables %r are found in both data_vars and coords" - % both_data_and_coords + f"variables {both_data_and_coords!r} are found in both data_vars and coords" ) if isinstance(coords, Dataset): @@ -1651,7 +1648,7 @@ def reset_coords( bad_coords = set(names) & set(self.dims) if bad_coords: raise ValueError( - "cannot remove index coordinates with reset_coords: %s" % bad_coords + f"cannot remove index coordinates with reset_coords: {bad_coords}" ) obj = self.copy() obj._coord_names.difference_update(names) @@ -2001,7 +1998,7 @@ def chunk( bad_dims = chunks.keys() - self.dims.keys() if bad_dims: raise ValueError( - "some chunks keys are not dimensions on this " "object: %s" % bad_dims + f"some chunks keys are not dimensions on this object: {bad_dims}" ) variables = { @@ -2357,12 +2354,12 @@ def head( if not isinstance(v, int): raise TypeError( "expected integer type indexer for " - "dimension %r, found %r" % (k, type(v)) + f"dimension {k!r}, found {type(v)!r}" ) elif v < 0: raise ValueError( "expected positive integer as indexer " - "for dimension %r, found %s" % (k, v) + f"for dimension {k!r}, found {v}" ) indexers_slices = {k: slice(val) for k, val in indexers.items()} return self.isel(indexers_slices) @@ -2403,12 +2400,12 @@ def tail( if not isinstance(v, int): raise TypeError( "expected integer type indexer for " - "dimension %r, found %r" % (k, type(v)) + f"dimension {k!r}, found {type(v)!r}" ) elif v < 0: raise ValueError( "expected positive integer as indexer " - "for dimension %r, found %s" % (k, v) + f"for dimension {k!r}, found {v}" ) indexers_slices = { k: slice(-val, None) if val != 0 else slice(val) @@ -2453,12 +2450,12 @@ def thin( if not isinstance(v, int): raise TypeError( "expected integer type indexer for " - "dimension %r, found %r" % (k, type(v)) + f"dimension {k!r}, found {type(v)!r}" ) elif v < 0: raise ValueError( "expected positive integer as indexer " - "for dimension %r, found %s" % (k, v) + f"for dimension {k!r}, found {v}" ) elif v == 0: raise ValueError("step cannot be zero") @@ -2779,7 +2776,7 @@ def _reindex( bad_dims = [d for d in indexers if d not in self.dims] if bad_dims: - raise ValueError("invalid reindex dimensions: %s" % bad_dims) + raise ValueError(f"invalid reindex dimensions: {bad_dims}") variables, indexes = 
alignment.reindex_variables( self.variables, @@ -3135,8 +3132,8 @@ def rename( for k in name_dict.keys(): if k not in self and k not in self.dims: raise ValueError( - "cannot rename %r because it is not a " - "variable or dimension in this dataset" % k + f"cannot rename {k!r} because it is not a " + "variable or dimension in this dataset" ) variables, coord_names, dims, indexes = self._rename_all( @@ -3176,8 +3173,8 @@ def rename_dims( for k, v in dims_dict.items(): if k not in self.dims: raise ValueError( - "cannot rename %r because it is not a " - "dimension in this dataset" % k + f"cannot rename {k!r} because it is not a " + "dimension in this dataset" ) if v in self.dims or v in self: raise ValueError( @@ -3220,8 +3217,8 @@ def rename_vars( for k in name_dict: if k not in self: raise ValueError( - "cannot rename %r because it is not a " - "variable or coordinate in this dataset" % k + f"cannot rename {k!r} because it is not a " + "variable or coordinate in this dataset" ) variables, coord_names, dims, indexes = self._rename_all( name_dict=name_dict, dims_dict={} @@ -3296,13 +3293,13 @@ def swap_dims( for k, v in dims_dict.items(): if k not in self.dims: raise ValueError( - "cannot swap from dimension %r because it is " - "not an existing dimension" % k + f"cannot swap from dimension {k!r} because it is " + "not an existing dimension" ) if v in self.variables and self.variables[v].dims != (k,): raise ValueError( - "replacement dimension %r is not a 1D " - "variable along the old dimension %r" % (v, k) + f"replacement dimension {v!r} is not a 1D " + f"variable along the old dimension {k!r}" ) result_dims = {dims_dict.get(dim, dim) for dim in self.dims} @@ -3902,7 +3899,7 @@ def unstack( missing_dims = [d for d in dims if d not in self.dims] if missing_dims: raise ValueError( - "Dataset does not contain the dimensions: %s" % missing_dims + f"Dataset does not contain the dimensions: {missing_dims}" ) non_multi_dims = [ @@ -3911,7 +3908,7 @@ def unstack( if non_multi_dims: raise ValueError( "cannot unstack dimensions that do not " - "have a MultiIndex: %s" % non_multi_dims + f"have a MultiIndex: {non_multi_dims}" ) result = self.copy(deep=False) @@ -4228,7 +4225,7 @@ def drop_sel(self, labels=None, *, errors="raise", **labels_kwargs): try: index = self.get_index(dim) except KeyError: - raise ValueError("dimension %r does not have coordinate labels" % dim) + raise ValueError(f"dimension {dim!r} does not have coordinate labels") new_index = index.drop(labels_for_dim, errors=errors) ds = ds.loc[{dim: new_index}] return ds @@ -4335,7 +4332,7 @@ def drop_dims( missing_dims = drop_dims - set(self.dims) if missing_dims: raise ValueError( - "Dataset does not contain the dimensions: %s" % missing_dims + f"Dataset does not contain the dimensions: {missing_dims}" ) drop_vars = {k for k, v in self._variables.items() if set(v.dims) & drop_dims} @@ -4373,8 +4370,8 @@ def transpose(self, *dims: Hashable) -> "Dataset": if dims: if set(dims) ^ set(self.dims) and ... not in dims: raise ValueError( - "arguments to transpose (%s) must be " - "permuted dataset dimensions (%s)" % (dims, tuple(self.dims)) + f"arguments to transpose ({dims}) must be " + f"permuted dataset dimensions ({tuple(self.dims)})" ) ds = self.copy() for name, var in self._variables.items(): @@ -4415,7 +4412,7 @@ def dropna( # depending on the order of the supplied axes. 
if dim not in self.dims: - raise ValueError("%s must be a single dataset dimension" % dim) + raise ValueError(f"{dim} must be a single dataset dimension") if subset is None: subset = iter(self.data_vars) @@ -4437,7 +4434,7 @@ def dropna( elif how == "all": mask = count > 0 elif how is not None: - raise ValueError("invalid how option: %s" % how) + raise ValueError(f"invalid how option: {how}") else: raise TypeError("must specify how or thresh") @@ -4784,7 +4781,7 @@ def reduce( missing_dimensions = [d for d in dims if d not in self.dims] if missing_dimensions: raise ValueError( - "Dataset does not contain the dimensions: %s" % missing_dimensions + f"Dataset does not contain the dimensions: {missing_dimensions}" ) if keep_attrs is None: @@ -5471,8 +5468,7 @@ def apply_over_both(lhs_data_vars, rhs_data_vars, lhs_vars, rhs_vars): if inplace and set(lhs_data_vars) != set(rhs_data_vars): raise ValueError( "datasets must have the same data variables " - "for in-place arithmetic operations: %s, %s" - % (list(lhs_data_vars), list(rhs_data_vars)) + f"for in-place arithmetic operations: {list(lhs_data_vars)}, {list(rhs_data_vars)}" ) dest_vars = {} @@ -5641,7 +5637,7 @@ def shift(self, shifts=None, fill_value=dtypes.NA, **shifts_kwargs): shifts = either_dict_or_kwargs(shifts, shifts_kwargs, "shift") invalid = [k for k in shifts if k not in self.dims] if invalid: - raise ValueError("dimensions %r do not exist" % invalid) + raise ValueError(f"dimensions {invalid!r} do not exist") variables = {} for name, var in self.variables.items(): @@ -5703,7 +5699,7 @@ def roll(self, shifts=None, roll_coords=None, **shifts_kwargs): shifts = either_dict_or_kwargs(shifts, shifts_kwargs, "roll") invalid = [k for k in shifts if k not in self.dims] if invalid: - raise ValueError("dimensions %r do not exist" % invalid) + raise ValueError(f"dimensions {invalid!r} do not exist") if roll_coords is None: warnings.warn( @@ -5965,7 +5961,7 @@ def rank(self, dim, pct=False, keep_attrs=None): Variables that do not depend on `dim` are dropped. """ if dim not in self.dims: - raise ValueError("Dataset does not contain the dimension: %s" % dim) + raise ValueError(f"Dataset does not contain the dimension: {dim}") variables = {} for name, var in self.variables.items(): diff --git a/xarray/core/extensions.py b/xarray/core/extensions.py index 9b7b060107b..3debefe2e0d 100644 --- a/xarray/core/extensions.py +++ b/xarray/core/extensions.py @@ -38,7 +38,7 @@ def __get__(self, obj, cls): # __getattr__ on data object will swallow any AttributeErrors # raised when initializing the accessor, so we need to raise as # something else (GH933): - raise RuntimeError("error initializing %r accessor." % self._name) + raise RuntimeError(f"error initializing {self._name!r} accessor.") cache[self._name] = accessor_obj return accessor_obj @@ -48,9 +48,8 @@ def _register_accessor(name, cls): def decorator(accessor): if hasattr(cls, name): warnings.warn( - "registration of accessor %r under name %r for type %r is " - "overriding a preexisting attribute with the same name." 
- % (accessor, name, cls), + f"registration of accessor {accessor!r} under name {name!r} for type {cls!r} is " + "overriding a preexisting attribute with the same name.", AccessorRegistrationWarning, stacklevel=2, ) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 3149606caab..073bddcbef9 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -29,8 +29,8 @@ def check_reduce_dims(reduce_dims, dimensions): reduce_dims = [reduce_dims] if any(dim not in dimensions for dim in reduce_dims): raise ValueError( - "cannot reduce over dimensions %r. expected either '...' to reduce over all dimensions or one or more of %r." - % (reduce_dims, dimensions) + f"cannot reduce over dimensions {reduce_dims!r}. expected either '...' " + f"to reduce over all dimensions or one or more of {dimensions!r}." ) @@ -105,7 +105,7 @@ def _consolidate_slices(slices): last_slice = slice(None) for slice_ in slices: if not isinstance(slice_, slice): - raise ValueError("list element is not a slice: %r" % slice_) + raise ValueError(f"list element is not a slice: {slice_!r}") if ( result and last_slice.stop == slice_.start @@ -378,7 +378,7 @@ def __init__( if len(group_indices) == 0: if bins is not None: raise ValueError( - "None of the data falls within bins with edges %r" % bins + f"None of the data falls within bins with edges {bins!r}" ) else: raise ValueError( @@ -491,8 +491,8 @@ def _yield_binary_applied(self, func, other): if self._group.name not in other.dims: raise ValueError( "incompatible dimensions for a grouped " - "binary operation: the group variable %r " - "is not a dimension on the other argument" % self._group.name + f"binary operation: the group variable {self._group.name!r} " + "is not a dimension on the other argument" ) if dummy is None: dummy = _dummy_copy(other) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index c250c80c3ea..7ba062d4576 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -65,7 +65,7 @@ def broadcast_dimension_size(variables: List[Variable]) -> Dict[Hashable, int]: for var in variables: for dim, size in zip(var.dims, var.shape): if dim in dims and size != dims[dim]: - raise ValueError("index %r not aligned" % dim) + raise ValueError(f"index {dim!r} not aligned") dims[dim] = size return dims @@ -209,17 +209,15 @@ def merge_collected( for _, other_index in indexed_elements[1:]: if not index.equals(other_index): raise MergeError( - "conflicting values for index %r on objects to be " - "combined:\nfirst value: %r\nsecond value: %r" - % (name, index, other_index) + f"conflicting values for index {name!r} on objects to be " + f"combined:\nfirst value: {index!r}\nsecond value: {other_index!r}" ) if compat == "identical": for other_variable, _ in indexed_elements[1:]: if not dict_equiv(variable.attrs, other_variable.attrs): raise MergeError( "conflicting attribute values on combined " - "variable %r:\nfirst value: %r\nsecond value: %r" - % (name, variable.attrs, other_variable.attrs) + f"variable {name!r}:\nfirst value: {variable.attrs!r}\nsecond value: {other_variable.attrs!r}" ) merged_vars[name] = variable merged_indexes[name] = index @@ -486,9 +484,9 @@ def assert_valid_explicit_coords(variables, dims, explicit_coords): for coord_name in explicit_coords: if coord_name in dims and variables[coord_name].dims != (coord_name,): raise MergeError( - "coordinate %s shares a name with a dataset dimension, but is " + f"coordinate {coord_name} shares a name with a dataset dimension, but is " "not a 1D variable along that dimension. 
This is disallowed " - "by the xarray data model." % coord_name + "by the xarray data model." ) @@ -510,7 +508,7 @@ def merge_attrs(variable_attrs, combine_attrs): except ValueError: raise MergeError( "combine_attrs='no_conflicts', but some values are not " - "the same. Merging %s with %s" % (str(result), str(attrs)) + f"the same. Merging {str(result)} with {str(attrs)}" ) return result elif combine_attrs == "drop_conflicts": @@ -536,12 +534,12 @@ def merge_attrs(variable_attrs, combine_attrs): for attrs in variable_attrs[1:]: if not dict_equiv(result, attrs): raise MergeError( - "combine_attrs='identical', but attrs differ. First is %s " - ", other is %s." % (str(result), str(attrs)) + f"combine_attrs='identical', but attrs differ. First is {str(result)} " + f", other is {str(attrs)}." ) return result else: - raise ValueError("Unrecognised value for combine_attrs=%s" % combine_attrs) + raise ValueError(f"Unrecognised value for combine_attrs={combine_attrs}") class _MergeResult(NamedTuple): @@ -629,7 +627,7 @@ def merge_core( if ambiguous_coords: raise MergeError( "unable to determine if these variables should be " - "coordinates or not in the merged result: %s" % ambiguous_coords + f"coordinates or not in the merged result: {ambiguous_coords}" ) attrs = merge_attrs( diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 1407107a7be..b6983e7055e 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -93,7 +93,7 @@ def __init__(self, xi, yi, method="linear", fill_value=None, period=None): self._left = fill_value self._right = fill_value else: - raise ValueError("%s is not a valid fill_value" % fill_value) + raise ValueError(f"{fill_value} is not a valid fill_value") def __call__(self, x): return self.f( diff --git a/xarray/core/npcompat.py b/xarray/core/npcompat.py index 7dab09bf847..35bac982d4c 100644 --- a/xarray/core/npcompat.py +++ b/xarray/core/npcompat.py @@ -46,9 +46,9 @@ def _validate_axis(axis, ndim, argname): axis = list(axis) axis = [a + ndim if a < 0 else a for a in axis] if not builtins.all(0 <= a < ndim for a in axis): - raise ValueError("invalid axis for this array in `%s` argument" % argname) + raise ValueError(f"invalid axis for this array in {argname} argument") if len(set(axis)) != len(axis): - raise ValueError("repeated axis in `%s` argument" % argname) + raise ValueError(f"repeated axis in {argname} argument") return axis diff --git a/xarray/core/ops.py b/xarray/core/ops.py index 27740d53d45..8265035a25c 100644 --- a/xarray/core/ops.py +++ b/xarray/core/ops.py @@ -236,7 +236,7 @@ def func(self, *args, **kwargs): def inject_reduce_methods(cls): methods = ( [ - (name, getattr(duck_array_ops, "array_%s" % name), False) + (name, getattr(duck_array_ops, f"array_{name}"), False) for name in REDUCE_METHODS ] + [(name, getattr(duck_array_ops, name), True) for name in NAN_REDUCE_METHODS] @@ -275,7 +275,7 @@ def inject_cum_methods(cls): def op_str(name): - return "__%s__" % name + return f"__{name}__" def get_op(name): diff --git a/xarray/core/options.py b/xarray/core/options.py index d53c9d5d7d9..45f45c0dcc5 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -166,8 +166,7 @@ def __init__(self, **kwargs): for k, v in kwargs.items(): if k not in OPTIONS: raise ValueError( - "argument name %r is not in the set of valid options %r" - % (k, set(OPTIONS)) + f"argument name {k!r} is not in the set of valid options {set(OPTIONS)!r}" ) if k in _VALIDATORS and not _VALIDATORS[k](v): if k == ARITHMETIC_JOIN: diff --git a/xarray/core/parallel.py 
b/xarray/core/parallel.py index c2e27a4f339..ee65eb0e458 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -73,7 +73,7 @@ def check_result_variables( def dataset_to_dataarray(obj: Dataset) -> DataArray: if not isinstance(obj, Dataset): - raise TypeError("Expected Dataset, got %s" % type(obj)) + raise TypeError(f"Expected Dataset, got {type(obj)}") if len(obj.data_vars) > 1: raise TypeError( diff --git a/xarray/core/resample_cftime.py b/xarray/core/resample_cftime.py index 1870cb4f594..4a413902b90 100644 --- a/xarray/core/resample_cftime.py +++ b/xarray/core/resample_cftime.py @@ -146,7 +146,7 @@ def _get_time_bins(index, freq, closed, label, base): if not isinstance(index, CFTimeIndex): raise TypeError( "index must be a CFTimeIndex, but got " - "an instance of %r" % type(index).__name__ + f"an instance of {type(index).__name__!r}" ) if len(index) == 0: datetime_bins = labels = CFTimeIndex(data=[], name=index.name) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index b8fe2a2b89d..31ac43ed214 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -218,7 +218,7 @@ def update_safety_check( if k in first_dict and not compat(v, first_dict[k]): raise ValueError( "unsafe to merge dictionaries without " - "overriding values; conflicting key %r" % k + f"overriding values; conflicting key {k!r}" ) @@ -724,7 +724,7 @@ def __init__(self, data: MutableMapping[K, V], hidden_keys: Iterable[K]): def _raise_if_hidden(self, key: K) -> None: if key in self._hidden_keys: - raise KeyError("Key `%r` is hidden." % key) + raise KeyError(f"Key `{key!r}` is hidden.") # The next five methods are requirements of the ABC. def __setitem__(self, key: K, value: V) -> None: diff --git a/xarray/core/variable.py b/xarray/core/variable.py index f063528b700..9f2c6681619 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -150,25 +150,24 @@ def as_variable(obj, name=None) -> "Union[Variable, IndexVariable]": data = as_compatible_data(obj) if data.ndim != 1: raise MissingDimensionsError( - "cannot set variable %r with %r-dimensional data " + f"cannot set variable {name!r} with {data.ndim!r}-dimensional data " "without explicit dimension names. Pass a tuple of " - "(dims, data) instead." % (name, data.ndim) + "(dims, data) instead." ) obj = Variable(name, data, fastpath=True) else: raise TypeError( "unable to convert object into a variable without an " - "explicit list of dimensions: %r" % obj + f"explicit list of dimensions: {obj!r}" ) if name is not None and name in obj.dims: # convert the Variable into an Index if obj.ndim != 1: raise MissingDimensionsError( - "%r has more than 1-dimension and the same name as one of its " - "dimensions %r. xarray disallows such variables because they " - "conflict with the coordinates used to label " - "dimensions." % (name, obj.dims) + f"{name!r} has more than 1-dimension and the same name as one of its " + f"dimensions {obj.dims!r}. xarray disallows such variables because they " + "conflict with the coordinates used to label dimensions." 
) obj = obj.to_index_variable() @@ -574,8 +573,8 @@ def _parse_dimensions(self, dims): dims = tuple(dims) if len(dims) != self.ndim: raise ValueError( - "dimensions %s must have the same length as the " - "number of data dimensions, ndim=%s" % (dims, self.ndim) + f"dimensions {dims} must have the same length as the " + f"number of data dimensions, ndim={self.ndim}" ) return dims @@ -840,9 +839,8 @@ def __setitem__(self, key, value): value = as_compatible_data(value) if value.ndim > len(dims): raise ValueError( - "shape mismatch: value array of shape %s could not be " - "broadcast to indexing result with %s dimensions" - % (value.shape, len(dims)) + f"shape mismatch: value array of shape {value.shape} could not be " + f"broadcast to indexing result with {len(dims)} dimensions" ) if value.ndim == 0: value = Variable((), value) @@ -1450,8 +1448,8 @@ def set_dims(self, dims, shape=None): missing_dims = set(self.dims) - set(dims) if missing_dims: raise ValueError( - "new dimensions %r must be a superset of " - "existing dimensions %r" % (dims, self.dims) + f"new dimensions {dims!r} must be a superset of " + f"existing dimensions {self.dims!r}" ) self_dims = set(self.dims) @@ -1475,7 +1473,7 @@ def set_dims(self, dims, shape=None): def _stack_once(self, dims: List[Hashable], new_dim: Hashable): if not set(dims) <= set(self.dims): - raise ValueError("invalid existing dimensions: %s" % dims) + raise ValueError(f"invalid existing dimensions: {dims}") if new_dim in self.dims: raise ValueError( @@ -1542,7 +1540,7 @@ def _unstack_once_full( new_dim_sizes = tuple(dims.values()) if old_dim not in self.dims: - raise ValueError("invalid existing dimension: %s" % old_dim) + raise ValueError(f"invalid existing dimension: {old_dim}") if set(new_dim_names).intersection(self.dims): raise ValueError( @@ -2514,7 +2512,7 @@ class IndexVariable(Variable): def __init__(self, dims, data, attrs=None, encoding=None, fastpath=False): super().__init__(dims, data, attrs, encoding, fastpath) if self.ndim != 1: - raise ValueError("%s objects must be 1-dimensional" % type(self).__name__) + raise ValueError(f"{type(self).__name__} objects must be 1-dimensional") # Unlike in Variable, always eagerly load values into memory if not isinstance(self._data, PandasIndexAdapter): @@ -2565,7 +2563,7 @@ def _finalize_indexing_result(self, dims, data): return self._replace(dims=dims, data=data) def __setitem__(self, key, value): - raise TypeError("%s values cannot be modified" % type(self).__name__) + raise TypeError(f"{type(self).__name__} values cannot be modified") @classmethod def concat(cls, variables, dim="concat_dim", positions=None, shortcut=False): @@ -2698,7 +2696,7 @@ def level_names(self): def get_level_variable(self, level): """Return a new IndexVariable from a given MultiIndex level.""" if self.level_names is None: - raise ValueError("IndexVariable %r has no MultiIndex" % self.name) + raise ValueError(f"IndexVariable {self.name!r} has no MultiIndex") index = self.to_index() return type(self)(self.dims, index.get_level_values(level)) @@ -2723,7 +2721,7 @@ def _unified_dims(variables): if len(set(var_dims)) < len(var_dims): raise ValueError( "broadcasting cannot handle duplicate " - "dimensions: %r" % list(var_dims) + f"dimensions: {list(var_dims)!r}" ) for d, s in zip(var_dims, var.shape): if d not in all_dims: @@ -2731,8 +2729,7 @@ def _unified_dims(variables): elif all_dims[d] != s: raise ValueError( "operands cannot be broadcast together " - "with mismatched lengths for dimension %r: %s" - % (d, (all_dims[d], s)) + 
f"with mismatched lengths for dimension {d!r}: {(all_dims[d], s)}" ) return all_dims @@ -2836,12 +2833,12 @@ def assert_unique_multiindex_level_names(variables): for k, v in level_names.items(): if k in variables: - v.append("(%s)" % k) + v.append(f"({k})") duplicate_names = [v for v in level_names.values() if len(v) > 1] if duplicate_names: conflict_str = "\n".join(", ".join(v) for v in duplicate_names) - raise ValueError("conflicting MultiIndex level name(s):\n%s" % conflict_str) + raise ValueError(f"conflicting MultiIndex level name(s):\n{conflict_str}") # Check confliction between level names and dimensions GH:2299 for k, v in variables.items(): for d in v.dims: diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index 0235510d2fb..71e6ef35a62 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -319,7 +319,7 @@ def _infer_xy_labels_3d(darray, x, y, rgb): if len(set(not_none)) < len(not_none): raise ValueError( "Dimension names must be None or unique strings, but imshow was " - "passed x=%r, y=%r, and rgb=%r." % (x, y, rgb) + f"passed x={x!r}, y={y!r}, and rgb={rgb!r}." ) for label in not_none: if label not in darray.dims: @@ -341,8 +341,7 @@ def _infer_xy_labels_3d(darray, x, y, rgb): rgb = could_be_color[0] if rgb is not None and darray[rgb].size not in (3, 4): raise ValueError( - "Cannot interpret dim %r of size %s as RGB or RGBA." - % (rgb, darray[rgb].size) + f"Cannot interpret dim {rgb!r} of size {darray[rgb].size} as RGB or RGBA." ) # If rgb dimension is still unknown, there must be two or three dimensions @@ -352,9 +351,9 @@ def _infer_xy_labels_3d(darray, x, y, rgb): rgb = could_be_color[-1] warnings.warn( "Several dimensions of this array could be colors. Xarray " - "will use the last possible dimension (%r) to match " + f"will use the last possible dimension ({rgb!r}) to match " "matplotlib.pyplot.imshow. You can pass names of x, y, " - "and/or rgb dimensions to override this guess." % rgb + "and/or rgb dimensions to override this guess." ) assert rgb is not None @@ -661,15 +660,15 @@ def _rescale_imshow_rgb(darray, vmin, vmax, robust): vmax = 255 if np.issubdtype(darray.dtype, np.integer) else 1 if vmax < vmin: raise ValueError( - "vmin=%r is less than the default vmax (%r) - you must supply " - "a vmax > vmin in this case." % (vmin, vmax) + f"vmin={vmin!r} is less than the default vmax ({vmax!r}) - you must supply " + "a vmax > vmin in this case." ) elif vmin is None: vmin = 0 if vmin > vmax: raise ValueError( - "vmax=%r is less than the default vmin (0) - you must supply " - "a vmin < vmax in this case." % vmax + f"vmax={vmax!r} is less than the default vmin (0) - you must supply " + "a vmin < vmax in this case." ) # Scale interval [vmin .. vmax] to [0 .. 1], with darray as 64-bit float # to avoid precision loss, integer over/underflow, etc with extreme inputs. 
diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index db102eefdc1..355c5dbed32 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -387,7 +387,7 @@ def test_da_groupby_assign_coords(): @pytest.mark.parametrize("obj", [repr_da, repr_da.to_dataset(name="a")]) def test_groupby_repr(obj, dim): actual = repr(obj.groupby(dim)) - expected = "%sGroupBy" % obj.__class__.__name__ + expected = f"{obj.__class__.__name__}GroupBy" expected += ", grouped over %r" % dim expected += "\n%r groups with labels " % (len(np.unique(obj[dim]))) if dim == "x": @@ -404,7 +404,7 @@ def test_groupby_repr(obj, dim): @pytest.mark.parametrize("obj", [repr_da, repr_da.to_dataset(name="a")]) def test_groupby_repr_datetime(obj): actual = repr(obj.groupby("t.month")) - expected = "%sGroupBy" % obj.__class__.__name__ + expected = f"{obj.__class__.__name__}GroupBy" expected += ", grouped over 'month'" expected += "\n%r groups with labels " % (len(np.unique(obj.t.dt.month))) expected += "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12." diff --git a/xarray/ufuncs.py b/xarray/ufuncs.py index ce01936b9dd..bf80dcf68cd 100644 --- a/xarray/ufuncs.py +++ b/xarray/ufuncs.py @@ -77,8 +77,7 @@ def __call__(self, *args, **kwargs): res = f(*new_args, **kwargs) if res is NotImplemented: raise TypeError( - "%r not implemented for types (%r, %r)" - % (self._name, type(args[0]), type(args[1])) + f"{self._name!r} not implemented for types ({type(args[0])!r}, {type(args[1])!r})" ) return res @@ -127,11 +126,11 @@ def _create_op(name): doc = _remove_unused_reference_labels(_skip_signature(_dedent(doc), name)) func.__doc__ = ( - "xarray specific variant of numpy.%s. Handles " + f"xarray specific variant of numpy.{name}. Handles " "xarray.Dataset, xarray.DataArray, xarray.Variable, " "numpy.ndarray and dask.array.Array objects with " "automatic dispatching.\n\n" - "Documentation from numpy:\n\n%s" % (name, doc) + f"Documentation from numpy:\n\n{doc}" ) return func diff --git a/xarray/util/print_versions.py b/xarray/util/print_versions.py index d643d768093..cd5d425efe2 100755 --- a/xarray/util/print_versions.py +++ b/xarray/util/print_versions.py @@ -42,15 +42,15 @@ def get_sys_info(): [ ("python", sys.version), ("python-bits", struct.calcsize("P") * 8), - ("OS", "%s" % (sysname)), - ("OS-release", "%s" % (release)), - # ("Version", "%s" % (version)), - ("machine", "%s" % (machine)), - ("processor", "%s" % (processor)), - ("byteorder", "%s" % sys.byteorder), - ("LC_ALL", "%s" % os.environ.get("LC_ALL", "None")), - ("LANG", "%s" % os.environ.get("LANG", "None")), - ("LOCALE", "%s.%s" % locale.getlocale()), + ("OS", f"{sysname}"), + ("OS-release", f"{release}"), + # ("Version", f"{version}"), + ("machine", f"{machine}"), + ("processor", f"{processor}"), + ("byteorder", f"{sys.byteorder}"), + ("LC_ALL", f'{os.environ.get("LC_ALL", "None")}'), + ("LANG", f'{os.environ.get("LANG", "None")}'), + ("LOCALE", f"{locale.getlocale()}"), ] ) except Exception: From ee347410d18aae95052c195980c5b569286218bc Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 29 Apr 2021 15:31:37 -0600 Subject: [PATCH 09/16] Apply suggestions from code review Co-authored-by: Stephan Hoyer --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 849b035bd7d..3c12f9e31c3 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -307,7 +307,7 @@ def _dataset_from_backend_dataset( ): if not isinstance(chunks, (int, dict)) and chunks 
not in {None, "auto"}: raise ValueError( - "chunks must be an int, dict, 'auto', or None. " f"Instead found {chunks}." + f"chunks must be an int, dict, 'auto', or None. Instead found {chunks}." ) _protect_dataset_variables_inplace(backend_ds, cache) From 4d6e78e62ed2b1c35dabec97cec90bcc3706b5a5 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 29 Apr 2021 15:53:12 -0600 Subject: [PATCH 10/16] Revert back to original versions --- xarray/backends/netCDF4_.py | 9 ++++--- xarray/coding/strings.py | 54 ++++++++++++++++++------------------- xarray/core/accessor_dt.py | 10 +++---- xarray/core/accessor_str.py | 10 +++---- 4 files changed, 41 insertions(+), 42 deletions(-) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index fcd7ca72ae4..9d492f04a52 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -159,7 +159,10 @@ def _netcdf4_create_group(dataset, name): def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group): - if group not in {None, "", "/"}: + if group in {None, "", "/"}: + # use the root group + return ds + else: # make sure it's a string if not isinstance(group, str): raise ValueError("group must be a string or None") @@ -174,9 +177,7 @@ def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group): else: # wrap error to provide slightly more helpful message raise OSError(f"group not found: {key}", e) - - # use the root group - return ds + return ds def _ensure_fill_value_valid(data, attributes): diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index d52ba13caaa..1a43b4e3052 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -130,18 +130,17 @@ def bytes_to_char(arr): if arr.dtype.kind != "S": raise ValueError("argument must have a fixed-width bytes dtype") - if not is_duck_dask_array(arr): - return _numpy_bytes_to_char(arr) - - import dask.array as da - - return da.map_blocks( - _numpy_bytes_to_char, - arr, - dtype="S1", - chunks=arr.chunks + ((arr.dtype.itemsize,)), - new_axis=[arr.ndim], - ) + if is_duck_dask_array(arr): + import dask.array as da + + return da.map_blocks( + _numpy_bytes_to_char, + arr, + dtype="S1", + chunks=arr.chunks + ((arr.dtype.itemsize,)), + new_axis=[arr.ndim], + ) + return _numpy_bytes_to_char(arr) def _numpy_bytes_to_char(arr): @@ -166,25 +165,24 @@ def char_to_bytes(arr): # can't make an S0 dtype return np.zeros(arr.shape[:-1], dtype=np.string_) - if not is_duck_dask_array(arr): - return StackedBytesArray(arr) + if is_duck_dask_array(arr): + import dask.array as da - import dask.array as da + if len(arr.chunks[-1]) > 1: + raise ValueError( + "cannot stacked dask character array with " + f"multiple chunks in the last dimension: {arr}" + ) - if len(arr.chunks[-1]) > 1: - raise ValueError( - "cannot stack dask character array with " - "multiple chunks in the last dimension: {}".format(arr) + dtype = np.dtype("S" + str(arr.shape[-1])) + return da.map_blocks( + _numpy_char_to_bytes, + arr, + dtype=dtype, + chunks=arr.chunks[:-1], + drop_axis=[arr.ndim - 1], ) - - dtype = np.dtype("S" + str(arr.shape[-1])) - return da.map_blocks( - _numpy_char_to_bytes, - arr, - dtype=dtype, - chunks=arr.chunks[:-1], - drop_axis=[arr.ndim - 1], - ) + return StackedBytesArray(arr) def _numpy_char_to_bytes(arr): diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index da3099aa72b..9ffa2035949 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -344,12 +344,12 @@ def isocalendar(self): values = 
_get_date_field(self._obj.data, "isocalendar", np.int64) obj_type = type(self._obj) - data_vars = { - name: obj_type( - values[i], name=name, coords=self._obj.coords, dims=self._obj.dims + data_vars = {} + for name, value in zip(values, ["year", "week", "weekday"]): + data_vars[name] = obj_type( + values, name=name, coords=self._obj.coords, dims=self._obj.dims ) - for i, name in enumerate(["year", "week", "weekday"]) - } + return Dataset(data_vars) year = Properties._tslib_field_accessor( diff --git a/xarray/core/accessor_str.py b/xarray/core/accessor_str.py index 8e587fffe7e..d50163c435b 100644 --- a/xarray/core/accessor_str.py +++ b/xarray/core/accessor_str.py @@ -271,13 +271,13 @@ def _re_compile( if getattr(pat, "dtype", None) != np.object_: pat = self._stringify(pat) func = lambda x: re.compile(x, flags=flags) - if not isinstance(pat, np.ndarray): + if isinstance(pat, np.ndarray): + # apply_ufunc doesn't work for numpy arrays with output object dtypes + func = np.vectorize(func) + return func(pat) + else: return _apply_str_ufunc(func=func, obj=pat, dtype=np.object_) - # apply_ufunc doesn't work for numpy arrays with output object dtypes - func = np.vectorize(func) - return func(pat) - def len(self) -> Any: """ Compute the length of each string in the array. From 1cc604f726a74cb387c5446e6841fd7d6bb15620 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 29 Apr 2021 16:07:17 -0600 Subject: [PATCH 11/16] Revert back to original version --- xarray/core/accessor_dt.py | 60 ++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index 9ffa2035949..1d4ef755fa0 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -81,20 +81,20 @@ def _get_date_field(values, name, dtype): else: access_method = _access_through_cftimeindex - if not is_duck_dask_array(values): - return access_method(values, name) - - from dask.array import map_blocks + if is_duck_dask_array(values): + from dask.array import map_blocks - new_axis = chunks = None - # isocalendar adds adds an axis - if name == "isocalendar": - chunks = (3,) + values.chunksize - new_axis = 0 + new_axis = chunks = None + # isocalendar adds adds an axis + if name == "isocalendar": + chunks = (3,) + values.chunksize + new_axis = 0 - return map_blocks( - access_method, values, name, dtype=dtype, new_axis=new_axis, chunks=chunks - ) + return map_blocks( + access_method, values, name, dtype=dtype, new_axis=new_axis, chunks=chunks + ) + else: + return access_method(values, name) def _round_through_series_or_index(values, name, freq): @@ -134,15 +134,15 @@ def _round_field(values, name, freq): Array-like of datetime fields accessed for each element in values """ - if not is_duck_dask_array(values): - return _round_through_series_or_index(values, name, freq) + if is_duck_dask_array(values): + from dask.array import map_blocks - from dask.array import map_blocks - - dtype = np.datetime64 if is_np_datetime_like(values.dtype) else np.dtype("O") - return map_blocks( - _round_through_series_or_index, values, name, freq=freq, dtype=dtype - ) + dtype = np.datetime64 if is_np_datetime_like(values.dtype) else np.dtype("O") + return map_blocks( + _round_through_series_or_index, values, name, freq=freq, dtype=dtype + ) + else: + return _round_through_series_or_index(values, name, freq) def _strftime_through_cftimeindex(values, date_format): @@ -171,12 +171,12 @@ def _strftime(values, date_format): access_method = _strftime_through_series 
else: access_method = _strftime_through_cftimeindex - if not is_duck_dask_array(values): - return access_method(values, date_format) + if is_duck_dask_array(values): + from dask.array import map_blocks - from dask.array import map_blocks - - return map_blocks(access_method, values, date_format) + return map_blocks(access_method, values, date_format) + else: + return access_method(values, date_format) class Properties: @@ -345,9 +345,9 @@ def isocalendar(self): obj_type = type(self._obj) data_vars = {} - for name, value in zip(values, ["year", "week", "weekday"]): + for i, name in enumerate(["year", "week", "weekday"]): data_vars[name] = obj_type( - values, name=name, coords=self._obj.coords, dims=self._obj.dims + values[i], name=name, coords=self._obj.coords, dims=self._obj.dims ) return Dataset(data_vars) @@ -386,11 +386,13 @@ def weekofyear(self): ) if LooseVersion(pd.__version__) < "1.1.0": - return Properties._tslib_field_accessor( + weekofyear = Properties._tslib_field_accessor( "weekofyear", "The week ordinal of the year", np.int64 ).fget(self) else: - return self.isocalendar().week + weekofyear = self.isocalendar().week + + return weekofyear week = weekofyear dayofweek = Properties._tslib_field_accessor( From e7b5e84860a1b56e0f52e86d9c1bd860d313e409 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 6 May 2021 07:05:24 -0600 Subject: [PATCH 12/16] Address @alexamici 's comment --- xarray/backends/api.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 15c2346e2d6..e950baed5e0 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -499,7 +499,7 @@ def open_dataset( **decoders, **kwargs, ) - return _dataset_from_backend_dataset( + ds = _dataset_from_backend_dataset( backend_ds, filename_or_obj, engine, @@ -510,6 +510,7 @@ def open_dataset( **decoders, **kwargs, ) + return ds def open_dataarray( From 879e2f564241bb1b779ada325031e95b20d4051f Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 7 May 2021 15:40:46 -0600 Subject: [PATCH 13/16] Revert back to original versions --- xarray/backends/h5netcdf_.py | 3 ++- xarray/backends/netCDF4_.py | 5 ++++- xarray/backends/zarr.py | 3 ++- xarray/coding/frequencies.py | 10 +++++----- xarray/coding/strings.py | 5 +++-- xarray/coding/variables.py | 12 +++++------- xarray/core/duck_array_ops.py | 10 +++++----- xarray/core/groupby.py | 3 ++- xarray/core/merge.py | 14 +++++++------- xarray/core/rolling.py | 5 ++++- 10 files changed, 39 insertions(+), 31 deletions(-) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 2ecc607fbb1..9f744d0c1ef 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -377,7 +377,7 @@ def open_dataset( store_entrypoint = StoreBackendEntrypoint() - return store_entrypoint.open_dataset( + ds = store_entrypoint.open_dataset( store, mask_and_scale=mask_and_scale, decode_times=decode_times, @@ -387,6 +387,7 @@ def open_dataset( use_cftime=use_cftime, decode_timedelta=decode_timedelta, ) + return ds if has_h5netcdf: diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 9d492f04a52..694b0d2fdd2 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -355,7 +355,10 @@ def open( if lock is None: if mode == "r": - lock = NETCDFC_LOCK if is_remote_uri(filename) else NETCDF4_PYTHON_LOCK + if is_remote_uri(filename): + lock = NETCDFC_LOCK + else: + lock = NETCDF4_PYTHON_LOCK else: if format is None or format.startswith("NETCDF4"): 
base_lock = NETCDF4_PYTHON_LOCK diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 845b98f1dfd..72c4e99265d 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -684,7 +684,7 @@ def open_zarr( "storage_options": storage_options, } - return open_dataset( + ds = open_dataset( filename_or_obj=store, group=group, decode_cf=decode_cf, @@ -699,6 +699,7 @@ def open_zarr( decode_timedelta=decode_timedelta, use_cftime=use_cftime, ) + return ds class ZarrBackendEntrypoint(BackendEntrypoint): diff --git a/xarray/coding/frequencies.py b/xarray/coding/frequencies.py index f1695051f95..e9efef8eb7a 100644 --- a/xarray/coding/frequencies.py +++ b/xarray/coding/frequencies.py @@ -232,13 +232,13 @@ def _is_multiple(us, mult: int): def _maybe_add_count(base: str, count: float): """If count is greater than 1, add it to the base offset string""" - if count == 1: + if count != 1: + assert count == int(count) + count = int(count) + return f"{count}{base}" + else: return base - assert count == int(count) - count = int(count) - return f"{count}{base}" - def month_anchor_check(dates): """Return the monthly offset string. diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index 1a43b4e3052..c217cb0c865 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -171,7 +171,7 @@ def char_to_bytes(arr): if len(arr.chunks[-1]) > 1: raise ValueError( "cannot stacked dask character array with " - f"multiple chunks in the last dimension: {arr}" + "multiple chunks in the last dimension: {}".format(arr) ) dtype = np.dtype("S" + str(arr.shape[-1])) @@ -182,7 +182,8 @@ def char_to_bytes(arr): chunks=arr.chunks[:-1], drop_axis=[arr.ndim - 1], ) - return StackedBytesArray(arr) + else: + return StackedBytesArray(arr) def _numpy_char_to_bytes(arr): diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 231b531b1a6..0c28eba5f7d 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -77,7 +77,6 @@ def __repr__(self): def lazy_elemwise_func(array, func, dtype): """Lazily apply an element-wise function to an array. - Parameters ---------- array : any valid value of Variable._data @@ -86,17 +85,16 @@ def lazy_elemwise_func(array, func, dtype): this should be a pickle-able object. dtype : coercible to np.dtype Dtype for the result of this function. - Returns ------- Either a dask.array.Array or _ElementwiseFunctionArray. """ - if not is_duck_dask_array(array): - return _ElementwiseFunctionArray(array, func, dtype) + if is_duck_dask_array(array): + import dask.array as da - import dask.array as da - - return da.map_blocks(func, array, dtype=dtype) + return da.map_blocks(func, array, dtype=dtype) + else: + return _ElementwiseFunctionArray(array, func, dtype) def unpack_for_encoding(var): diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 37ed5e97262..e32fd4be376 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -427,9 +427,7 @@ def _datetime_nanmin(array): def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): """Convert an array containing datetime-like data to numerical values. - Convert the datetime array to a timedelta relative to an offset. - Parameters ---------- array : array-like @@ -442,12 +440,10 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): conversions are not allowed due to non-linear relationships between units. dtype : dtype Output dtype. 
- Returns ------- array Numerical representation of datetime object relative to an offset. - Notes ----- Some datetime unit conversions won't work, for example from days to years, even @@ -457,7 +453,11 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): # TODO: make this function dask-compatible? # Set offset to minimum if not given if offset is None: - offset = _datetime_nanmin(array) if array.dtype.kind in "Mm" else min(array) + if array.dtype.kind in "Mm": + offset = _datetime_nanmin(array) + else: + offset = min(array) + # Compute timedelta object. # For np.datetime64, this can silently yield garbage due to overflow. # One option is to enforce 1970-01-01 as the universal offset. diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 073bddcbef9..c73ef738a29 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -642,7 +642,7 @@ def quantile( if dim is None: dim = self._group_dim - return self.map( + out = self.map( self._obj.__class__.quantile, shortcut=False, q=q, @@ -651,6 +651,7 @@ def quantile( keep_attrs=keep_attrs, skipna=skipna, ) + return out def where(self, cond, other=dtypes.NA): """Return elements from `self` or `other` depending on `cond`. diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 77d3499c468..f5986b4763d 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -129,13 +129,13 @@ def unique_variable( if equals is not True: break - if equals is None: - # now compare values with minimum number of computes - out = out.compute() - for var in variables[1:]: - equals = getattr(out, compat)(var) - if not equals: - break + if equals is None: + # now compare values with minimum number of computes + out = out.compute() + for var in variables[1:]: + equals = getattr(out, compat)(var) + if not equals: + break if not equals: raise MergeError( diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index b1e393e7983..870df122aa9 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -433,7 +433,10 @@ def reduce(self, func, keep_attrs=None, **kwargs): # save memory with reductions GH4325 fillna = kwargs.pop("fillna", dtypes.NA) - obj = self.obj.fillna(fillna) if fillna is not dtypes.NA else self.obj + if fillna is not dtypes.NA: + obj = self.obj.fillna(fillna) + else: + obj = self.obj windows = self._construct( obj, rolling_dim, keep_attrs=keep_attrs, fill_value=fillna ) From e853ee8a61f8c32db431a8d50a1a677086f1aad8 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 7 May 2021 15:55:23 -0600 Subject: [PATCH 14/16] Revert back to original versions --- xarray/backends/pydap_.py | 3 +- xarray/core/coordinates.py | 81 +++++++++++++++++++------------------- 2 files changed, 42 insertions(+), 42 deletions(-) diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py index 30ef9a91625..25d2df9d76a 100644 --- a/xarray/backends/pydap_.py +++ b/xarray/backends/pydap_.py @@ -141,7 +141,7 @@ def open_dataset( store_entrypoint = StoreBackendEntrypoint() with close_on_error(store): - return store_entrypoint.open_dataset( + ds = store_entrypoint.open_dataset( store, mask_and_scale=mask_and_scale, decode_times=decode_times, @@ -151,6 +151,7 @@ def open_dataset( use_cftime=use_cftime, decode_timedelta=decode_timedelta, ) + return ds if has_pydap: diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index d397364a5e0..37e1dbc7342 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -107,51 +107,50 @@ def to_index(self, ordered_dims: Sequence[Hashable] 
= None) -> pd.Index: (dim,) = ordered_dims return self._data.get_index(dim) # type: ignore[attr-defined] else: - return self._compute_multi_index(ordered_dims) - - def _compute_multi_index(self, ordered_dims): - indexes = [ - self._data.get_index(k) for k in ordered_dims # type: ignore[attr-defined] - ] + indexes = [ + self._data.get_index(k) for k in ordered_dims # type: ignore[attr-defined] + ] - # compute the sizes of the repeat and tile for the cartesian product - # (taken from pandas.core.reshape.util) - index_lengths = np.fromiter((len(index) for index in indexes), dtype=np.intp) - cumprod_lengths = np.cumproduct(index_lengths) + # compute the sizes of the repeat and tile for the cartesian product + # (taken from pandas.core.reshape.util) + index_lengths = np.fromiter( + (len(index) for index in indexes), dtype=np.intp + ) + cumprod_lengths = np.cumproduct(index_lengths) - if cumprod_lengths[-1] == 0: - # if any factor is empty, the cartesian product is empty - repeat_counts = np.zeros_like(cumprod_lengths) + if cumprod_lengths[-1] == 0: + # if any factor is empty, the cartesian product is empty + repeat_counts = np.zeros_like(cumprod_lengths) - else: - # sizes of the repeats - repeat_counts = cumprod_lengths[-1] / cumprod_lengths - # sizes of the tiles - tile_counts = np.roll(cumprod_lengths, 1) - tile_counts[0] = 1 - - # loop over the indexes - # for each MultiIndex or Index compute the cartesian product of the codes - - code_list = [] - level_list = [] - names = [] - - for i, index in enumerate(indexes): - if isinstance(index, pd.MultiIndex): - codes, levels = index.codes, index.levels else: - code, level = pd.factorize(index) - codes = [code] - levels = [level] - - # compute the cartesian product - code_list += [ - np.tile(np.repeat(code, repeat_counts[i]), tile_counts[i]) - for code in codes - ] - level_list += levels - names += index.names + # sizes of the repeats + repeat_counts = cumprod_lengths[-1] / cumprod_lengths + # sizes of the tiles + tile_counts = np.roll(cumprod_lengths, 1) + tile_counts[0] = 1 + + # loop over the indexes + # for each MultiIndex or Index compute the cartesian product of the codes + + code_list = [] + level_list = [] + names = [] + + for i, index in enumerate(indexes): + if isinstance(index, pd.MultiIndex): + codes, levels = index.codes, index.levels + else: + code, level = pd.factorize(index) + codes = [code] + levels = [level] + + # compute the cartesian product + code_list += [ + np.tile(np.repeat(code, repeat_counts[i]), tile_counts[i]) + for code in codes + ] + level_list += levels + names += index.names return pd.MultiIndex(level_list, code_list, names=names) From ca769d42840da9fcdcbea8cf3200f5a1605aa6e1 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 12 May 2021 09:59:06 -0600 Subject: [PATCH 15/16] Revert back to original version --- xarray/backends/plugins.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/xarray/backends/plugins.py b/xarray/backends/plugins.py index 123f9efb9c7..d892e8761a7 100644 --- a/xarray/backends/plugins.py +++ b/xarray/backends/plugins.py @@ -70,12 +70,10 @@ def set_missing_parameters(backend_entrypoints): def sort_backends(backend_entrypoints): - ordered_backends_entrypoints = { - be_name: backend_entrypoints.pop(be_name) - for be_name in STANDARD_BACKENDS_ORDER - if be_name in backend_entrypoints - } - + ordered_backends_entrypoints = {} + for be_name in STANDARD_BACKENDS_ORDER: + if backend_entrypoints.get(be_name, None) is not None: + ordered_backends_entrypoints[be_name] 
= backend_entrypoints.pop(be_name) ordered_backends_entrypoints.update( {name: backend_entrypoints[name] for name in sorted(backend_entrypoints)} ) From 9024b30307e2de6fa395ab5321a5b7edb911db52 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 12 May 2021 11:06:34 -0600 Subject: [PATCH 16/16] Fix missing empty line --- xarray/coding/variables.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 0c28eba5f7d..1ebaab1be02 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -85,6 +85,7 @@ def lazy_elemwise_func(array, func, dtype): this should be a pickle-able object. dtype : coercible to np.dtype Dtype for the result of this function. + Returns ------- Either a dask.array.Array or _ElementwiseFunctionArray.
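
For reference, the restored sort_backends logic above keeps the built-in engines in a fixed preferred order and appends any remaining entrypoints alphabetically. A minimal standalone sketch (not the xarray module itself; STANDARD_BACKENDS_ORDER is given an assumed value here, and the dict values are placeholders):

    STANDARD_BACKENDS_ORDER = ["netcdf4", "h5netcdf", "scipy"]  # assumed value for illustration

    def sort_backends(backend_entrypoints):
        ordered_backends_entrypoints = {}
        for be_name in STANDARD_BACKENDS_ORDER:
            if backend_entrypoints.get(be_name, None) is not None:
                ordered_backends_entrypoints[be_name] = backend_entrypoints.pop(be_name)
        # remaining (e.g. third-party) backends follow in alphabetical order
        ordered_backends_entrypoints.update(
            {name: backend_entrypoints[name] for name in sorted(backend_entrypoints)}
        )
        return ordered_backends_entrypoints

    print(list(sort_backends({"zarr": object(), "scipy": object(), "netcdf4": object()})))
    # ['netcdf4', 'scipy', 'zarr']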