diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 0168116cdca..30fe3384477 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -236,20 +236,19 @@ def encode(self, variable: Variable, name: T_Name = None): f"Variable {name!r} has conflicting _FillValue ({fv}) and missing_value ({mv}). Cannot encode data." ) + # cast to correct dtype in case of times + # see GH 7817 + units = attrs.get("units", None) + if isinstance(units, str) and "since" in units: + encoded_dtype = encoding.pop("dtype") + if encoded_dtype is not None and encoded_dtype != data.dtype: + data = np.asarray(data, dtype=encoded_dtype) + if fv_exists: # Ensure _FillValue is cast to same dtype as data's encoding["_FillValue"] = dtype.type(fv) fill_value = pop_to(encoding, attrs, "_FillValue", name=name) - # retrieve _FillValue in case of np.datetime64 - # see GH 7817 - if np.issubdtype(data.dtype, np.datetime64): - units = encoding.get("units", None) - if isinstance(units, str) and "since" in units: - delta, _ = times._unpack_netcdf_time_units(units) - delta = times._netcdf_to_numpy_timeunit(delta) - fill_value = np.datetime64(fill_value.item(), delta).astype( - "datetime64[ns]" - ) + if not pd.isnull(fill_value): data = duck_array_ops.fillna(data, fill_value) @@ -284,19 +283,19 @@ def decode(self, variable: Variable, name: T_Name = None): SerializationWarning, stacklevel=3, ) - - dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype) - # retrieve _FillValue in case of np.datetime64 - # see GH 7817 - if np.issubdtype(data.dtype, np.datetime64) and decoded_fill_value.astype( - np.int64 - ) == np.datetime64("NaT").astype(np.int64): - delta, _ = times._unpack_netcdf_time_units(encoding["units"]) - delta = times._netcdf_to_numpy_timeunit(delta) - encoded_fill_values = { - np.datetime64(encfill.item(), delta).astype("datetime64[ns]") - for encfill in encoded_fill_values - } + units = attrs.get("units", None) + # try to cast to correct dtypes for data and fill_value + # GH 7817 + if (( + isinstance(units, str) + and "since" in units + and np.issubdtype(data.dtype, np.integer)) or np.issubdtype(data.dtype, np.datetime64) + ): + dtype, decoded_fill_value = data.dtype, np.datetime64("NaT").astype( + data.dtype + ) + else: + dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype) if encoded_fill_values: transform = partial( _apply_mask, diff --git a/xarray/conventions.py b/xarray/conventions.py index ca7ceeb717c..1506efc31e8 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -174,10 +174,10 @@ def encode_cf_variable( ensure_not_multiindex(var, name=name) for coder in [ - variables.CFScaleOffsetCoder(), - variables.CFMaskCoder(), times.CFDatetimeCoder(), times.CFTimedeltaCoder(), + variables.CFScaleOffsetCoder(), + variables.CFMaskCoder(), variables.UnsignedIntegerCoder(), variables.NonStringCoder(), variables.DefaultFillvalueCoder(), @@ -263,13 +263,6 @@ def decode_cf_variable( var = strings.CharacterArrayCoder().decode(var, name=name) var = strings.EncodedStringCoder().decode(var) - # time decoding before masking - # GH 7817 - if decode_timedelta: - var = times.CFTimedeltaCoder().decode(var, name=name) - if decode_times: - var = times.CFDatetimeCoder(use_cftime=use_cftime).decode(var, name=name) - if mask_and_scale: for coder in [ variables.UnsignedIntegerCoder(), @@ -278,6 +271,11 @@ def decode_cf_variable( ]: var = coder.decode(var, name=name) + if decode_timedelta: + var = times.CFTimedeltaCoder().decode(var, name=name) + if decode_times: + var = times.CFDatetimeCoder(use_cftime=use_cftime).decode(var, name=name) + if decode_endianness and not var.dtype.isnative: var = variables.EndianCoder().decode(var) original_dtype = var.dtype