Shape squeeze edge case bug fix (#911)

* Fix edge case in imputation methods for singleton (length-1) input arrays
awslabs · Jul 9, 2020 · 134da35
1 parent 09021ec
commit 134da35
Show file tree

Hide file tree

Showing 2 changed files with 62 additions and 24 deletions.
diff --git a/src/gluonts/transform/feature.py b/src/gluonts/transform/feature.py
@@ -87,6 +87,8 @@ class MeanValueImputation(MissingValueImputation):
  """
 
  def __call__(self, values: np.ndarray) -> np.ndarray:
+ if len(values) == 1:
+ return DummyValueImputation()(values)
  nan_indices = np.where(np.isnan(values))
  values[nan_indices] = np.nanmean(values)
  return values
@@ -99,6 +101,8 @@ class LastValueImputation(MissingValueImputation):
  """
 
  def __call__(self, values: np.ndarray) -> np.ndarray:
+ if len(values) == 1:
+ return DummyValueImputation()(values)
  values = np.expand_dims(values, axis=0)
 
  mask = np.isnan(values)
@@ -107,7 +111,6 @@ def __call__(self, values: np.ndarray) -> np.ndarray:
  out = values[np.arange(idx.shape[0])[:, None], idx]
 
  values = np.squeeze(out)
-
  # in case we need to replace nan at the start of the array
  mask = np.isnan(values)
  values[mask] = np.interp(
@@ -124,6 +127,8 @@ class CausalMeanValueImputation(MissingValueImputation):
  """
 
  def __call__(self, values: np.ndarray) -> np.ndarray:
+ if len(values) == 1:
+ return DummyValueImputation()(values)
  mask = np.isnan(values)
 
  # we cannot compute the mean with this method if there are nans
@@ -160,6 +165,8 @@ def __init__(self, window_size: int = 10) -> None:
  self.window_size = 1 if window_size < 1 else window_size
 
  def __call__(self, values: np.ndarray) -> np.ndarray:
+ if len(values) == 1:
+ return DummyValueImputation()(values)
  mask = np.isnan(values)
 
  # we cannot compute the mean with this method if there are nans

diff --git a/test/test_transform.py b/test/test_transform.py
@@ -894,9 +894,11 @@ def test_AddObservedIndicator():
  Tests the different methods to impute missing values.
  """
 
- array_value = np.array(
- [np.nan, 1.0, 1.0, np.nan, 2.0, np.nan, 1.0, np.nan]
- )
+ array_values = [
+ np.array([np.nan, 1.0, 1.0, np.nan, 2.0, np.nan, 1.0, np.nan]),
+ np.array([np.nan]),
+ np.array([10.0]),
+ ]
 
  l_methods = [
  "dummy_value",
@@ -916,32 +918,61 @@ def test_AddObservedIndicator():
  "rolling_mean10": RollingMeanValueImputation(10),
  }
 
- d_expected_result = {
- "dummy_value": np.array([0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0]),
- "mean": np.array([1.25, 1.0, 1.0, 1.25, 2.0, 1.25, 1.0, 1.25]),
- "causal_mean": np.array([1.0, 1.0, 1.0, 1.0, 2.0, 1.2, 1.0, 9 / 7]),
- "last_value": np.array([1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0]),
- "rolling_mean10": np.array([1.0, 1.0, 1.0, 1.0, 2.0, 1.1, 1.0, 1.2]),
- "rolling_mean1": np.array([1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0]),
+ d_expected_results = {
+ "dummy_value": [
+ np.array([0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0]),
+ np.array([0.0]),
+ np.array([10.0]),
+ ],
+ "mean": [
+ np.array([1.25, 1.0, 1.0, 1.25, 2.0, 1.25, 1.0, 1.25]),
+ np.array([0.0]),
+ np.array([10.0]),
+ ],
+ "causal_mean": [
+ np.array([1.0, 1.0, 1.0, 1.0, 2.0, 1.2, 1.0, 9 / 7]),
+ np.array([0.0]),
+ np.array([10.0]),
+ ],
+ "last_value": [
+ np.array([1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0]),
+ np.array([0.0]),
+ np.array([10.0]),
+ ],
+ "rolling_mean10": [
+ np.array([1.0, 1.0, 1.0, 1.0, 2.0, 1.1, 1.0, 1.2]),
+ np.array([0.0]),
+ np.array([10.0]),
+ ],
+ "rolling_mean1": [
+ np.array([1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0]),
+ np.array([0.0]),
+ np.array([10.0]),
+ ],
  }
 
- expected_missindicator = np.array([0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0])
+ expected_missindicators = [
+ np.array([0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0]),
+ np.array([0.0]),
+ np.array([1.0]),
+ ]
 
- for method in l_methods:
- transfo = transform.AddObservedValuesIndicator(
- target_field=FieldName.TARGET,
- output_field=FieldName.OBSERVED_VALUES,
- imputation_method=d_method_instances[method],
- )
+ for i, array_value in enumerate(array_values):
+ for method in l_methods:
+ transfo = transform.AddObservedValuesIndicator(
+ target_field=FieldName.TARGET,
+ output_field=FieldName.OBSERVED_VALUES,
+ imputation_method=d_method_instances[method],
+ )
 
- d = {"target": array_value.copy()}
+  d = {"target": array_value.copy()}
 
- res = transfo.transform(d)
+  res = transfo.transform(d)
 
- assert np.array_equal(d_expected_result[method], res["target"])
- assert np.array_equal(
- expected_missindicator, res[FieldName.OBSERVED_VALUES]
- )
+  assert np.array_equal(d_expected_results[method][i], res["target"])
+  assert np.array_equal(
+  expected_missindicators[i], res[FieldName.OBSERVED_VALUES]
+  )
 
 
 def make_dataset(N, train_length):