Skip to content

Commit

Permalink
Shape squeeze edge case bug fix (#911)
Browse files Browse the repository at this point in the history
* Fix edge case with singleton values in the imputation methods
  • Loading branch information
benidis authored Jul 9, 2020
1 parent 09021ec commit 134da35
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 24 deletions.
9 changes: 8 additions & 1 deletion src/gluonts/transform/feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ class MeanValueImputation(MissingValueImputation):
"""

def __call__(self, values: np.ndarray) -> np.ndarray:
    """Fill NaN entries of ``values`` with the mean of its non-NaN entries.

    The array is modified in place and the same object is returned.
    A length-1 input is not averaged; it is handed to
    ``DummyValueImputation`` instead (presumably because a lone value
    leaves nothing meaningful to average -- confirm against that class).
    """
    # Singleton input: delegate rather than compute a mean.
    if len(values) == 1:
        fallback = DummyValueImputation()
        return fallback(values)

    # Boolean mask of the missing positions; nanmean ignores those entries.
    missing = np.isnan(values)
    values[missing] = np.nanmean(values)
    return values
Expand All @@ -99,6 +101,8 @@ class LastValueImputation(MissingValueImputation):
"""

def __call__(self, values: np.ndarray) -> np.ndarray:
if len(values) == 1:
return DummyValueImputation()(values)
values = np.expand_dims(values, axis=0)

mask = np.isnan(values)
Expand All @@ -107,7 +111,6 @@ def __call__(self, values: np.ndarray) -> np.ndarray:
out = values[np.arange(idx.shape[0])[:, None], idx]

values = np.squeeze(out)

# in case we need to replace nan at the start of the array
mask = np.isnan(values)
values[mask] = np.interp(
Expand All @@ -124,6 +127,8 @@ class CausalMeanValueImputation(MissingValueImputation):
"""

def __call__(self, values: np.ndarray) -> np.ndarray:
if len(values) == 1:
return DummyValueImputation()(values)
mask = np.isnan(values)

# we cannot compute the mean with this method if there are nans
Expand Down Expand Up @@ -160,6 +165,8 @@ def __init__(self, window_size: int = 10) -> None:
self.window_size = 1 if window_size < 1 else window_size

def __call__(self, values: np.ndarray) -> np.ndarray:
if len(values) == 1:
return DummyValueImputation()(values)
mask = np.isnan(values)

# we cannot compute the mean with this method if there are nans
Expand Down
77 changes: 54 additions & 23 deletions test/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -894,9 +894,11 @@ def test_AddObservedIndicator():
Tests the different methods to impute missing values.
"""

array_value = np.array(
[np.nan, 1.0, 1.0, np.nan, 2.0, np.nan, 1.0, np.nan]
)
array_values = [
np.array([np.nan, 1.0, 1.0, np.nan, 2.0, np.nan, 1.0, np.nan]),
np.array([np.nan]),
np.array([10.0]),
]

l_methods = [
"dummy_value",
Expand All @@ -916,32 +918,61 @@ def test_AddObservedIndicator():
"rolling_mean10": RollingMeanValueImputation(10),
}

d_expected_result = {
"dummy_value": np.array([0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0]),
"mean": np.array([1.25, 1.0, 1.0, 1.25, 2.0, 1.25, 1.0, 1.25]),
"causal_mean": np.array([1.0, 1.0, 1.0, 1.0, 2.0, 1.2, 1.0, 9 / 7]),
"last_value": np.array([1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0]),
"rolling_mean10": np.array([1.0, 1.0, 1.0, 1.0, 2.0, 1.1, 1.0, 1.2]),
"rolling_mean1": np.array([1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0]),
d_expected_results = {
"dummy_value": [
np.array([0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0]),
np.array([0.0]),
np.array([10.0]),
],
"mean": [
np.array([1.25, 1.0, 1.0, 1.25, 2.0, 1.25, 1.0, 1.25]),
np.array([0.0]),
np.array([10.0]),
],
"causal_mean": [
np.array([1.0, 1.0, 1.0, 1.0, 2.0, 1.2, 1.0, 9 / 7]),
np.array([0.0]),
np.array([10.0]),
],
"last_value": [
np.array([1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0]),
np.array([0.0]),
np.array([10.0]),
],
"rolling_mean10": [
np.array([1.0, 1.0, 1.0, 1.0, 2.0, 1.1, 1.0, 1.2]),
np.array([0.0]),
np.array([10.0]),
],
"rolling_mean1": [
np.array([1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0]),
np.array([0.0]),
np.array([10.0]),
],
}

expected_missindicator = np.array([0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0])
expected_missindicators = [
np.array([0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0]),
np.array([0.0]),
np.array([1.0]),
]

for method in l_methods:
transfo = transform.AddObservedValuesIndicator(
target_field=FieldName.TARGET,
output_field=FieldName.OBSERVED_VALUES,
imputation_method=d_method_instances[method],
)
for i, array_value in enumerate(array_values):
for method in l_methods:
transfo = transform.AddObservedValuesIndicator(
target_field=FieldName.TARGET,
output_field=FieldName.OBSERVED_VALUES,
imputation_method=d_method_instances[method],
)

d = {"target": array_value.copy()}
d = {"target": array_value.copy()}

res = transfo.transform(d)
res = transfo.transform(d)

assert np.array_equal(d_expected_result[method], res["target"])
assert np.array_equal(
expected_missindicator, res[FieldName.OBSERVED_VALUES]
)
assert np.array_equal(d_expected_results[method][i], res["target"])
assert np.array_equal(
expected_missindicators[i], res[FieldName.OBSERVED_VALUES]
)


def make_dataset(N, train_length):
Expand Down

0 comments on commit 134da35

Please sign in to comment.