Shape squeeze edge case bug fix #911

Merged · 5 commits · Jul 9, 2020
Changes from 4 commits
9 changes: 8 additions & 1 deletion src/gluonts/transform/feature.py
@@ -87,6 +87,8 @@ class MeanValueImputation(MissingValueImputation):
     """

     def __call__(self, values: np.ndarray) -> np.ndarray:
+        if len(values) == 1:
+            return DummyValueImputation()(values)
Contributor:

Would it make sense to pass a user-specified dummy_value when constructing the DummyValueImputation object here? Essentially, all these classes would allow specifying the dummy_value to be used in extreme cases.

But maybe not urgent and can be addressed later.

Contributor Author:

Yeah, so DummyValueImputation accepts a value that it uses to impute. With the current implementation it uses the default. We could add one more argument to all the imputation methods to override the default, but maybe that is too much. If needed, we can easily add it later.
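For illustration, a minimal sketch of what that could look like, using MeanValueImputation as an example (a hypothetical change, not part of this PR; it assumes DummyValueImputation's constructor takes the dummy value, which defaults to 0.0):

import numpy as np
from gluonts.transform.feature import (
    DummyValueImputation,
    MissingValueImputation,
)

class MeanValueImputation(MissingValueImputation):
    # Hypothetical: let the user choose the fallback value used
    # when the series is too short for mean imputation.
    def __init__(self, dummy_value: float = 0.0) -> None:
        self.dummy_value = dummy_value

    def __call__(self, values: np.ndarray) -> np.ndarray:
        if len(values) == 1:
            # Forward the user-specified value to the fallback.
            return DummyValueImputation(self.dummy_value)(values)
        nan_indices = np.where(np.isnan(values))
        values[nan_indices] = np.nanmean(values)
        return values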

         nan_indices = np.where(np.isnan(values))
         values[nan_indices] = np.nanmean(values)
         return values
@@ -99,6 +101,8 @@ class LastValueImputation(MissingValueImputation):
     """

     def __call__(self, values: np.ndarray) -> np.ndarray:
+        if len(values) == 1:
+            return DummyValueImputation()(values)
         values = np.expand_dims(values, axis=0)

         mask = np.isnan(values)
@@ -107,7 +111,6 @@ def __call__(self, values: np.ndarray) -> np.ndarray:
         out = values[np.arange(idx.shape[0])[:, None], idx]

         values = np.squeeze(out)
-
         # in case we need to replace nan at the start of the array
         mask = np.isnan(values)
         values[mask] = np.interp(
@@ -124,6 +127,8 @@ class CausalMeanValueImputation(MissingValueImputation):
     """

     def __call__(self, values: np.ndarray) -> np.ndarray:
+        if len(values) == 1:
+            return DummyValueImputation()(values)
         mask = np.isnan(values)

         # we cannot compute the mean with this method if there are nans
@@ -160,6 +165,8 @@ def __init__(self, window_size: int = 10) -> None:
         self.window_size = 1 if window_size < 1 else window_size

     def __call__(self, values: np.ndarray) -> np.ndarray:
+        if len(values) == 1:
+            return DummyValueImputation()(values)
         mask = np.isnan(values)

         # we cannot compute the mean with this method if there are nans
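To see why the guard is needed, consider LastValueImputation on a length-1 series: the np.expand_dims/np.squeeze round-trip collapses the array to a 0-d scalar, which the subsequent boolean-mask assignment cannot handle. A small sketch of the shape issue and the fallback behavior (my reading of the diff above; the dummy value defaults to 0.0):

import numpy as np
from gluonts.transform.feature import DummyValueImputation

values = np.array([np.nan])                # length-1 series, value missing
expanded = np.expand_dims(values, axis=0)  # shape (1, 1)
squeezed = np.squeeze(expanded)            # shape () -- 0-d, not (1,)
print(squeezed.shape)                      # ()

# The guard added in this PR avoids that path for single observations:
print(DummyValueImputation()(np.array([np.nan])))  # [0.]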
77 changes: 54 additions & 23 deletions test/test_transform.py
@@ -894,9 +894,11 @@ def test_AddObservedIndicator():
     Tests the different methods to impute missing values.
     """

-    array_value = np.array(
-        [np.nan, 1.0, 1.0, np.nan, 2.0, np.nan, 1.0, np.nan]
-    )
+    array_values = [
+        np.array([np.nan, 1.0, 1.0, np.nan, 2.0, np.nan, 1.0, np.nan]),
+        np.array([np.nan]),
+        np.array([10.0]),
+    ]

     l_methods = [
         "dummy_value",
@@ -916,32 +918,61 @@ def test_AddObservedIndicator():
         "rolling_mean10": RollingMeanValueImputation(10),
     }

-    d_expected_result = {
-        "dummy_value": np.array([0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0]),
-        "mean": np.array([1.25, 1.0, 1.0, 1.25, 2.0, 1.25, 1.0, 1.25]),
-        "causal_mean": np.array([1.0, 1.0, 1.0, 1.0, 2.0, 1.2, 1.0, 9 / 7]),
-        "last_value": np.array([1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0]),
-        "rolling_mean10": np.array([1.0, 1.0, 1.0, 1.0, 2.0, 1.1, 1.0, 1.2]),
-        "rolling_mean1": np.array([1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0]),
+    d_expected_results = {
+        "dummy_value": [
+            np.array([0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0]),
+            np.array([0.0]),
+            np.array([10.0]),
+        ],
+        "mean": [
+            np.array([1.25, 1.0, 1.0, 1.25, 2.0, 1.25, 1.0, 1.25]),
+            np.array([0.0]),
+            np.array([10.0]),
+        ],
+        "causal_mean": [
+            np.array([1.0, 1.0, 1.0, 1.0, 2.0, 1.2, 1.0, 9 / 7]),
+            np.array([0.0]),
+            np.array([10.0]),
+        ],
+        "last_value": [
+            np.array([1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0]),
+            np.array([0.0]),
+            np.array([10.0]),
+        ],
+        "rolling_mean10": [
+            np.array([1.0, 1.0, 1.0, 1.0, 2.0, 1.1, 1.0, 1.2]),
+            np.array([0.0]),
+            np.array([10.0]),
+        ],
+        "rolling_mean1": [
+            np.array([1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0]),
+            np.array([0.0]),
+            np.array([10.0]),
+        ],
     }
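As a sanity check on the less obvious expectations: in my reading of CausalMeanValueImputation, nans are first filled with the last observed value (the leading nan is back-filled), giving [1, 1, 1, 1, 2, 2, 1, 1], and each nan is then replaced by the mean of all values preceding it:

# index 5: mean of the five preceding filled values [1, 1, 1, 1, 2]
print((1 + 1 + 1 + 1 + 2) / 5)          # 1.2
# index 7: mean of the seven preceding filled values [1, 1, 1, 1, 2, 2, 1]
print((1 + 1 + 1 + 1 + 2 + 2 + 1) / 7)  # 1.2857... == 9 / 7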

-    expected_missindicator = np.array([0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0])
+    expected_missindicators = [
+        np.array([0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0]),
+        np.array([0.0]),
+        np.array([1.0]),
+    ]

-    for method in l_methods:
-        transfo = transform.AddObservedValuesIndicator(
-            target_field=FieldName.TARGET,
-            output_field=FieldName.OBSERVED_VALUES,
-            imputation_method=d_method_instances[method],
-        )
+    for i, array_value in enumerate(array_values):
+        for method in l_methods:
+            transfo = transform.AddObservedValuesIndicator(
+                target_field=FieldName.TARGET,
+                output_field=FieldName.OBSERVED_VALUES,
+                imputation_method=d_method_instances[method],
+            )

-        d = {"target": array_value.copy()}
+            d = {"target": array_value.copy()}

-        res = transfo.transform(d)
+            res = transfo.transform(d)

-        assert np.array_equal(d_expected_result[method], res["target"])
-        assert np.array_equal(
-            expected_missindicator, res[FieldName.OBSERVED_VALUES]
-        )
+            assert np.array_equal(d_expected_results[method][i], res["target"])
+            assert np.array_equal(
+                expected_missindicators[i], res[FieldName.OBSERVED_VALUES]
+            )


def make_dataset(N, train_length):
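For reference, an end-to-end sketch of what the new test cases exercise (import paths as in this repository; the expected outputs follow the expectations above):

import numpy as np
from gluonts.dataset.field_names import FieldName
from gluonts.transform import AddObservedValuesIndicator
from gluonts.transform.feature import LastValueImputation

t = AddObservedValuesIndicator(
    target_field=FieldName.TARGET,
    output_field=FieldName.OBSERVED_VALUES,
    imputation_method=LastValueImputation(),
)

# A length-1, all-missing target used to hit the squeeze edge case;
# it now falls back to the dummy value, with an observed flag of 0.
d = t.transform({"target": np.array([np.nan])})
print(d["target"])                   # [0.]
print(d[FieldName.OBSERVED_VALUES])  # [0.]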