Add multiseries support to graph_prediction_vs_actual_over_time #4284

Merged
15 commits merged on Aug 21, 2023
53 changes: 26 additions & 27 deletions evalml/model_understanding/visualizations.py
@@ -446,6 +446,7 @@ def graph_prediction_vs_actual_over_time(
dates,
)

fig = None
if is_multiseries(pipeline.problem_type):
id_list = (
[single_series] if single_series is not None else data["series_id"].unique()
@@ -481,8 +482,6 @@ def graph_prediction_vs_actual_over_time(
fig.update_yaxes(title_text=y.name)
if single_series is not None:
fig.update_layout(
height=600,
width=1000,
title_text=f"Graph for Series {single_series}",
)
else:
@@ -491,32 +490,32 @@
width=1500,
title_text="Graph for Multiseries",
)
return fig

data = [
_go.Scatter(
x=data["dates"],
y=data["target"],
mode="lines+markers",
name="Target",
line=dict(color="#1f77b4"),
),
_go.Scatter(
x=data["dates"],
y=data["prediction"],
mode="lines+markers",
name="Prediction",
line=dict(color="#d62728"),
),
]
# Let plotly pick the best date format.
layout = _go.Layout(
title={"text": "Prediction vs Target over time"},
xaxis={"title": "Time"},
yaxis={"title": "Target Values and Predictions"},
)
else:
data = [
_go.Scatter(
x=data["dates"],
y=data["target"],
mode="lines+markers",
name="Target",
line=dict(color="#1f77b4"),
),
_go.Scatter(
x=data["dates"],
y=data["prediction"],
mode="lines+markers",
name="Prediction",
line=dict(color="#d62728"),
),
]
# Let plotly pick the best date format.
layout = _go.Layout(
title={"text": "Prediction vs Target over time"},
xaxis={"title": "Time"},
yaxis={"title": "Target Values and Predictions"},
)

return _go.Figure(data=data, layout=layout)
fig = _go.Figure(data=data, layout=layout)
return fig


def get_linear_coefficients(estimator, features=None):
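For orientation, here is a minimal usage sketch of the updated function. The variable names (`pipeline`, `X`, `y`, `X_train`, `y_train`) are illustrative and mirror the test fixtures below; they are not part of the diff itself.

```python
from evalml.model_understanding.visualizations import (
    graph_prediction_vs_actual_over_time,
)

# `pipeline` is assumed to be a fitted MultiseriesRegressionPipeline, and X/y
# stacked multiseries data with "date" and "series_id" columns (hypothetical
# objects mirroring the test fixtures in this PR).

# All series: one target trace and one prediction trace per series,
# titled "Graph for Multiseries".
fig_all = graph_prediction_vs_actual_over_time(
    pipeline, X, y, X_train, y_train, X["date"]
)

# A single series: two traces, titled "Graph for Series 1".
fig_one = graph_prediction_vs_actual_over_time(
    pipeline, X, y, X_train, y_train, X["date"], single_series="1"
)
```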
62 changes: 12 additions & 50 deletions evalml/tests/model_understanding_tests/test_visualizations.py
@@ -472,57 +472,13 @@ class NotTSPipeline:
)


def test_graph_prediction_vs_actual_over_time_multiseries_single(
multiseries_ts_data_stacked,
go,
component_graph_multiseries,
pipeline_parameters_multiseries,
):
X, y = multiseries_ts_data_stacked
X_train, _, y_train, _ = split_multiseries_data(
X,
y,
"series_id",
"date",
)
pipeline = MultiseriesRegressionPipeline(
component_graph_multiseries,
pipeline_parameters_multiseries,
)
pipeline.fit(X_train, y_train)
fig = graph_prediction_vs_actual_over_time(
pipeline,
X,
y,
X_train,
y_train,
X["date"],
"1",
)
assert isinstance(fig, go.Figure)
fig_dict = fig.to_dict()

assert fig_dict["layout"]["title"]["text"] == "Graph for Series 1"
assert fig_dict["layout"]["xaxis"]["title"]["text"] == "Time"
assert fig_dict["layout"]["yaxis"]["title"]["text"] == "target"
assert len(fig_dict["data"]) == 2

assert len(fig_dict["data"][0]["x"]) == len(X["date"].unique())
assert len(fig_dict["data"][0]["y"]) == len(X["date"].unique())
assert not np.isnan(fig_dict["data"][0]["y"]).all()
assert fig_dict["data"][0]["name"] == "Series 1: Target"

assert len(fig_dict["data"][1]["x"]) == len(X["date"].unique())
assert len(fig_dict["data"][1]["y"]) == len(X["date"].unique())
assert not np.isnan(fig_dict["data"][1]["y"]).all()
assert fig_dict["data"][1]["name"] == "Series 1: Prediction"


@pytest.mark.parametrize("single_series", ["0", None])
def test_graph_prediction_vs_actual_over_time_multiseries(
multiseries_ts_data_stacked,
go,
component_graph_multiseries,
pipeline_parameters_multiseries,
single_series,
):
X, y = multiseries_ts_data_stacked
X_train, _, y_train, _ = split_multiseries_data(
@@ -543,17 +499,23 @@ def test_graph_prediction_vs_actual_over_time_multiseries(
X_train,
y_train,
X["date"],
single_series=single_series,
)
assert isinstance(fig, go.Figure)

fig_dict = fig.to_dict()
assert fig_dict["layout"]["title"]["text"] == "Graph for Multiseries"

if single_series is not None:
assert fig_dict["layout"]["title"]["text"] == "Graph for Series 0"
assert len(fig_dict["data"]) == 2
else:
assert fig_dict["layout"]["title"]["text"] == "Graph for Multiseries"
# there's 5 series, and each series has two lines (one each for target/prediction)
assert len(fig_dict["data"]) == 10

assert fig_dict["layout"]["xaxis"]["title"]["text"] == "Time"
assert fig_dict["layout"]["yaxis"]["title"]["text"] == "target"

# there's 5 series, and each series has two lines (one each for target/prediction)
assert len(fig_dict["data"]) == 10

curr_series = 0
for i in range(len(fig_dict["data"])):
assert len(fig_dict["data"][i]["x"]) == len(X["date"].unique())
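As a rough illustration of what the parametrized test checks, the returned figure can be inspected through `to_dict()`. The trace names follow the `Series <id>: Target` / `Series <id>: Prediction` pattern asserted in the removed single-series test; `fig_one` here refers to the hypothetical single-series figure from the sketch above.

```python
fig_dict = fig_one.to_dict()

# Layout titles set by the multiseries branch.
assert fig_dict["layout"]["title"]["text"] == "Graph for Series 1"
assert fig_dict["layout"]["xaxis"]["title"]["text"] == "Time"

# One target trace and one prediction trace for the selected series.
assert [trace["name"] for trace in fig_dict["data"]] == [
    "Series 1: Target",
    "Series 1: Prediction",
]
```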
1 change: 1 addition & 0 deletions evalml/utils/__init__.py
@@ -41,3 +41,4 @@
_determine_downcast_type,
_get_incompatible_nullable_types,
)
from evalml.utils.lazy_loader import LazyLoader
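The new `LazyLoader` export suggests that heavy optional dependencies such as plotly (aliased as `_go` in the visualization code) are imported lazily. Below is a minimal sketch of the common lazy-loader pattern; it assumes `evalml.utils.lazy_loader` follows the usual `types.ModuleType` approach, which the diff itself does not show.

```python
import importlib
import types


class LazyLoader(types.ModuleType):
    """Defer importing a module until one of its attributes is first accessed."""

    def __init__(self, local_name, parent_module_globals, name):
        self._local_name = local_name
        self._parent_module_globals = parent_module_globals
        super().__init__(name)

    def _load(self):
        # Import the real module and replace this proxy in the caller's
        # namespace so subsequent lookups skip the indirection.
        module = importlib.import_module(self.__name__)
        self._parent_module_globals[self._local_name] = module
        self.__dict__.update(module.__dict__)
        return module

    def __getattr__(self, item):
        return getattr(self._load(), item)


# Hypothetical usage inside a plotting module:
_go = LazyLoader("_go", globals(), "plotly.graph_objects")
```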