Add Enhanced MLOS Analyzer Dashboard with Advanced Visualizations as a Library #944

Open · wants to merge 12 commits into base: main
45 changes: 45 additions & 0 deletions mlos_analyzer/README.md
@@ -0,0 +1,45 @@
# MLOS Analyzer Dashboard

This project provides a comprehensive set of dashboard components for analyzing experiments conducted with MLOS. The components enable users to visualize experiment results, analyze performance metrics, and run statistical analyses interactively.

The dashboard components can also be used within a notebook, in a Streamlit app, or on any platform that supports Plotly.

Another use case is automating statistical significance tests to identify meaningful differences between configuration sets. This streamlines performance analysis by automatically detecting which configurations yield compelling performance improvements.

## Features

1. **Experiment Overview**:

- View dataset statistics and configuration distributions.
- Inspect the overall performance of your experiments.

1. **Performance Analysis**:

- Visualize metrics with whisker plots and heatmaps.
- Perform advanced analysis using parallel coordinates and performance radar plots.

1. **Time Series Analysis**:

- Analyze metrics over time.
- Apply moving average filters for better trend visualization.

1. **Distribution Analysis**:

- View metric distributions with histogram and violin plots.

1. **Failure Analysis**:

- Visualize success/failure distributions.
- Analyze failure rates across different configurations.

1. **Statistical Analysis**:

- Perform pairwise statistical tests for configuration comparison.
- Compare score distributions between different configurations.

## Installation

```bash
pip install -r requirements.txt
python setup.py install
```
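
## Quick Start

A minimal sketch of notebook-style usage (it assumes an experiment has already been recorded via `mlos_bench`, that the storage configuration in `mlos_analyzer/core/storage.py` resolves, and uses hypothetical experiment and metric names):

```python
from mlos_analyzer.core.storage import storage
from mlos_analyzer.visualization.plots import plot_whisker_plots
from mlos_analyzer.visualization.statistical import run_pairwise_stat_tests

# Load the results of a previously recorded experiment ("my-experiment" is hypothetical).
df = storage.experiments["my-experiment"].results_df

# Result columns reported by mlos_bench are prefixed with "result";
# "result.score" below is a hypothetical metric name.
fig = plot_whisker_plots(df, "result.score")
fig.show()  # or st.plotly_chart(fig) inside a Streamlit app

# Run pairwise significance tests across configurations.
stats = run_pairwise_stat_tests(df, "result.score", alpha=0.05)
print(stats)
```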
144 changes: 144 additions & 0 deletions mlos_analyzer/example_usage.py
@@ -0,0 +1,144 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#

# Run as "streamlit run example_usage.py"

import streamlit as st
from mlos_analyzer.core.storage import storage
from mlos_analyzer.visualization.correlation import (
plot_correlation_table_target,
plot_heatmap,
)
from mlos_analyzer.visualization.distributions import (
plot_metric_distribution,
plot_violin_comparison,
)
from mlos_analyzer.visualization.failure_metrics import (
plot_failure_rate_by_config,
plot_success_failure_distribution,
)
from mlos_analyzer.visualization.performance import (
plot_parallel_coordinates,
plot_performance_radar,
)
from mlos_analyzer.visualization.plots import plot_whisker_plots
from mlos_analyzer.visualization.statistical import (
compare_score_distributions,
run_pairwise_stat_tests,
)
from mlos_analyzer.visualization.timeseries import (
plot_metric_over_time,
plot_moving_average,
)


def main():
    st.set_page_config(page_title="MLOS Analyzer Dashboard", layout="wide")
    st.title("MLOS Experiment Analysis Dashboard")

    st.sidebar.header("Settings")
    experiment_ids = list(storage.experiments.keys())
    selected_experiment = st.sidebar.selectbox("Select Experiment", experiment_ids)

    if selected_experiment:
        df = storage.experiments[selected_experiment].results_df
        metrics = [col for col in df.columns if col.startswith("result")]

        tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs(
            ["Overview", "Performance", "Time Series", "Distributions", "Failures", "Statistics"]
        )

        with tab1:
            st.header("Experiment Overview")
            col1, col2 = st.columns(2)
            with col1:
                st.subheader("Dataset Info")
                st.write(df.describe())
            with col2:
                st.subheader("Configuration Distribution")
                config_counts = df["tunable_config_id"].value_counts()
                st.bar_chart(config_counts)

        with tab2:
            st.header("Performance Analysis")
            selected_metric = st.selectbox("Select Metric", metrics, key="perf_metric")

            col1, col2 = st.columns(2)
            with col1:
                fig_whisker = plot_whisker_plots(df, selected_metric)
                st.plotly_chart(fig_whisker)
            with col2:
                fig_heatmap = plot_heatmap(df)
                st.plotly_chart(fig_heatmap)

            selected_metrics = st.multiselect(
                "Select Metrics for Advanced Analysis", metrics, default=metrics[:3]
            )
            if selected_metrics:
                col3, col4 = st.columns(2)
                with col3:
                    fig = plot_parallel_coordinates(df, selected_metrics)
                    st.plotly_chart(fig)
                with col4:
                    fig = plot_performance_radar(df, selected_metrics)
                    st.plotly_chart(fig)

        with tab3:
            st.header("Time Series Analysis")
            metric = st.selectbox("Select Metric", metrics, key="ts_metric")
            window = st.slider("Moving Average Window", 2, 20, 5)

            col1, col2 = st.columns(2)
            with col1:
                fig = plot_metric_over_time(df, metric)
                st.plotly_chart(fig)
            with col2:
                fig = plot_moving_average(df, metric, window)
                st.plotly_chart(fig)

        with tab4:
            st.header("Distribution Analysis")
            metric = st.selectbox("Select Metric", metrics, key="dist_metric")

            col1, col2 = st.columns(2)
            with col1:
                fig = plot_metric_distribution(df, metric)
                st.plotly_chart(fig)
            with col2:
                fig = plot_violin_comparison(df, metric)
                st.plotly_chart(fig)

        with tab5:
            st.header("Failure Analysis")
            col1, col2 = st.columns(2)
            with col1:
                fig_dist = plot_success_failure_distribution(df)
                st.plotly_chart(fig_dist)
            with col2:
                fig_rate = plot_failure_rate_by_config(df)
                st.plotly_chart(fig_rate)

        with tab6:
            st.header("Statistical Analysis")
            test_metric = st.selectbox("Select Test Metric", metrics)
            alpha = st.slider("Significance Level (α)", 0.01, 0.10, 0.05)

            results = run_pairwise_stat_tests(df, test_metric, alpha=alpha)
            st.dataframe(results)

            st.subheader("Configuration Comparison")
            config1, config2 = st.columns(2)
            with config1:
                cfg1 = st.selectbox("First Configuration", df["tunable_config_id"].unique())
            with config2:
                cfg2 = st.selectbox("Second Configuration", df["tunable_config_id"].unique())

            if cfg1 != cfg2:
                fig_compare = compare_score_distributions(df, test_metric, cfg1, cfg2)
                st.plotly_chart(fig_compare)


if __name__ == "__main__":
    main()
4 changes: 4 additions & 0 deletions mlos_analyzer/mlos_analyzer/__init__.py
@@ -0,0 +1,4 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
4 changes: 4 additions & 0 deletions mlos_analyzer/mlos_analyzer/api/__init__.py
@@ -0,0 +1,4 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
37 changes: 37 additions & 0 deletions mlos_analyzer/mlos_analyzer/api/endpoints.py
@@ -0,0 +1,37 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
import logging

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware

from ..core.storage import storage
from .models import ExperimentExplanationRequest

app = FastAPI()
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.get("/experiments")
def get_experiments():
return list(storage.experiments.keys())


@app.get("/experiment_results/{experiment_id}")
def get_experiment_results(experiment_id: str):
try:
exp = storage.experiments[experiment_id]
return exp.results_df.to_dict(orient="records")
except KeyError:
raise HTTPException(status_code=404, detail="Experiment not found")
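
For local testing, the API can be exercised with FastAPI's test client (a minimal sketch; it assumes the storage configuration resolves and at least one experiment has been recorded — the app can also be served with `uvicorn mlos_analyzer.api.endpoints:app`):

```python
from fastapi.testclient import TestClient

from mlos_analyzer.api.endpoints import app

client = TestClient(app)

# List recorded experiment IDs, then fetch trial results for the first one.
experiment_ids = client.get("/experiments").json()
if experiment_ids:
    results = client.get(f"/experiment_results/{experiment_ids[0]}").json()
    print(f"{experiment_ids[0]}: {len(results)} trial records")
```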
9 changes: 9 additions & 0 deletions mlos_analyzer/mlos_analyzer/api/models.py
@@ -0,0 +1,9 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
from pydantic import BaseModel


class ExperimentExplanationRequest(BaseModel):
    experiment_id: str
4 changes: 4 additions & 0 deletions mlos_analyzer/mlos_analyzer/core/__init__.py
@@ -0,0 +1,4 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
10 changes: 10 additions & 0 deletions mlos_analyzer/mlos_analyzer/core/storage.py
@@ -0,0 +1,10 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
from mlos_bench.storage import from_config

try:
    storage = from_config(config="storage/sqlite.jsonc")  # PLACEHOLDER
except Exception as e:
    raise Exception(f"Error loading storage configuration: {e}") from e
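
The `storage/sqlite.jsonc` path above is a placeholder and should point at an `mlos_bench` storage config. An illustrative SQLite-backed config might look like the following (the field names are assumptions based on the `mlos_bench` SQL storage backend; consult the MLOS docs for the exact schema):

```jsonc
// Illustrative placeholder -- adjust "class" and "config" to match your mlos_bench setup.
{
    "class": "mlos_bench.storage.sql.storage.SqlStorage",
    "config": {
        "drivername": "sqlite",
        "database": "mlos_bench.sqlite"
    }
}
```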
4 changes: 4 additions & 0 deletions mlos_analyzer/mlos_analyzer/utils/__init__.py
@@ -0,0 +1,4 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
4 changes: 4 additions & 0 deletions mlos_analyzer/mlos_analyzer/visualization/__init__.py
@@ -0,0 +1,4 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
32 changes: 32 additions & 0 deletions mlos_analyzer/mlos_analyzer/visualization/correlation.py
@@ -0,0 +1,32 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
import pandas as pd
import plotly.express as px


def plot_heatmap(df: pd.DataFrame):
    numeric_df = df.select_dtypes(include=["int64", "float64"])
    config_columns = [col for col in numeric_df.columns if col.startswith("config")]
    result_columns = [col for col in numeric_df.columns if col.startswith("result")]

    combined_data = numeric_df[config_columns + result_columns]
    correlation_matrix = combined_data.corr()

    fig = px.imshow(
        correlation_matrix,
        title="Configuration vs Results Correlation Heatmap",
        color_continuous_scale="RdBu",
    )
    return fig


def plot_correlation_table_target(df: pd.DataFrame, target_col: str):
    numeric_df = df.select_dtypes(include=["int64", "float64"])
    correlations = numeric_df.corrwith(numeric_df[target_col]).sort_values(ascending=False)

    fig = px.bar(
        x=correlations.index, y=correlations.values, title=f"Correlations with {target_col}"
    )
    return fig
28 changes: 28 additions & 0 deletions mlos_analyzer/mlos_analyzer/visualization/distributions.py
@@ -0,0 +1,28 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#

import plotly.express as px
import plotly.figure_factory as ff


def plot_metric_distribution(df, metric: str):
    fig = ff.create_distplot(
        [df[metric].dropna()], [metric], bin_size=(df[metric].max() - df[metric].min()) / 30
    )
    fig.update_layout(title=f"Distribution of {metric}")
    return fig


def plot_violin_comparison(df, metric: str, group_by: str = "tunable_config_id"):
    fig = px.violin(
        df,
        x=group_by,
        y=metric,
        box=True,
        points="all",
        title=f"{metric} Distribution by {group_by}",
    )
    return fig
30 changes: 30 additions & 0 deletions mlos_analyzer/mlos_analyzer/visualization/failure_metrics.py
@@ -0,0 +1,30 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
import pandas as pd
import plotly.express as px


def plot_success_failure_distribution(df: pd.DataFrame):
    status_counts = df["status"].value_counts()
    return px.pie(
        values=status_counts.values,
        names=status_counts.index,
        title="Success/Failure Distribution",
    )


def plot_failure_rate_by_config(df: pd.DataFrame):
    failure_rate = (
        df.groupby("tunable_config_id")["status"]
        .apply(lambda x: (x == "FAILED").mean())
        .reset_index()
    )
    failure_rate.columns = ["tunable_config_id", "failure_rate"]
    return px.bar(
        failure_rate,
        x="tunable_config_id",
        y="failure_rate",
        title="Failure Rate by Configuration",
    )