Add Enhanced MLOS Analyzer Dashboard with Advanced Visualizations as a Library #944

Open · wants to merge 12 commits into base: main
45 changes: 45 additions & 0 deletions mlos_analyzer/README.md
@@ -0,0 +1,45 @@
# MLOS Analyzer Dashboard

This project provides a comprehensive set of dashboard components for analyzing experiments conducted with MLOS. The components enable users to visualize experiment results, analyze performance metrics, and run statistical analyses interactively.

The dashboard components can also be used within a notebook, in a Streamlit app, or on any platform that supports Plotly.

Another use case is automating statistical significance tests to identify meaningful differences between configuration sets. This streamlines performance analysis by automatically detecting which configurations yield compelling performance improvements.

## Features

1. **Experiment Overview**:

- View dataset statistics and configuration distributions.
- Inspect the overall performance of your experiments.

1. **Performance Analysis**:

- Visualize metrics with whisker plots and heatmaps.
- Perform advanced analysis using parallel coordinates and performance radar plots.

1. **Time Series Analysis**:

- Analyze metrics over time.
- Apply moving average filters for better trend visualization.

1. **Distribution Analysis**:

- View metric distributions with histogram and violin plots.

1. **Failure Analysis**:

- Visualize success/failure distributions.
- Analyze failure rates across different configurations.

1. **Statistical Analysis**:

- Perform pairwise statistical tests for configuration comparison.
- Compare score distributions between different configurations.

## Installation

```bash
pip install -r requirements.txt
python setup.py install
```
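
## Quick Start

A minimal sketch of notebook-style usage (it assumes an experiment has already been recorded via `mlos_bench`, that the storage configuration in `mlos_analyzer/core/storage.py` resolves, and uses hypothetical experiment and metric names):

```python
from mlos_analyzer.core.storage import storage
from mlos_analyzer.visualization.plots import plot_whisker_plots
from mlos_analyzer.visualization.statistical import run_pairwise_stat_tests

# Load the results of a previously recorded experiment ("my-experiment" is hypothetical).
df = storage.experiments["my-experiment"].results_df

# Result columns reported by mlos_bench are prefixed with "result";
# "result.score" below is a hypothetical metric name.
fig = plot_whisker_plots(df, "result.score")
fig.show()  # or st.plotly_chart(fig) inside a Streamlit app

# Run pairwise significance tests across configurations.
stats = run_pairwise_stat_tests(df, "result.score", alpha=0.05)
print(stats)
```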
144 changes: 144 additions & 0 deletions mlos_analyzer/example_usage.py
@@ -0,0 +1,144 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#

# Run as "streamlit run example_usage.py"

import streamlit as st
from mlos_analyzer.core.storage import storage
from mlos_analyzer.visualization.correlation import (
plot_correlation_table_target,
plot_heatmap,
)
from mlos_analyzer.visualization.distributions import (
plot_metric_distribution,
plot_violin_comparison,
)
from mlos_analyzer.visualization.failure_metrics import (
plot_failure_rate_by_config,
plot_success_failure_distribution,
)
from mlos_analyzer.visualization.performance import (
plot_parallel_coordinates,
plot_performance_radar,
)
from mlos_analyzer.visualization.plots import plot_whisker_plots
from mlos_analyzer.visualization.statistical import (
compare_score_distributions,
run_pairwise_stat_tests,
)
from mlos_analyzer.visualization.timeseries import (
plot_metric_over_time,
plot_moving_average,
)


def main():
    st.set_page_config(page_title="MLOS Analyzer Dashboard", layout="wide")
    st.title("MLOS Experiment Analysis Dashboard")

    st.sidebar.header("Settings")
    experiment_ids = list(storage.experiments.keys())
    selected_experiment = st.sidebar.selectbox("Select Experiment", experiment_ids)

    if selected_experiment:
        df = storage.experiments[selected_experiment].results_df
        metrics = [col for col in df.columns if col.startswith("result")]

        tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs(
            ["Overview", "Performance", "Time Series", "Distributions", "Failures", "Statistics"]
        )

        with tab1:
            st.header("Experiment Overview")
            col1, col2 = st.columns(2)
            with col1:
                st.subheader("Dataset Info")
                st.write(df.describe())
            with col2:
                st.subheader("Configuration Distribution")
                config_counts = df["tunable_config_id"].value_counts()
                st.bar_chart(config_counts)

        with tab2:
            st.header("Performance Analysis")
            selected_metric = st.selectbox("Select Metric", metrics, key="perf_metric")

            col1, col2 = st.columns(2)
            with col1:
                fig_whisker = plot_whisker_plots(df, selected_metric)
                st.plotly_chart(fig_whisker)
            with col2:
                fig_heatmap = plot_heatmap(df)
                st.plotly_chart(fig_heatmap)

            selected_metrics = st.multiselect(
                "Select Metrics for Advanced Analysis", metrics, default=metrics[:3]
            )
            if selected_metrics:
                col3, col4 = st.columns(2)
                with col3:
                    fig = plot_parallel_coordinates(df, selected_metrics)
                    st.plotly_chart(fig)
                with col4:
                    fig = plot_performance_radar(df, selected_metrics)
                    st.plotly_chart(fig)

        with tab3:
            st.header("Time Series Analysis")
            metric = st.selectbox("Select Metric", metrics, key="ts_metric")
            window = st.slider("Moving Average Window", 2, 20, 5)

            col1, col2 = st.columns(2)
            with col1:
                fig = plot_metric_over_time(df, metric)
                st.plotly_chart(fig)
            with col2:
                fig = plot_moving_average(df, metric, window)
                st.plotly_chart(fig)

        with tab4:
            st.header("Distribution Analysis")
            metric = st.selectbox("Select Metric", metrics, key="dist_metric")

            col1, col2 = st.columns(2)
            with col1:
                fig = plot_metric_distribution(df, metric)
                st.plotly_chart(fig)
            with col2:
                fig = plot_violin_comparison(df, metric)
                st.plotly_chart(fig)

        with tab5:
            st.header("Failure Analysis")
            col1, col2 = st.columns(2)
            with col1:
                fig_dist = plot_success_failure_distribution(df)
                st.plotly_chart(fig_dist)
            with col2:
                fig_rate = plot_failure_rate_by_config(df)
                st.plotly_chart(fig_rate)

        with tab6:
            st.header("Statistical Analysis")
            test_metric = st.selectbox("Select Test Metric", metrics)
            alpha = st.slider("Significance Level (α)", 0.01, 0.10, 0.05)

            results = run_pairwise_stat_tests(df, test_metric, alpha=alpha)
            st.dataframe(results)

            st.subheader("Configuration Comparison")
            config1, config2 = st.columns(2)
            with config1:
                cfg1 = st.selectbox("First Configuration", df["tunable_config_id"].unique())
            with config2:
                cfg2 = st.selectbox("Second Configuration", df["tunable_config_id"].unique())

            if cfg1 != cfg2:
                fig_compare = compare_score_distributions(df, test_metric, cfg1, cfg2)
                st.plotly_chart(fig_compare)


if __name__ == "__main__":
    main()
4 changes: 4 additions & 0 deletions mlos_analyzer/mlos_analyzer/__init__.py
@@ -0,0 +1,4 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
4 changes: 4 additions & 0 deletions mlos_analyzer/mlos_analyzer/api/__init__.py
@@ -0,0 +1,4 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
37 changes: 37 additions & 0 deletions mlos_analyzer/mlos_analyzer/api/endpoints.py
@@ -0,0 +1,37 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
import logging

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware

from ..core.storage import storage
from .models import ExperimentExplanationRequest

app = FastAPI()
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.get("/experiments")
def get_experiments():
return list(storage.experiments.keys())


@app.get("/experiment_results/{experiment_id}")
def get_experiment_results(experiment_id: str):
try:
exp = storage.experiments[experiment_id]
return exp.results_df.to_dict(orient="records")
except KeyError:
raise HTTPException(status_code=404, detail="Experiment not found")
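
For local testing, the API can be exercised with FastAPI's test client (a minimal sketch; it assumes the storage configuration resolves and at least one experiment has been recorded — the app can also be served with `uvicorn mlos_analyzer.api.endpoints:app`):

```python
from fastapi.testclient import TestClient

from mlos_analyzer.api.endpoints import app

client = TestClient(app)

# List recorded experiment IDs, then fetch trial results for the first one.
experiment_ids = client.get("/experiments").json()
if experiment_ids:
    results = client.get(f"/experiment_results/{experiment_ids[0]}").json()
    print(f"{experiment_ids[0]}: {len(results)} trial records")
```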
9 changes: 9 additions & 0 deletions mlos_analyzer/mlos_analyzer/api/models.py
@@ -0,0 +1,9 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
from pydantic import BaseModel


class ExperimentExplanationRequest(BaseModel):
    experiment_id: str
4 changes: 4 additions & 0 deletions mlos_analyzer/mlos_analyzer/core/__init__.py
@@ -0,0 +1,4 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
10 changes: 10 additions & 0 deletions mlos_analyzer/mlos_analyzer/core/storage.py
@@ -0,0 +1,10 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
from mlos_bench.storage import from_config

try:
    storage = from_config(config="storage/sqlite.jsonc")  # PLACEHOLDER
except Exception as e:
    raise Exception(f"Error loading storage configuration: {e}") from e
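
The `storage/sqlite.jsonc` path above is a placeholder and should point at an `mlos_bench` storage config. An illustrative SQLite-backed config might look like the following (the field names are assumptions based on the `mlos_bench` SQL storage backend; consult the MLOS docs for the exact schema):

```jsonc
// Illustrative placeholder -- adjust "class" and "config" to match your mlos_bench setup.
{
    "class": "mlos_bench.storage.sql.storage.SqlStorage",
    "config": {
        "drivername": "sqlite",
        "database": "mlos_bench.sqlite"
    }
}
```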
4 changes: 4 additions & 0 deletions mlos_analyzer/mlos_analyzer/utils/__init__.py
@@ -0,0 +1,4 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
4 changes: 4 additions & 0 deletions mlos_analyzer/mlos_analyzer/visualization/__init__.py
@@ -0,0 +1,4 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
32 changes: 32 additions & 0 deletions mlos_analyzer/mlos_analyzer/visualization/correlation.py
@@ -0,0 +1,32 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
import pandas as pd
import plotly.express as px


def plot_heatmap(df: pd.DataFrame):
    numeric_df = df.select_dtypes(include=["int64", "float64"])
    config_columns = [col for col in numeric_df.columns if col.startswith("config")]
    result_columns = [col for col in numeric_df.columns if col.startswith("result")]

    combined_data = numeric_df[config_columns + result_columns]
    correlation_matrix = combined_data.corr()

    fig = px.imshow(
        correlation_matrix,
        title="Configuration vs Results Correlation Heatmap",
        color_continuous_scale="RdBu",
    )
    return fig


def plot_correlation_table_target(df: pd.DataFrame, target_col: str):
    numeric_df = df.select_dtypes(include=["int64", "float64"])
    correlations = numeric_df.corrwith(numeric_df[target_col]).sort_values(ascending=False)

    fig = px.bar(
        x=correlations.index, y=correlations.values, title=f"Correlations with {target_col}"
    )
    return fig
28 changes: 28 additions & 0 deletions mlos_analyzer/mlos_analyzer/visualization/distributions.py
@@ -0,0 +1,28 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#

import plotly.express as px
import plotly.figure_factory as ff


def plot_metric_distribution(df, metric: str):
    fig = ff.create_distplot(
        [df[metric].dropna()], [metric], bin_size=(df[metric].max() - df[metric].min()) / 30
    )
    fig.update_layout(title=f"Distribution of {metric}")
    return fig


def plot_violin_comparison(df, metric: str, group_by: str = "tunable_config_id"):
    fig = px.violin(
        df,
        x=group_by,
        y=metric,
        box=True,
        points="all",
        title=f"{metric} Distribution by {group_by}",
    )
    return fig
30 changes: 30 additions & 0 deletions mlos_analyzer/mlos_analyzer/visualization/failure_metrics.py
@@ -0,0 +1,30 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
import pandas as pd
import plotly.express as px


def plot_success_failure_distribution(df: pd.DataFrame):
    status_counts = df["status"].value_counts()
    return px.pie(
        values=status_counts.values,
        names=status_counts.index,
        title="Success/Failure Distribution",
    )


def plot_failure_rate_by_config(df: pd.DataFrame):
    failure_rate = (
        df.groupby("tunable_config_id")["status"]
        .apply(lambda x: (x == "FAILED").mean())
        .reset_index()
    )
    failure_rate.columns = ["tunable_config_id", "failure_rate"]
    return px.bar(
        failure_rate,
        x="tunable_config_id",
        y="failure_rate",
        title="Failure Rate by Configuration",
    )