From 08644550450e51ce13cd09e30cc7d34c140675a0 Mon Sep 17 00:00:00 2001
From: RuanJohn
Date: Tue, 30 Jul 2024 15:06:14 +0200
Subject: [PATCH] feat: process and plotting for full data set

---
 .gitignore                           |   1 +
 duplicate_seed_data.py               |  37 +++++
 keep_certain_tasks.py                |  37 +++++
 marl_eval/plotting_tools/plotting.py |   7 +
 master_episode_norm.py               |  92 +++++++++++
 plot_data.py                         | 188 +++++++++++++++++++++++++
 plot_data_all_envs.py                | 195 ++++++++++++++++++++++++++
 plot_data_cleaner.py                 | 199 +++++++++++++++++++++++++++
 plot_data_connector.py               | 199 +++++++++++++++++++++++++++
 plot_data_lbf.py                     | 191 +++++++++++++++++++++++++
 plot_data_mabrax.py                  | 195 ++++++++++++++++++++++++++
 plot_data_rware.py                   | 195 ++++++++++++++++++++++++++
 plot_data_smax.py                    | 193 ++++++++++++++++++++++++++
 process_env_name.py                  |  96 +++++++++++++
 process_step_counts.py               |  92 +++++++++++++
 process_step_counts_mabrax.py        |  94 +++++++++++++
 process_win_rate.py                  |  38 +++++
 pull_data.py                         |  37 +++++
 remove_algo_data.py                  |  53 +++++++
 remove_certain_tasks.py              |  33 +++++
 rename_algos.py                      |  31 +++++
 21 files changed, 2203 insertions(+)
 create mode 100644 duplicate_seed_data.py
 create mode 100644 keep_certain_tasks.py
 create mode 100644 master_episode_norm.py
 create mode 100644 plot_data.py
 create mode 100644 plot_data_all_envs.py
 create mode 100644 plot_data_cleaner.py
 create mode 100644 plot_data_connector.py
 create mode 100644 plot_data_lbf.py
 create mode 100644 plot_data_mabrax.py
 create mode 100644 plot_data_rware.py
 create mode 100644 plot_data_smax.py
 create mode 100644 process_env_name.py
 create mode 100644 process_step_counts.py
 create mode 100644 process_step_counts_mabrax.py
 create mode 100644 process_win_rate.py
 create mode 100644 pull_data.py
 create mode 100644 remove_algo_data.py
 create mode 100644 remove_certain_tasks.py
 create mode 100644 rename_algos.py

diff --git a/.gitignore b/.gitignore
index 4449405b..c2be5213 100644
--- a/.gitignore
+++ b/.gitignore
@@ -141,6 +141,7 @@ processed_*.json
 
 # Plots folder
 plots
+data
 
 # csv data
 *.csv
diff --git a/duplicate_seed_data.py b/duplicate_seed_data.py
new file mode 100644
index 00000000..cd9f5b6c
--- /dev/null
+++ b/duplicate_seed_data.py
@@ -0,0 +1,37 @@
+import copy
+import json
+
+
+def duplicate_seed_data(data, env_name, task_name, algo_name, missing_seed, source_seed):
+    if env_name in data:
+        if task_name in data[env_name]:
+            if algo_name in data[env_name][task_name]:
+                if source_seed in data[env_name][task_name][algo_name]:
+                    # Duplicate the data. Deep-copy the source seed so the two
+                    # entries do not alias the same object if edited later.
+                    data[env_name][task_name][algo_name][missing_seed] = copy.deepcopy(
+                        data[env_name][task_name][algo_name][source_seed]
+                    )
+                    print(f"Duplicated data for {env_name}/{task_name}/{algo_name}/{missing_seed}")
+                else:
+                    print(f"Source seed {source_seed} not found for {env_name}/{task_name}/{algo_name}")
+            else:
+                print(f"Algorithm {algo_name} not found for {env_name}/{task_name}")
+        else:
+            print(f"Task {task_name} not found for {env_name}")
+    else:
+        print(f"Environment {env_name} not found")
+
+# Load the JSON file
+file_path = './data/full-benchmark-update/merged_data/metrics_winrate_processed_no_retmat.json'
+new_file_path = './data/full-benchmark-update/merged_data/interim_seed_duplicated.json'
+with open(file_path, 'r') as file:
+    data = json.load(file)
+
+# Duplicate data for the first case
+duplicate_seed_data(data, 'Cleaner', 'clean-15x15x6a', 'ff_mappo', 'seed_9', 'seed_8')
+
+# Duplicate data for the second case
+duplicate_seed_data(data, 'Cleaner', 'clean-15x15x6a', 'retmat_memory', 'seed_4', 'seed_8')
+
+# Save the modified data back to the JSON file
+with open(new_file_path, 'w') as file:
+    json.dump(data, file, indent=2)
+
+print("JSON file has been updated.")
\ No newline at end of file
diff --git a/keep_certain_tasks.py b/keep_certain_tasks.py
new file mode 100644
index 00000000..a80a59e9
--- /dev/null
+++ b/keep_certain_tasks.py
@@ -0,0 +1,37 @@
+import json
+
+
+def filter_json(data, tasks_to_keep):
+    filtered_data = {}
+    for env_name, env_tasks in data.items():
+        kept_tasks = {task: info for task, info in env_tasks.items() if task in tasks_to_keep}
+        if kept_tasks:
+            filtered_data[env_name] = kept_tasks
+    return filtered_data
+
+# Example usage:
+input_file = 'data/limited_benchmark/retmat-mat-ppo/merged_data/metrics_winrate_processed.json'
+output_file = 'data/limited_benchmark/retmat-mat-ppo/merged_data/task_name_processed.json'
+tasks_to_keep = [
+    'tiny-4ag',
+    'small-4ag',
+    '5m_vs_6m',
+    '27m_vs_30m',
+    'smacv2_10_units',
+    '15x15-3p-5f',
+    '15x15-4p-5f',
+    '6h_vs_8z',
+]  # Replace with your list of tasks to keep
+
+# Read the input JSON file
+with open(input_file, 'r') as f:
+    data = json.load(f)
+
+# Filter the data
+filtered_data = filter_json(data, tasks_to_keep)
+
+# Write the filtered data to the output JSON file
+with open(output_file, 'w') as f:
+    json.dump(filtered_data, f, indent=2)
+
+print(f"Filtered data has been written to {output_file}")
\ No newline at end of file
diff --git a/marl_eval/plotting_tools/plotting.py b/marl_eval/plotting_tools/plotting.py
index 49b33e5f..22a7550e 100644
--- a/marl_eval/plotting_tools/plotting.py
+++ b/marl_eval/plotting_tools/plotting.py
@@ -65,6 +65,7 @@ def performance_profiles(
         upper_algo_dict = {algo.upper(): value for algo, value in data_dictionary.items()}
         data_dictionary = upper_algo_dict
         algorithms = list(data_dictionary.keys())
+        algorithms.sort(reverse=True)
 
     if legend_map is not None:
         legend_map = {algo.upper(): value for algo, value in legend_map.items()}
@@ -73,6 +74,7 @@
             legend_map[algo]: value for algo, value in data_dictionary.items()
         }
         algorithms = list(data_dictionary.keys())
+        algorithms.sort(reverse=True)
 
     if metric_name in metrics_to_normalize:
         xlabel = "Normalized " + " ".join(metric_name.split("_"))
@@ -140,6 +142,7 @@ def aggregate_scores(
         upper_algo_dict = {algo.upper(): value for algo, value in data_dictionary.items()}
         data_dictionary = upper_algo_dict
         algorithms = list(data_dictionary.keys())
+        algorithms.sort(reverse=True)
 
     if legend_map is not None:
         legend_map = {algo.upper(): value for algo, value in legend_map.items()}
@@ -148,6 +151,7 @@
             legend_map[algo]: value for algo, value in data_dictionary.items()
         }
         algorithms = list(data_dictionary.keys())
+        algorithms.sort(reverse=True)
 
     aggregate_func = lambda x: np.array(  # noqa: E731
         [
@@ -346,6 +350,7 @@ def sample_efficiency_curves(
         upper_algo_dict = {algo.upper(): value for algo, value in data_dictionary.items()}
         data_dictionary = upper_algo_dict
         algorithms = list(data_dictionary.keys())
+        algorithms.sort(reverse=True)
 
     if legend_map is not None:
         legend_map = {algo.upper(): value for algo, value in legend_map.items()}
@@ -354,6 +359,7 @@
             legend_map[algo]: value for algo, value in data_dictionary.items()
         }
         algorithms = list(data_dictionary.keys())
+        algorithms.sort(reverse=True)
 
     # Find lowest values from amount of runs that have completed
     # across all algorithms
@@ -441,6 +447,7 @@ def plot_single_task(
         task_mean_ci_data = upper_algo_dict
         algorithms = list(task_mean_ci_data.keys())
         algorithms.remove("extra")
+        algorithms.sort(reverse=True)
 
     if legend_map is not None:
         legend_map = {algo.upper(): value for algo, value in legend_map.items()}
diff --git a/master_episode_norm.py b/master_episode_norm.py
new file mode 100644
index 00000000..318b7b47
--- /dev/null
+++ b/master_episode_norm.py
@@ -0,0 +1,92 @@
+import json
+
+import numpy as np
+
+
+def remove_win_rate(data):
+    if isinstance(data, dict):
+        for key in list(data.keys()):
+            if key == 'win_rate':
+                del data[key]
+            else:
+                data[key] = remove_win_rate(data[key])
+    elif isinstance(data, list):
+        return [remove_win_rate(item) for item in data]
+    return data
+
+def process_json_data(input_file, output_file):
+    # Load the JSON data
+    with open(input_file, 'r') as f:
+        data = json.load(f)
+
+    # Remove win_rate from the data
+    data = remove_win_rate(data)
+
+    # Find min and max values for each environment
+    env_min_max = {}
+    for env_name, env_data in data.items():
+        all_returns = []
+        for task_data in env_data.values():
+            for algo_data in task_data.values():
+                for seed_data in algo_data.values():
+                    # Add absolute metrics
+                    if 'absolute_metrics' in seed_data:
+                        all_returns.extend(seed_data['absolute_metrics'].get('mean_episode_return', []))
+
+                    # Add step metrics
+                    for step_data in seed_data.values():
+                        if isinstance(step_data, dict) and 'mean_episode_return' in step_data:
+                            all_returns.extend(step_data['mean_episode_return'])
+
+        if all_returns:
+            env_min_max[env_name] = (min(all_returns), max(all_returns))
+        else:
+            print(f"Warning: No valid mean_episode_return values found for environment {env_name}")
+            env_min_max[env_name] = (0, 1)  # Default range if no data
+
+    # Min-max normalize the data
+    for env_name, env_data in data.items():
+        env_min, env_max = env_min_max[env_name]
+        if env_min == env_max:
+            print(f"Warning: All mean_episode_return values are the same for environment {env_name}")
+            env_max = env_min + 1  # Avoid division by zero
+
+        for task_data in env_data.values():
+            for algo_data in task_data.values():
+                for seed_data in algo_data.values():
+                    # Normalize absolute metrics
+                    if 'absolute_metrics' in seed_data:
+                        seed_data['absolute_metrics']['mean_episode_return'] = [
+                            (x - env_min) / (env_max - env_min) if env_max != env_min else 0.5
+                            for x in seed_data['absolute_metrics'].get('mean_episode_return', [])
+                        ]
+
+                    # Normalize step metrics
+                    for step_data in seed_data.values():
+                        if isinstance(step_data, dict) and 'mean_episode_return' in step_data:
+                            step_data['mean_episode_return'] = [
+                                (x - env_min) / (env_max - env_min) if env_max != env_min else 0.5
+                                for x in step_data['mean_episode_return']
+                            ]
+
+    # Combine all environments under 'AllEnvs'. Note: this assumes task names
+    # are unique across environments; a task appearing in two environments
+    # would have its algorithm data merged/overwritten here.
+    all_envs_data = {}
+    for env_data in data.values():
+        for task_name, task_data in env_data.items():
+            if task_name not in all_envs_data:
+                all_envs_data[task_name] = {}
+            all_envs_data[task_name].update(task_data)
+
+    # Create the final output structure
+    output_data = {'AllEnvs': all_envs_data}
+
+    # Save the processed data to a new JSON file
+    with open(output_file, 'w') as f:
+        json.dump(output_data, f, indent=2)
+
+    print(f"Processed data saved to {output_file}")
+
+# Usage
+input_file = 'data/full-benchmark-update/merged_data/interim_seed_duplicated_cleaner_filter.json'
+output_file = 'data/full-benchmark-update/merged_data/master_norm_episode_return.json'
+process_json_data(input_file, output_file)
\ No newline at end of file
diff --git a/plot_data.py b/plot_data.py
new file mode 100644
index 00000000..57061ae1
--- /dev/null
+++ b/plot_data.py
@@ -0,0 +1,188 @@
+# python3
+# Copyright 2022 InstaDeep Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import os
+
+import matplotlib.pyplot as plt
+
+from marl_eval.plotting_tools.plotting import (
+    aggregate_scores,
+    performance_profiles,
+    plot_single_task,
+    probability_of_improvement,
+    sample_efficiency_curves,
+)
+from marl_eval.utils.data_processing_utils import (
+    create_matrices_for_rliable,
+    data_process_pipeline,
+)
+
+ENV_NAME = "RobotWarehouse"
+SAVE_PDF = False
+
+data_dir = "data/retmat_no_xpos_all/merged_data/metrics_winrate_processed.json"
+png_plot_dir = "plots/retmat-no-xpos-rware/png/"
+pdf_plot_dir = "plots/retmat-no-xpos-rware/pdf/"
+
+legend_map = {
+    "rec_mappo": "Rec MAPPO",
+    "rec_ippo": "Rec IPPO",
+    "ff_mappo": "FF MAPPO",
+    "ff_ippo": "FF IPPO",
+    "mat": "MAT",
+    "retmat": "Ret-MAT",
+}
+
+##############################
+# Read in and process data
+##############################
+METRICS_TO_NORMALIZE = ["mean_episode_return"]
+
+with open(data_dir) as f:
+    raw_data = json.load(f)
+
+processed_data = data_process_pipeline(
+    raw_data=raw_data, metrics_to_normalize=METRICS_TO_NORMALIZE
+)
+
+environment_comparison_matrix, sample_efficiency_matrix = create_matrices_for_rliable(
+    data_dictionary=processed_data,
+    environment_name=ENV_NAME,
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+)
+
+# Create folder for storing plots
+if not os.path.exists(png_plot_dir):
+    os.makedirs(png_plot_dir)
+if not os.path.exists(pdf_plot_dir):
+    os.makedirs(pdf_plot_dir)
+
+
+##############################
+# Probability of improvement
+# Aggregate scores
+# Performance profiles
+##############################
+
+# These should be done with normalized data
+
+# probability of improvement
+fig = probability_of_improvement(
+    environment_comparison_matrix,
+    metric_name="mean_episode_return",
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+    algorithms_to_compare=[
+        ["retmat", "mat"],
+        ["mat", "ff_ippo"],
+        ["mat", "rec_ippo"],
+        ["mat", "ff_mappo"],
+        ["mat", "rec_mappo"],
+        ["retmat", "ff_ippo"],
+        ["retmat", "rec_ippo"],
+        ["retmat", "ff_mappo"],
+        ["retmat", "rec_mappo"],
+    ],
+    legend_map=legend_map,
+)
+fig.figure.savefig(f"{png_plot_dir}prob_of_improvement.png", bbox_inches="tight")
+if SAVE_PDF:
+    fig.figure.savefig(f"{pdf_plot_dir}prob_of_improvement.pdf", bbox_inches="tight")
+
+# aggregate scores
+fig, _, _ = aggregate_scores(  # type: ignore
+    environment_comparison_matrix,
+    metric_name="mean_episode_return",
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+    save_tabular_as_latex=True,
+    legend_map=legend_map,
+)
+fig.figure.savefig(f"{png_plot_dir}aggregate_scores.png", bbox_inches="tight")
+if SAVE_PDF:
+    fig.figure.savefig(f"{pdf_plot_dir}aggregate_scores.pdf", bbox_inches="tight")
+
+# performance profiles
+fig = performance_profiles(
+    environment_comparison_matrix,
+    metric_name="mean_episode_return",
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+    legend_map=legend_map,
+)
+fig.figure.savefig(f"{png_plot_dir}performance_profile.png", bbox_inches="tight")
+if SAVE_PDF:
+    fig.figure.savefig(f"{pdf_plot_dir}performance_profile.pdf", bbox_inches="tight")
+
+
+##############################
+# Plot episode return data
+##############################
+
+# This should not be done with normalized data
+
+METRICS_TO_NORMALIZE = []
+
+with open(data_dir) as f:
+    raw_data = json.load(f)
+
+processed_data = data_process_pipeline(
+    raw_data=raw_data, metrics_to_normalize=METRICS_TO_NORMALIZE
+)
+
+environment_comparison_matrix, sample_efficiency_matrix = create_matrices_for_rliable(
+    data_dictionary=processed_data,
+    environment_name=ENV_NAME,
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+)
+
+# Get all tasks
+tasks = list(processed_data[ENV_NAME.lower()].keys())
+
+# Aggregate data over a single task
+for task in tasks:
+    fig = plot_single_task(
+        processed_data=processed_data,
+        environment_name=ENV_NAME,
+        task_name=task,
+        metric_name="mean_episode_return",
+        metrics_to_normalize=METRICS_TO_NORMALIZE,
+        legend_map=legend_map,
+    )
+
+    fig.figure.savefig(
+        f"{png_plot_dir}rware_{task}_agg_return.png", bbox_inches="tight"
+    )
+    if SAVE_PDF:
+        fig.figure.savefig(
+            f"{pdf_plot_dir}rware_{task}_agg_return.pdf", bbox_inches="tight"
+        )
+
+    # Close the figure object
+    plt.close(fig.figure)
+
+# Aggregate data over all environment tasks.
+
+fig, _, _ = sample_efficiency_curves(  # type: ignore
+    sample_efficiency_matrix,
+    metric_name="mean_episode_return",
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+    legend_map=legend_map,
+)
+fig.figure.savefig(
+    f"{png_plot_dir}return_sample_efficiency_curve.png", bbox_inches="tight"
+)
+if SAVE_PDF:
+    fig.figure.savefig(
+        f"{pdf_plot_dir}return_sample_efficiency_curve.pdf", bbox_inches="tight"
+    )
diff --git a/plot_data_all_envs.py b/plot_data_all_envs.py
new file mode 100644
index 00000000..69621f6c
--- /dev/null
+++ b/plot_data_all_envs.py
@@ -0,0 +1,195 @@
+# python3
+# Copyright 2022 InstaDeep Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
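+
+"""Aggregate plots over all environments combined.
+
+Expects the ``AllEnvs`` data produced by ``master_episode_norm.py``, in which
+episode returns are already min-max normalised per environment; this is why
+``METRICS_TO_NORMALIZE`` is left empty below.
+"""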
+
+import json
+import os
+
+import matplotlib.pyplot as plt
+
+from marl_eval.plotting_tools.plotting import (
+    aggregate_scores,
+    performance_profiles,
+    plot_single_task,
+    probability_of_improvement,
+    sample_efficiency_curves,
+)
+from marl_eval.utils.data_processing_utils import (
+    create_matrices_for_rliable,
+    data_process_pipeline,
+)
+
+ENV_NAME = "AllEnvs"
+SAVE_PDF = False
+
+data_dir = "data/full-benchmark-update/merged_data/master_norm_episode_return.json"
+png_plot_dir = "plots/full-benchmark-update/all_envs/png/"
+pdf_plot_dir = "plots/full-benchmark-update/all_envs/pdf/"
+
+legend_map = {
+    "rec_mappo": "Rec MAPPO",
+    "rec_ippo": "Rec IPPO",
+    "ff_mappo": "FF MAPPO",
+    "ff_ippo": "FF IPPO",
+    "mat": "MAT",
+    # "retmat": "RetMAT",
+    "retmat_memory": "RetMAT Memory",
+    # "retmat_main_memory": "RetMAT Main Memory",
+    # "retmat_yarn_memory": "RetMAT Yarn Memory",
+}
+
+##############################
+# Read in and process data
+##############################
+METRICS_TO_NORMALIZE = []
+
+with open(data_dir) as f:
+    raw_data = json.load(f)
+
+processed_data = data_process_pipeline(
+    raw_data=raw_data, metrics_to_normalize=METRICS_TO_NORMALIZE
+)
+
+environment_comparison_matrix, sample_efficiency_matrix = create_matrices_for_rliable(
+    data_dictionary=processed_data,
+    environment_name=ENV_NAME,
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+)
+
+# Create folder for storing plots
+if not os.path.exists(png_plot_dir):
+    os.makedirs(png_plot_dir)
+if not os.path.exists(pdf_plot_dir):
+    os.makedirs(pdf_plot_dir)
+
+
+##############################
+# Probability of improvement
+# Aggregate scores
+# Performance profiles
+##############################
+
+# These should be done with normalized data
+
+# probability of improvement
+fig = probability_of_improvement(
+    environment_comparison_matrix,
+    metric_name="mean_episode_return",
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+    algorithms_to_compare=[
+        ["retmat_memory", "mat"],
+        # ["mat", "ff_ippo"],
+        # ["mat", "rec_ippo"],
+        # ["mat", "ff_mappo"],
+        # ["mat", "rec_mappo"],
+        ["retmat_memory", "ff_ippo"],
+        ["retmat_memory", "rec_ippo"],
+        ["retmat_memory", "ff_mappo"],
+        ["retmat_memory", "rec_mappo"],
+        # ["retmat_main_memory", "retmat_yarn_memory"],
+        # ["retmat_memory", "mat"],
+        # ["retmat_memory", "retmat"],
+        # ["retmat_yarn_memory", "mat"],
+    ],
+    legend_map=legend_map,
+)
+fig.figure.savefig(f"{png_plot_dir}prob_of_improvement.png", bbox_inches="tight")
+if SAVE_PDF:
+    fig.figure.savefig(f"{pdf_plot_dir}prob_of_improvement.pdf", bbox_inches="tight")
+
+# aggregate scores
+fig, _, _ = aggregate_scores(  # type: ignore
+    environment_comparison_matrix,
+    metric_name="mean_episode_return",
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+    save_tabular_as_latex=True,
+    legend_map=legend_map,
+)
+fig.figure.savefig(f"{png_plot_dir}aggregate_scores.png", bbox_inches="tight")
+if SAVE_PDF:
+    fig.figure.savefig(f"{pdf_plot_dir}aggregate_scores.pdf", bbox_inches="tight")
+
+# performance profiles
+fig = performance_profiles(
+    environment_comparison_matrix,
+    metric_name="mean_episode_return",
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+    legend_map=legend_map,
+)
+fig.figure.savefig(f"{png_plot_dir}performance_profile.png", bbox_inches="tight")
+if SAVE_PDF:
+    fig.figure.savefig(f"{pdf_plot_dir}performance_profile.pdf", bbox_inches="tight")
+
+
+##############################
+# Plot episode return data
+##############################
+
+# This should not be done with normalized data
+
+METRICS_TO_NORMALIZE = []
+
+with open(data_dir) as f:
+    raw_data = json.load(f)
+
+processed_data = data_process_pipeline(
+    raw_data=raw_data, metrics_to_normalize=METRICS_TO_NORMALIZE
+)
+
+environment_comparison_matrix, sample_efficiency_matrix = create_matrices_for_rliable(
+    data_dictionary=processed_data,
+    environment_name=ENV_NAME,
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+)
+
+# Get all tasks
+tasks = list(processed_data[ENV_NAME.lower()].keys())
+
+# Aggregate data over a single task
+for task in tasks:
+    fig = plot_single_task(
+        processed_data=processed_data,
+        environment_name=ENV_NAME,
+        task_name=task,
+        metric_name="mean_episode_return",
+        metrics_to_normalize=METRICS_TO_NORMALIZE,
+        legend_map=legend_map,
+    )
+
+    fig.figure.savefig(
+        f"{png_plot_dir}{task}_agg_return.png", bbox_inches="tight"
+    )
+    if SAVE_PDF:
+        fig.figure.savefig(
+            f"{pdf_plot_dir}{task}_agg_return.pdf", bbox_inches="tight"
+        )
+
+    # Close the figure object
+    plt.close(fig.figure)
+
+# Aggregate data over all environment tasks.
+
+fig, _, _ = sample_efficiency_curves(  # type: ignore
+    sample_efficiency_matrix,
+    metric_name="mean_episode_return",
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+    legend_map=legend_map,
+)
+fig.figure.savefig(
+    f"{png_plot_dir}return_sample_efficiency_curve.png", bbox_inches="tight"
+)
+if SAVE_PDF:
+    fig.figure.savefig(
+        f"{pdf_plot_dir}return_sample_efficiency_curve.pdf", bbox_inches="tight"
+    )
diff --git a/plot_data_cleaner.py b/plot_data_cleaner.py
new file mode 100644
index 00000000..5f6eb653
--- /dev/null
+++ b/plot_data_cleaner.py
@@ -0,0 +1,199 @@
+# python3
+# Copyright 2022 InstaDeep Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
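+
+"""Plots for the Cleaner environment from the full benchmark run.
+
+``PLOT_METRIC`` selects between ``"win_rate"`` and ``"mean_episode_return"``
+and is used both for the plotted metric and for the output file names.
+"""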
+
+import json
+import os
+
+import matplotlib.pyplot as plt
+
+from marl_eval.plotting_tools.plotting import (
+    aggregate_scores,
+    performance_profiles,
+    plot_single_task,
+    probability_of_improvement,
+    sample_efficiency_curves,
+)
+from marl_eval.utils.data_processing_utils import (
+    create_matrices_for_rliable,
+    data_process_pipeline,
+)
+
+ENV_NAME = "Cleaner"
+SAVE_PDF = False
+
+data_dir = "data/full-benchmark-update/merged_data/interim_seed_duplicated_cleaner_filter.json"
+png_plot_dir = "plots/full-benchmark-update/cleaner_no_retmat/png/"
+pdf_plot_dir = "plots/full-benchmark-update/cleaner_no_retmat/pdf/"
+
+PLOT_METRIC = "win_rate"  # "mean_episode_return" "win_rate"
+
+legend_map = {
+    "rec_mappo": "Rec MAPPO",
+    "rec_ippo": "Rec IPPO",
+    "ff_mappo": "FF MAPPO",
+    "ff_ippo": "FF IPPO",
+    "mat": "MAT",
+    # "retmat": "RetMAT",
+    "retmat_memory": "RetMAT Memory",
+    # "retmat_main_memory": "RetMAT Main Memory",
+    # "retmat_yarn_memory": "RetMAT Yarn Memory",
+}
+
+##############################
+# Read in and process data
+##############################
+METRICS_TO_NORMALIZE = ["mean_episode_return"]
+
+with open(data_dir) as f:
+    raw_data = json.load(f)
+
+processed_data = data_process_pipeline(
+    raw_data=raw_data, metrics_to_normalize=METRICS_TO_NORMALIZE
+)
+
+environment_comparison_matrix, sample_efficiency_matrix = create_matrices_for_rliable(
+    data_dictionary=processed_data,
+    environment_name=ENV_NAME,
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+)
+
+# Create folder for storing plots
+if not os.path.exists(png_plot_dir):
+    os.makedirs(png_plot_dir)
+if not os.path.exists(pdf_plot_dir):
+    os.makedirs(pdf_plot_dir)
+
+
+##############################
+# Probability of improvement
+# Aggregate scores
+# Performance profiles
+##############################
+
+# These should be done with normalized data
+
+# probability of improvement
+fig = probability_of_improvement(
+    environment_comparison_matrix,
+    metric_name=PLOT_METRIC,
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+    algorithms_to_compare=[
+        ["retmat_memory", "mat"],
+        # ["mat", "ff_ippo"],
+        # ["mat", "rec_ippo"],
+        # ["mat", "ff_mappo"],
+        # ["mat", "rec_mappo"],
+        ["retmat_memory", "ff_ippo"],
+        ["retmat_memory", "rec_ippo"],
+        ["retmat_memory", "ff_mappo"],
+        ["retmat_memory", "rec_mappo"],
+        # ["retmat_main_memory", "retmat_yarn_memory"],
+        # ["retmat_memory", "mat"],
+        # ["retmat_memory", "retmat"],
+        # ["retmat_yarn_memory", "mat"],
+    ],
+    legend_map=legend_map,
+)
+fig.figure.savefig(f"{png_plot_dir}_{PLOT_METRIC}_prob_of_improvement.png", bbox_inches="tight")
+if SAVE_PDF:
+    fig.figure.savefig(f"{pdf_plot_dir}_{PLOT_METRIC}_prob_of_improvement.pdf", bbox_inches="tight")
+
+# aggregate scores
+fig, _, _ = aggregate_scores(  # type: ignore
+    environment_comparison_matrix,
+    metric_name=PLOT_METRIC,
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+    save_tabular_as_latex=True,
+    legend_map=legend_map,
+)
+fig.figure.savefig(f"{png_plot_dir}_{PLOT_METRIC}_aggregate_scores.png", bbox_inches="tight")
+if SAVE_PDF:
+    fig.figure.savefig(f"{pdf_plot_dir}_{PLOT_METRIC}_aggregate_scores.pdf", bbox_inches="tight")
+
+# performance profiles
+fig = performance_profiles(
+    environment_comparison_matrix,
+    metric_name=PLOT_METRIC,
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+    legend_map=legend_map,
+)
+fig.figure.savefig(f"{png_plot_dir}_{PLOT_METRIC}_performance_profile.png", bbox_inches="tight")
+if SAVE_PDF:
+    fig.figure.savefig(f"{pdf_plot_dir}_{PLOT_METRIC}_performance_profile.pdf", bbox_inches="tight")
+
+
+##############################
+# Plot episode return data
+##############################
+
+# This should not be done with normalized data
+
+METRICS_TO_NORMALIZE = []
+
+with open(data_dir) as f:
+    raw_data = json.load(f)
+
+processed_data = data_process_pipeline(
+    raw_data=raw_data, metrics_to_normalize=METRICS_TO_NORMALIZE
+)
+
+environment_comparison_matrix, sample_efficiency_matrix = create_matrices_for_rliable(
+    data_dictionary=processed_data,
+    environment_name=ENV_NAME,
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+)
+
+# Get all tasks
+tasks = list(processed_data[ENV_NAME.lower()].keys())
+
+# Aggregate data over a single task
+for task in tasks:
+    fig = plot_single_task(
+        processed_data=processed_data,
+        environment_name=ENV_NAME,
+        task_name=task,
+        metric_name=PLOT_METRIC,
+        metrics_to_normalize=METRICS_TO_NORMALIZE,
+        legend_map=legend_map,
+    )
+
+    fig.figure.savefig(
+        f"{png_plot_dir}_{task}_agg_{PLOT_METRIC}.png", bbox_inches="tight"
+    )
+    if SAVE_PDF:
+        fig.figure.savefig(
+            f"{pdf_plot_dir}_{task}_agg_{PLOT_METRIC}.pdf", bbox_inches="tight"
+        )
+
+    # Close the figure object
+    plt.close(fig.figure)
+
+# Aggregate data over all environment tasks.
+
+fig, _, _ = sample_efficiency_curves(  # type: ignore
+    sample_efficiency_matrix,
+    metric_name=PLOT_METRIC,
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+    legend_map=legend_map,
+)
+fig.figure.savefig(
+    f"{png_plot_dir}_{PLOT_METRIC}_sample_efficiency_curve.png",
+    bbox_inches="tight",
+)
+if SAVE_PDF:
+    fig.figure.savefig(
+        f"{pdf_plot_dir}_{PLOT_METRIC}_sample_efficiency_curve.pdf",
+        bbox_inches="tight",
+    )
\ No newline at end of file
diff --git a/plot_data_connector.py b/plot_data_connector.py
new file mode 100644
index 00000000..3ea066be
--- /dev/null
+++ b/plot_data_connector.py
@@ -0,0 +1,199 @@
+# python3
+# Copyright 2022 InstaDeep Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
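+
+"""Plots for the MaConnector environment from the full benchmark run,
+parameterised on ``PLOT_METRIC`` in the same way as ``plot_data_cleaner.py``.
+"""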
+
+import json
+import os
+
+import matplotlib.pyplot as plt
+
+from marl_eval.plotting_tools.plotting import (
+    aggregate_scores,
+    performance_profiles,
+    plot_single_task,
+    probability_of_improvement,
+    sample_efficiency_curves,
+)
+from marl_eval.utils.data_processing_utils import (
+    create_matrices_for_rliable,
+    data_process_pipeline,
+)
+
+ENV_NAME = "MaConnector"
+SAVE_PDF = False
+
+data_dir = "data/full-benchmark-update/merged_data/interim_seed_duplicated.json"
+png_plot_dir = "plots/full-benchmark-update/connector_no_retmat/png/"
+pdf_plot_dir = "plots/full-benchmark-update/connector_no_retmat/pdf/"
+
+PLOT_METRIC = "win_rate"  # "mean_episode_return"
+
+legend_map = {
+    "rec_mappo": "Rec MAPPO",
+    "rec_ippo": "Rec IPPO",
+    "ff_mappo": "FF MAPPO",
+    "ff_ippo": "FF IPPO",
+    "mat": "MAT",
+    # "retmat": "RetMAT",
+    "retmat_memory": "RetMAT Memory",
+    # "retmat_main_memory": "RetMAT Main Memory",
+    # "retmat_yarn_memory": "RetMAT Yarn Memory",
+}
+
+##############################
+# Read in and process data
+##############################
+METRICS_TO_NORMALIZE = ["mean_episode_return"]
+
+with open(data_dir) as f:
+    raw_data = json.load(f)
+
+processed_data = data_process_pipeline(
+    raw_data=raw_data, metrics_to_normalize=METRICS_TO_NORMALIZE
+)
+
+environment_comparison_matrix, sample_efficiency_matrix = create_matrices_for_rliable(
+    data_dictionary=processed_data,
+    environment_name=ENV_NAME,
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+)
+
+# Create folder for storing plots
+if not os.path.exists(png_plot_dir):
+    os.makedirs(png_plot_dir)
+if not os.path.exists(pdf_plot_dir):
+    os.makedirs(pdf_plot_dir)
+
+
+##############################
+# Probability of improvement
+# Aggregate scores
+# Performance profiles
+##############################
+
+# These should be done with normalized data
+
+# probability of improvement
+fig = probability_of_improvement(
+    environment_comparison_matrix,
+    metric_name=PLOT_METRIC,
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+    algorithms_to_compare=[
+        ["retmat_memory", "mat"],
+        # ["mat", "ff_ippo"],
+        # ["mat", "rec_ippo"],
+        # ["mat", "ff_mappo"],
+        # ["mat", "rec_mappo"],
+        ["retmat_memory", "ff_ippo"],
+        ["retmat_memory", "rec_ippo"],
+        ["retmat_memory", "ff_mappo"],
+        ["retmat_memory", "rec_mappo"],
+        # ["retmat_main_memory", "retmat_yarn_memory"],
+        # ["retmat_memory", "mat"],
+        # ["retmat_memory", "retmat"],
+        # ["retmat_yarn_memory", "mat"],
+    ],
+    legend_map=legend_map,
+)
+fig.figure.savefig(f"{png_plot_dir}_{PLOT_METRIC}_prob_of_improvement.png", bbox_inches="tight")
+if SAVE_PDF:
+    fig.figure.savefig(f"{pdf_plot_dir}_{PLOT_METRIC}_prob_of_improvement.pdf", bbox_inches="tight")
+
+# aggregate scores
+fig, _, _ = aggregate_scores(  # type: ignore
+    environment_comparison_matrix,
+    metric_name=PLOT_METRIC,
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+    save_tabular_as_latex=True,
+    legend_map=legend_map,
+)
+fig.figure.savefig(f"{png_plot_dir}_{PLOT_METRIC}_aggregate_scores.png", bbox_inches="tight")
+if SAVE_PDF:
+    fig.figure.savefig(f"{pdf_plot_dir}_{PLOT_METRIC}_aggregate_scores.pdf", bbox_inches="tight")
+
+# performance profiles
+fig = performance_profiles(
+    environment_comparison_matrix,
+    metric_name=PLOT_METRIC,
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+    legend_map=legend_map,
+)
+fig.figure.savefig(f"{png_plot_dir}_{PLOT_METRIC}_performance_profile.png", bbox_inches="tight")
+if SAVE_PDF:
+    fig.figure.savefig(f"{pdf_plot_dir}_{PLOT_METRIC}_performance_profile.pdf", bbox_inches="tight")
+
+
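+# Note: ``png_plot_dir``/``pdf_plot_dir`` end in "/", so the f-strings above
+# and below produce file names with a leading underscore inside the plot
+# directory (e.g. ``png/_win_rate_aggregate_scores.png``). If that is not
+# wanted, something like ``os.path.join(png_plot_dir, f"{PLOT_METRIC}_...")``
+# would drop it.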
+############################## +# Plot episode return data +############################## + +# This should not be done with normalized data + +METRICS_TO_NORMALIZE = [] + +with open(data_dir) as f: + raw_data = json.load(f) + +processed_data = data_process_pipeline( + raw_data=raw_data, metrics_to_normalize=METRICS_TO_NORMALIZE +) + +environment_comparison_matrix, sample_effeciency_matrix = create_matrices_for_rliable( + data_dictionary=processed_data, + environment_name=ENV_NAME, + metrics_to_normalize=METRICS_TO_NORMALIZE, +) + +# Get all tasks +tasks = list(processed_data[ENV_NAME.lower()].keys()) + +# Aggregate data over a single tasks +for task in tasks: + fig = plot_single_task( + processed_data=processed_data, + environment_name=ENV_NAME, + task_name=task, + metric_name=PLOT_METRIC, + metrics_to_normalize=METRICS_TO_NORMALIZE, + legend_map=legend_map, + ) + + fig.figure.savefig( + f"{png_plot_dir}_{task}_agg_{PLOT_METRIC}.png", bbox_inches="tight" + ) + if SAVE_PDF: + fig.figure.savefig( + f"{pdf_plot_dir}_{task}_agg_{PLOT_METRIC}_.pdf", bbox_inches="tight" + ) + + # Close the figure object + plt.close(fig.figure) + +# Aggregate data over all environment tasks. + +fig, _, _ = sample_efficiency_curves( # type: ignore + sample_effeciency_matrix, + metric_name=PLOT_METRIC, + metrics_to_normalize=METRICS_TO_NORMALIZE, + legend_map=legend_map, +) +fig.figure.savefig( + f"{png_plot_dir}_{PLOT_METRIC}_sample_effeciency_curve.png", + bbox_inches="tight", +) +if SAVE_PDF: + fig.figure.savefig( + f"{pdf_plot_dir}_{PLOT_METRIC}_sample_effeciency_curve.pdf", + bbox_inches="tight", + ) diff --git a/plot_data_lbf.py b/plot_data_lbf.py new file mode 100644 index 00000000..c61d13ad --- /dev/null +++ b/plot_data_lbf.py @@ -0,0 +1,191 @@ +# python3 +# Copyright 2022 InstaDeep Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import json
+import os
+
+import matplotlib.pyplot as plt
+
+from marl_eval.plotting_tools.plotting import (
+    aggregate_scores,
+    performance_profiles,
+    plot_single_task,
+    probability_of_improvement,
+    sample_efficiency_curves,
+)
+from marl_eval.utils.data_processing_utils import (
+    create_matrices_for_rliable,
+    data_process_pipeline,
+)
+
+ENV_NAME = "LevelBasedForaging"
+SAVE_PDF = False
+
+data_dir = "data/full-benchmark-update/merged_data/interim_seed_duplicated.json"
+png_plot_dir = "plots/full-benchmark-update/lbf_no_retmat/png/"
+pdf_plot_dir = "plots/full-benchmark-update/lbf_no_retmat/pdf/"
+
+legend_map = {
+    "rec_mappo": "Rec MAPPO",
+    "rec_ippo": "Rec IPPO",
+    "ff_mappo": "FF MAPPO",
+    "ff_ippo": "FF IPPO",
+    "mat": "MAT",
+    # "retmat": "RetMAT",
+    "retmat_memory": "RetMAT Memory",
+    # "retmat_main_memory": "RetMAT Main Memory",
+    # "retmat_yarn_memory": "RetMAT Yarn Memory",
+}
+
+##############################
+# Read in and process data
+##############################
+METRICS_TO_NORMALIZE = ["mean_episode_return"]
+
+with open(data_dir) as f:
+    raw_data = json.load(f)
+
+processed_data = data_process_pipeline(
+    raw_data=raw_data, metrics_to_normalize=METRICS_TO_NORMALIZE
+)
+
+environment_comparison_matrix, sample_efficiency_matrix = create_matrices_for_rliable(
+    data_dictionary=processed_data,
+    environment_name=ENV_NAME,
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+)
+
+# Create folder for storing plots
+if not os.path.exists(png_plot_dir):
+    os.makedirs(png_plot_dir)
+if not os.path.exists(pdf_plot_dir):
+    os.makedirs(pdf_plot_dir)
+
+
+##############################
+# Probability of improvement
+# Aggregate scores
+# Performance profiles
+##############################
+
+# These should be done with normalized data
+
+# probability of improvement
+fig = probability_of_improvement(
+    environment_comparison_matrix,
+    metric_name="mean_episode_return",
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+    algorithms_to_compare=[
+        ["retmat_memory", "mat"],
+        # ["mat", "ff_ippo"],
+        # ["mat", "rec_ippo"],
+        # ["mat", "ff_mappo"],
+        # ["mat", "rec_mappo"],
+        ["retmat_memory", "ff_ippo"],
+        ["retmat_memory", "rec_ippo"],
+        ["retmat_memory", "ff_mappo"],
+        ["retmat_memory", "rec_mappo"],
+        # ["retmat_main_memory", "retmat_yarn_memory"],
+        # ["retmat_memory", "mat"],
+        # ["retmat_memory", "retmat"],
+        # ["retmat_yarn_memory", "mat"],
+    ],
+    legend_map=legend_map,
+)
+fig.figure.savefig(f"{png_plot_dir}prob_of_improvement.png", bbox_inches="tight")
+if SAVE_PDF:
+    fig.figure.savefig(f"{pdf_plot_dir}prob_of_improvement.pdf", bbox_inches="tight")
+
+# aggregate scores
+fig, _, _ = aggregate_scores(  # type: ignore
+    environment_comparison_matrix,
+    metric_name="mean_episode_return",
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+    save_tabular_as_latex=True,
+    legend_map=legend_map,
+)
+fig.figure.savefig(f"{png_plot_dir}aggregate_scores.png", bbox_inches="tight")
+if SAVE_PDF:
+    fig.figure.savefig(f"{pdf_plot_dir}aggregate_scores.pdf", bbox_inches="tight")
+
+# performance profiles
+fig = performance_profiles(
+    environment_comparison_matrix,
+    metric_name="mean_episode_return",
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+    legend_map=legend_map,
+)
+fig.figure.savefig(f"{png_plot_dir}performance_profile.png", bbox_inches="tight")
+if SAVE_PDF:
+    fig.figure.savefig(f"{pdf_plot_dir}performance_profile.pdf", bbox_inches="tight")
+
+
+##############################
+# Plot episode return data
+##############################
+
+# This should not be done with normalized data
+
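+# Re-processing with an empty normalisation list keeps episode returns on
+# their raw scale for the per-task and sample-efficiency curves below.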
+METRICS_TO_NORMALIZE = []
+
+with open(data_dir) as f:
+    raw_data = json.load(f)
+
+processed_data = data_process_pipeline(
+    raw_data=raw_data, metrics_to_normalize=METRICS_TO_NORMALIZE
+)
+
+environment_comparison_matrix, sample_efficiency_matrix = create_matrices_for_rliable(
+    data_dictionary=processed_data,
+    environment_name=ENV_NAME,
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+)
+
+# Get all tasks
+tasks = list(processed_data[ENV_NAME.lower()].keys())
+
+# Aggregate data over a single task
+for task in tasks:
+    fig = plot_single_task(
+        processed_data=processed_data,
+        environment_name=ENV_NAME,
+        task_name=task,
+        metric_name="mean_episode_return",
+        metrics_to_normalize=METRICS_TO_NORMALIZE,
+        legend_map=legend_map,
+    )
+
+    fig.figure.savefig(f"{png_plot_dir}lbf_{task}_agg_return.png", bbox_inches="tight")
+    if SAVE_PDF:
+        fig.figure.savefig(f"{pdf_plot_dir}lbf_{task}_agg_return.pdf", bbox_inches="tight")
+
+    # Close the figure object
+    plt.close(fig.figure)
+
+# Aggregate data over all environment tasks.
+
+fig, _, _ = sample_efficiency_curves(  # type: ignore
+    sample_efficiency_matrix,
+    metric_name="mean_episode_return",
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+    legend_map=legend_map,
+)
+fig.figure.savefig(
+    f"{png_plot_dir}return_sample_efficiency_curve.png", bbox_inches="tight"
+)
+if SAVE_PDF:
+    fig.figure.savefig(
+        f"{pdf_plot_dir}return_sample_efficiency_curve.pdf", bbox_inches="tight"
+    )
diff --git a/plot_data_mabrax.py b/plot_data_mabrax.py
new file mode 100644
index 00000000..fda090a8
--- /dev/null
+++ b/plot_data_mabrax.py
@@ -0,0 +1,195 @@
+# python3
+# Copyright 2022 InstaDeep Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
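+
+"""Plots for the MaBrax (multi-agent Brax) environment from the full
+benchmark run."""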
+
+import json
+import os
+
+import matplotlib.pyplot as plt
+
+from marl_eval.plotting_tools.plotting import (
+    aggregate_scores,
+    performance_profiles,
+    plot_single_task,
+    probability_of_improvement,
+    sample_efficiency_curves,
+)
+from marl_eval.utils.data_processing_utils import (
+    create_matrices_for_rliable,
+    data_process_pipeline,
+)
+
+ENV_NAME = "MaBrax"
+SAVE_PDF = False
+
+data_dir = "data/full-benchmark-update/merged_data/interim_seed_duplicated.json"
+png_plot_dir = "plots/full-benchmark-update/mabrax_no_retmat/png/"
+pdf_plot_dir = "plots/full-benchmark-update/mabrax_no_retmat/pdf/"
+
+legend_map = {
+    "rec_mappo": "Rec MAPPO",
+    "rec_ippo": "Rec IPPO",
+    "ff_mappo": "FF MAPPO",
+    "ff_ippo": "FF IPPO",
+    "mat": "MAT",
+    # "retmat": "RetMAT",
+    "retmat_memory": "RetMAT Memory",
+    # "retmat_main_memory": "RetMAT Main Memory",
+    # "retmat_yarn_memory": "RetMAT Yarn Memory",
+}
+
+##############################
+# Read in and process data
+##############################
+METRICS_TO_NORMALIZE = ["mean_episode_return"]
+
+with open(data_dir) as f:
+    raw_data = json.load(f)
+
+processed_data = data_process_pipeline(
+    raw_data=raw_data, metrics_to_normalize=METRICS_TO_NORMALIZE
+)
+
+environment_comparison_matrix, sample_efficiency_matrix = create_matrices_for_rliable(
+    data_dictionary=processed_data,
+    environment_name=ENV_NAME,
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+)
+
+# Create folder for storing plots
+if not os.path.exists(png_plot_dir):
+    os.makedirs(png_plot_dir)
+if not os.path.exists(pdf_plot_dir):
+    os.makedirs(pdf_plot_dir)
+
+
+##############################
+# Probability of improvement
+# Aggregate scores
+# Performance profiles
+##############################
+
+# These should be done with normalized data
+
+# probability of improvement
+fig = probability_of_improvement(
+    environment_comparison_matrix,
+    metric_name="mean_episode_return",
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+    algorithms_to_compare=[
+        ["retmat_memory", "mat"],
+        # ["mat", "ff_ippo"],
+        # ["mat", "rec_ippo"],
+        # ["mat", "ff_mappo"],
+        # ["mat", "rec_mappo"],
+        ["retmat_memory", "ff_ippo"],
+        ["retmat_memory", "rec_ippo"],
+        ["retmat_memory", "ff_mappo"],
+        ["retmat_memory", "rec_mappo"],
+        # ["retmat_main_memory", "retmat_yarn_memory"],
+        # ["retmat_memory", "mat"],
+        # ["retmat_memory", "retmat"],
+        # ["retmat_yarn_memory", "mat"],
+    ],
+    legend_map=legend_map,
+)
+fig.figure.savefig(f"{png_plot_dir}prob_of_improvement.png", bbox_inches="tight")
+if SAVE_PDF:
+    fig.figure.savefig(f"{pdf_plot_dir}prob_of_improvement.pdf", bbox_inches="tight")
+
+# aggregate scores
+fig, _, _ = aggregate_scores(  # type: ignore
+    environment_comparison_matrix,
+    metric_name="mean_episode_return",
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+    save_tabular_as_latex=True,
+    legend_map=legend_map,
+)
+fig.figure.savefig(f"{png_plot_dir}aggregate_scores.png", bbox_inches="tight")
+if SAVE_PDF:
+    fig.figure.savefig(f"{pdf_plot_dir}aggregate_scores.pdf", bbox_inches="tight")
+
+# performance profiles
+fig = performance_profiles(
+    environment_comparison_matrix,
+    metric_name="mean_episode_return",
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+    legend_map=legend_map,
+)
+fig.figure.savefig(f"{png_plot_dir}performance_profile.png", bbox_inches="tight")
+if SAVE_PDF:
+    fig.figure.savefig(f"{pdf_plot_dir}performance_profile.pdf", bbox_inches="tight")
+
+
+##############################
+# Plot episode return data
+##############################
+
+# This should not be done with normalized data
+
+METRICS_TO_NORMALIZE = []
+
+with open(data_dir) as f:
+    raw_data = json.load(f)
+
+processed_data = data_process_pipeline(
+    raw_data=raw_data, metrics_to_normalize=METRICS_TO_NORMALIZE
+)
+
+environment_comparison_matrix, sample_efficiency_matrix = create_matrices_for_rliable(
+    data_dictionary=processed_data,
+    environment_name=ENV_NAME,
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+)
+
+# Get all tasks
+tasks = list(processed_data[ENV_NAME.lower()].keys())
+
+# Aggregate data over a single task
+for task in tasks:
+    fig = plot_single_task(
+        processed_data=processed_data,
+        environment_name=ENV_NAME,
+        task_name=task,
+        metric_name="mean_episode_return",
+        metrics_to_normalize=METRICS_TO_NORMALIZE,
+        legend_map=legend_map,
+    )
+
+    fig.figure.savefig(
+        f"{png_plot_dir}mabrax_{task}_agg_return.png", bbox_inches="tight"
+    )
+    if SAVE_PDF:
+        fig.figure.savefig(
+            f"{pdf_plot_dir}mabrax_{task}_agg_return.pdf", bbox_inches="tight"
+        )
+
+    # Close the figure object
+    plt.close(fig.figure)
+
+# Aggregate data over all environment tasks.
+
+fig, _, _ = sample_efficiency_curves(  # type: ignore
+    sample_efficiency_matrix,
+    metric_name="mean_episode_return",
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+    legend_map=legend_map,
+)
+fig.figure.savefig(
+    f"{png_plot_dir}return_sample_efficiency_curve.png", bbox_inches="tight"
+)
+if SAVE_PDF:
+    fig.figure.savefig(
+        f"{pdf_plot_dir}return_sample_efficiency_curve.pdf", bbox_inches="tight"
+    )
diff --git a/plot_data_rware.py b/plot_data_rware.py
new file mode 100644
index 00000000..6cce150c
--- /dev/null
+++ b/plot_data_rware.py
@@ -0,0 +1,195 @@
+# python3
+# Copyright 2022 InstaDeep Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
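+
+"""Plots for the RobotWarehouse (RWARE) environment from the full benchmark
+run."""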
+
+import json
+import os
+
+import matplotlib.pyplot as plt
+
+from marl_eval.plotting_tools.plotting import (
+    aggregate_scores,
+    performance_profiles,
+    plot_single_task,
+    probability_of_improvement,
+    sample_efficiency_curves,
+)
+from marl_eval.utils.data_processing_utils import (
+    create_matrices_for_rliable,
+    data_process_pipeline,
+)
+
+ENV_NAME = "RobotWarehouse"
+SAVE_PDF = False
+
+data_dir = "data/full-benchmark-update/merged_data/interim_seed_duplicated.json"
+png_plot_dir = "plots/full-benchmark-update/rware_no_retmat/png/"
+pdf_plot_dir = "plots/full-benchmark-update/rware_no_retmat/pdf/"
+
+legend_map = {
+    "rec_mappo": "Rec MAPPO",
+    "rec_ippo": "Rec IPPO",
+    "ff_mappo": "FF MAPPO",
+    "ff_ippo": "FF IPPO",
+    "mat": "MAT",
+    # "retmat": "RetMAT",
+    "retmat_memory": "RetMAT Memory",
+    # "retmat_main_memory": "RetMAT Main Memory",
+    # "retmat_yarn_memory": "RetMAT Yarn Memory",
+}
+
+##############################
+# Read in and process data
+##############################
+METRICS_TO_NORMALIZE = ["mean_episode_return"]
+
+with open(data_dir) as f:
+    raw_data = json.load(f)
+
+processed_data = data_process_pipeline(
+    raw_data=raw_data, metrics_to_normalize=METRICS_TO_NORMALIZE
+)
+
+environment_comparison_matrix, sample_efficiency_matrix = create_matrices_for_rliable(
+    data_dictionary=processed_data,
+    environment_name=ENV_NAME,
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+)
+
+# Create folder for storing plots
+if not os.path.exists(png_plot_dir):
+    os.makedirs(png_plot_dir)
+if not os.path.exists(pdf_plot_dir):
+    os.makedirs(pdf_plot_dir)
+
+
+##############################
+# Probability of improvement
+# Aggregate scores
+# Performance profiles
+##############################
+
+# These should be done with normalized data
+
+# probability of improvement
+fig = probability_of_improvement(
+    environment_comparison_matrix,
+    metric_name="mean_episode_return",
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+    algorithms_to_compare=[
+        ["retmat_memory", "mat"],
+        # ["mat", "ff_ippo"],
+        # ["mat", "rec_ippo"],
+        # ["mat", "ff_mappo"],
+        # ["mat", "rec_mappo"],
+        ["retmat_memory", "ff_ippo"],
+        ["retmat_memory", "rec_ippo"],
+        ["retmat_memory", "ff_mappo"],
+        ["retmat_memory", "rec_mappo"],
+        # ["retmat_main_memory", "retmat_yarn_memory"],
+        # ["retmat_memory", "mat"],
+        # ["retmat_memory", "retmat"],
+        # ["retmat_yarn_memory", "mat"],
+    ],
+    legend_map=legend_map,
+)
+fig.figure.savefig(f"{png_plot_dir}prob_of_improvement.png", bbox_inches="tight")
+if SAVE_PDF:
+    fig.figure.savefig(f"{pdf_plot_dir}prob_of_improvement.pdf", bbox_inches="tight")
+
+# aggregate scores
+fig, _, _ = aggregate_scores(  # type: ignore
+    environment_comparison_matrix,
+    metric_name="mean_episode_return",
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+    save_tabular_as_latex=True,
+    legend_map=legend_map,
+)
+fig.figure.savefig(f"{png_plot_dir}aggregate_scores.png", bbox_inches="tight")
+if SAVE_PDF:
+    fig.figure.savefig(f"{pdf_plot_dir}aggregate_scores.pdf", bbox_inches="tight")
+
+# performance profiles
+fig = performance_profiles(
+    environment_comparison_matrix,
+    metric_name="mean_episode_return",
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+    legend_map=legend_map,
+)
+fig.figure.savefig(f"{png_plot_dir}performance_profile.png", bbox_inches="tight")
+if SAVE_PDF:
+    fig.figure.savefig(f"{pdf_plot_dir}performance_profile.pdf", bbox_inches="tight")
+
+
+##############################
+# Plot episode return data
+##############################
+
+# This should not be done with normalized data
+
+METRICS_TO_NORMALIZE = []
+
+with open(data_dir) as f:
+    raw_data = json.load(f)
+
+processed_data = data_process_pipeline(
+    raw_data=raw_data, metrics_to_normalize=METRICS_TO_NORMALIZE
+)
+
+environment_comparison_matrix, sample_efficiency_matrix = create_matrices_for_rliable(
+    data_dictionary=processed_data,
+    environment_name=ENV_NAME,
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+)
+
+# Get all tasks
+tasks = list(processed_data[ENV_NAME.lower()].keys())
+
+# Aggregate data over a single task
+for task in tasks:
+    fig = plot_single_task(
+        processed_data=processed_data,
+        environment_name=ENV_NAME,
+        task_name=task,
+        metric_name="mean_episode_return",
+        metrics_to_normalize=METRICS_TO_NORMALIZE,
+        legend_map=legend_map,
+    )
+
+    fig.figure.savefig(
+        f"{png_plot_dir}rware_{task}_agg_return.png", bbox_inches="tight"
+    )
+    if SAVE_PDF:
+        fig.figure.savefig(
+            f"{pdf_plot_dir}rware_{task}_agg_return.pdf", bbox_inches="tight"
+        )
+
+    # Close the figure object
+    plt.close(fig.figure)
+
+# Aggregate data over all environment tasks.
+
+fig, _, _ = sample_efficiency_curves(  # type: ignore
+    sample_efficiency_matrix,
+    metric_name="mean_episode_return",
+    metrics_to_normalize=METRICS_TO_NORMALIZE,
+    legend_map=legend_map,
+)
+fig.figure.savefig(
+    f"{png_plot_dir}return_sample_efficiency_curve.png", bbox_inches="tight"
+)
+if SAVE_PDF:
+    fig.figure.savefig(
+        f"{pdf_plot_dir}return_sample_efficiency_curve.pdf", bbox_inches="tight"
+    )
diff --git a/plot_data_smax.py b/plot_data_smax.py
new file mode 100644
index 00000000..82a210eb
--- /dev/null
+++ b/plot_data_smax.py
@@ -0,0 +1,193 @@
+# python3
+# Copyright 2022 InstaDeep Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
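+
+"""Plots for the SMAX environment from the full benchmark run, using win rate
+as the headline metric."""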
+ +import json +import os + +import matplotlib.pyplot as plt + +from marl_eval.plotting_tools.plotting import ( + aggregate_scores, + performance_profiles, + plot_single_task, + probability_of_improvement, + sample_efficiency_curves, +) +from marl_eval.utils.data_processing_utils import ( + create_matrices_for_rliable, + data_process_pipeline, +) + +ENV_NAME = "Smax" +SAVE_PDF = False + +data_dir = "data/full-benchmark-update/merged_data/interim_seed_duplicated.json" +png_plot_dir = "plots/full-benchmark-update/smax_no_retmat/png/" +pdf_plot_dir = "plots/full-benchmark-update/smax_no_retmat/pdf/" + +legend_map = { + "rec_mappo": "Rec MAPPO", + "rec_ippo": "Rec IPPO", + "ff_mappo": "FF MAPPO", + "ff_ippo": "FF IPPO", + "mat": "MAT", + # "retmat": "RetMAT", + "retmat_memory": "RetMAT Memory", + # "retmat_main_memory": "RetMAT Main Memory", + # "retmat_yarn_memory": "RetMAT Yarn Memory", +} + +############################## +# Read in and process data +############################## +METRICS_TO_NORMALIZE = ["mean_episode_return"] + +with open(data_dir) as f: + raw_data = json.load(f) + +processed_data = data_process_pipeline( + raw_data=raw_data, metrics_to_normalize=METRICS_TO_NORMALIZE +) + +environment_comparison_matrix, sample_effeciency_matrix = create_matrices_for_rliable( + data_dictionary=processed_data, + environment_name=ENV_NAME, + metrics_to_normalize=METRICS_TO_NORMALIZE, +) + +# Create folder for storing plots +if not os.path.exists(png_plot_dir): + os.makedirs(png_plot_dir) +if not os.path.exists(pdf_plot_dir): + os.makedirs(pdf_plot_dir) + + +############################## +# Probability of improvement +# Aggregate scores +# Performance profiles +############################## + +# These should be done with normalized data + +# probability of improvement +fig = probability_of_improvement( + environment_comparison_matrix, + metric_name="win_rate", + metrics_to_normalize=METRICS_TO_NORMALIZE, + algorithms_to_compare=[ + ["retmat_memory", "mat"], + # ["mat", "ff_ippo"], + # ["mat", "rec_ippo"], + # ["mat", "ff_mappo"], + # ["mat", "rec_mappo"], + ["retmat_memory", "ff_ippo"], + ["retmat_memory", "rec_ippo"], + ["retmat_memory", "ff_mappo"], + ["retmat_memory", "rec_mappo"], + # ["retmat_main_memory", "retmat_yarn_memory"], + # ["retmat_memory", "mat"], + # ["retmat_memory", "retmat"], + # ["retmat_yarn_memory", "mat"], + ], + legend_map=legend_map, +) +fig.figure.savefig(f"{png_plot_dir}prob_of_improvement.png", bbox_inches="tight") +if SAVE_PDF: + fig.figure.savefig(f"{pdf_plot_dir}prob_of_improvement.pdf", bbox_inches="tight") + +# aggregate scores +fig, _, _ = aggregate_scores( # type: ignore + environment_comparison_matrix, + metric_name="win_rate", + metrics_to_normalize=METRICS_TO_NORMALIZE, + save_tabular_as_latex=True, + legend_map=legend_map, +) +fig.figure.savefig(f"{png_plot_dir}aggregate_scores.png", bbox_inches="tight") +if SAVE_PDF: + fig.figure.savefig(f"{pdf_plot_dir}aggregate_scores.pdf", bbox_inches="tight") + +# performance profiles +fig = performance_profiles( + environment_comparison_matrix, + metric_name="win_rate", + metrics_to_normalize=METRICS_TO_NORMALIZE, + legend_map=legend_map, +) +fig.figure.savefig(f"{png_plot_dir}performance_profile.png", bbox_inches="tight") +if SAVE_PDF: + fig.figure.savefig(f"{pdf_plot_dir}performance_profile.pdf", bbox_inches="tight") + + +############################## +# Plot episode return data +############################## + +# This should not be done with normalized data + +METRICS_TO_NORMALIZE = [] + +with 
open(data_dir) as f: + raw_data = json.load(f) + +processed_data = data_process_pipeline( + raw_data=raw_data, metrics_to_normalize=METRICS_TO_NORMALIZE +) + +environment_comparison_matrix, sample_effeciency_matrix = create_matrices_for_rliable( + data_dictionary=processed_data, + environment_name=ENV_NAME, + metrics_to_normalize=METRICS_TO_NORMALIZE, +) + +# Get all tasks +tasks = list(processed_data[ENV_NAME.lower()].keys()) + +# Aggregate data over a single tasks +for task in tasks: + fig = plot_single_task( + processed_data=processed_data, + environment_name=ENV_NAME, + task_name=task, + metric_name="win_rate", + metrics_to_normalize=METRICS_TO_NORMALIZE, + legend_map=legend_map, + ) + + fig.figure.savefig(f"{png_plot_dir}_{task}_agg_win_rate.png", bbox_inches="tight") + if SAVE_PDF: + fig.figure.savefig( + f"{pdf_plot_dir}_{task}_agg_win_rate.pdf", bbox_inches="tight" + ) + + # Close the figure object + plt.close(fig.figure) + +# Aggregate data over all environment tasks. + +fig, _, _ = sample_efficiency_curves( # type: ignore + sample_effeciency_matrix, + metric_name="win_rate", + metrics_to_normalize=METRICS_TO_NORMALIZE, + legend_map=legend_map, +) +fig.figure.savefig( + f"{png_plot_dir}win_rate_sample_effeciency_curve.png", bbox_inches="tight" +) +if SAVE_PDF: + fig.figure.savefig( + f"{pdf_plot_dir}win_rate_sample_effeciency_curve.pdf", bbox_inches="tight" + ) diff --git a/process_env_name.py b/process_env_name.py new file mode 100644 index 00000000..1c6e229b --- /dev/null +++ b/process_env_name.py @@ -0,0 +1,96 @@ +import json + + +# Load your JSON data +def load_json(filename): + with open(filename) as file: + data = json.load(file) + return data + + +# Save the modified data back into JSON +def save_json(filename, data): + with open(filename, "w") as file: + json.dump(data, file, indent=4) + + +# Merge "HeuristicEnemySMAX" into "Smax" +def merge_data(data): + if "HeuristicEnemySMAX" in data and "Smax" in data: + # Iterate through the task_names in "HeuristicEnemySMAX" + for task_name, algorithms in data["HeuristicEnemySMAX"].items(): + if task_name not in data["Smax"]: + # If task_name does not exist in "Smax", add it + data["Smax"][task_name] = algorithms + else: + # If task_name exists, merge the algorithm_name data + for algorithm_name, seeds in algorithms.items(): + if algorithm_name not in data["Smax"][task_name]: + data["Smax"][task_name][algorithm_name] = seeds + else: + # Merge seed data + data["Smax"][task_name][algorithm_name].update(seeds) + # Remove "HeuristicEnemySMAX" from the data + del data["HeuristicEnemySMAX"] + return data + + +# Merge "RobotWarehouse-v0" into "RobotWarehouse" +def merge_data_rware(data): + if "RobotWarehouse-v0" in data and "RobotWarehouse" in data: + # Iterate through the task_names in "HeuristicEnemySMAX" + for task_name, algorithms in data["RobotWarehouse-v0"].items(): + if task_name not in data["RobotWarehouse"]: + # If task_name does not exist in "Smax", add it + data["RobotWarehouse"][task_name] = algorithms + else: + # If task_name exists, merge the algorithm_name data + for algorithm_name, seeds in algorithms.items(): + if algorithm_name not in data["RobotWarehouse"][task_name]: + data["RobotWarehouse"][task_name][algorithm_name] = seeds + else: + # Merge seed data + data["RobotWarehouse"][task_name][algorithm_name].update(seeds) + # Remove "RobotWarehouse-v0" from the data + del data["RobotWarehouse-v0"] + return data + + +# Merge "LevelBasedForaging-v0" into "LevelBasedForaging" +def merge_data_lbf(data): + if 
"LevelBasedForaging-v0" in data and "LevelBasedForaging" in data: + # Iterate through the task_names in "HeuristicEnemySMAX" + for task_name, algorithms in data["LevelBasedForaging-v0"].items(): + if task_name not in data["LevelBasedForaging"]: + # If task_name does not exist in "Smax", add it + data["LevelBasedForaging"][task_name] = algorithms + else: + # If task_name exists, merge the algorithm_name data + for algorithm_name, seeds in algorithms.items(): + if algorithm_name not in data["LevelBasedForaging"][task_name]: + data["LevelBasedForaging"][task_name][algorithm_name] = seeds + else: + # Merge seed data + data["LevelBasedForaging"][task_name][algorithm_name].update( + seeds + ) + # Remove "RobotWarehouse-v0" from the data + del data["LevelBasedForaging-v0"] + return data + + +# Main function to load, process, and save the JSON data +def main(json_filename, new_json_filename): + data = load_json(json_filename) + data = merge_data(data) + data = merge_data_rware(data) + data = merge_data_lbf(data) + save_json(new_json_filename, data) + + +# Replace 'your_file.json' with your actual JSON file name +json_filename = ( + "./data/full-benchmark-update/merged_data/metrics.json" +) +new_json_filename = "./data/full-benchmark-update/merged_data/metrics_name_processed.json" +main(json_filename, new_json_filename) diff --git a/process_step_counts.py b/process_step_counts.py new file mode 100644 index 00000000..e8f0b953 --- /dev/null +++ b/process_step_counts.py @@ -0,0 +1,92 @@ +import json + +import numpy as np + + +def load_json(filename): + with open(filename) as file: + data = json.load(file) + return data + + +# Save the modified data back into JSON +def save_json(filename, data): + with open(filename, "w") as file: + json.dump(data, file, indent=4) + + +def interpolate_steps(data): + for env_name, task_data in data.items(): + for task_name, alg_data in task_data.items(): + for algorithm_name, seed_data in alg_data.items(): + for seed_key, metrics in seed_data.items(): + if seed_key == "absolute_metrics": + continue # Skip absolute metrics + + step_keys = sorted( + [key for key in metrics.keys() if key.startswith("step_")], + key=lambda x: int(x.split("_")[1]), + ) + max_step_index = max(int(key.split("_")[1]) for key in step_keys) + + if max_step_index < 121: + # if max_step_index < 199: + # Interpolation + x = np.array([int(k.split("_")[1]) for k in step_keys]) + y_step_count = np.array( + [metrics[k]["step_count"] for k in step_keys] + ) + y_elapsed_time = np.array( + [metrics[k]["elapsed_time"] for k in step_keys] + ) + + metric_keys = [ + k + for k in metrics[step_keys[0]].keys() + if k + not in ["step_count", "elapsed_time", "steps_per_second"] + ] + y_metrics = { + metric: np.array([metrics[k][metric][0] for k in step_keys]) + for metric in metric_keys + } + + x_new = np.linspace( + 0, max_step_index, 122 + ) # Ensure covering up to step_121 + # x_new = np.linspace( + # 0, max_step_index, 200 + # ) # Ensure covering up to step_199 + step_count_interp = np.interp(x_new, x, y_step_count) + elapsed_time_interp = np.interp(x_new, x, y_elapsed_time) + metrics_interp = { + metric: np.interp(x_new, x, y) + for metric, y in y_metrics.items() + } + + # Update the data with interpolated values + for i in range( + len(x_new) + ): # Adjusted to iterate over the new range + # Now directly using i to ensure step_121 is included + step_key = f"step_{i}" + metrics[step_key] = { + "step_count": int(step_count_interp[i]), + "elapsed_time": elapsed_time_interp[i], + "steps_per_second": 
+                    if max_step_index < 121:
+                        # if max_step_index < 199:
+                        # Interpolation
+                        x = np.array([int(k.split("_")[1]) for k in step_keys])
+                        y_step_count = np.array(
+                            [metrics[k]["step_count"] for k in step_keys]
+                        )
+                        y_elapsed_time = np.array(
+                            [metrics[k]["elapsed_time"] for k in step_keys]
+                        )
+
+                        metric_keys = [
+                            k
+                            for k in metrics[step_keys[0]].keys()
+                            if k
+                            not in ["step_count", "elapsed_time", "steps_per_second"]
+                        ]
+                        y_metrics = {
+                            metric: np.array([metrics[k][metric][0] for k in step_keys])
+                            for metric in metric_keys
+                        }
+
+                        x_new = np.linspace(
+                            0, max_step_index, 122
+                        )  # Ensure covering up to step_121
+                        # x_new = np.linspace(
+                        #     0, max_step_index, 200
+                        # )  # Ensure covering up to step_199
+                        step_count_interp = np.interp(x_new, x, y_step_count)
+                        elapsed_time_interp = np.interp(x_new, x, y_elapsed_time)
+                        metrics_interp = {
+                            metric: np.interp(x_new, x, y)
+                            for metric, y in y_metrics.items()
+                        }
+
+                        # Update the data with interpolated values
+                        for i in range(
+                            len(x_new)
+                        ):  # Adjusted to iterate over the new range
+                            # Now directly using i to ensure step_121 is included
+                            step_key = f"step_{i}"
+                            metrics[step_key] = {
+                                "step_count": int(step_count_interp[i]),
+                                "elapsed_time": elapsed_time_interp[i],
+                                "steps_per_second": metrics[step_keys[-1]][
+                                    "steps_per_second"
+                                ],  # Duplicating the last value
+                            }
+                            for metric, y in metrics_interp.items():
+                                metrics[step_key][metric] = [y[i]]
+
+    return data
+
+
+# Replace 'your_file.json' with your actual JSON file name
+json_filename = "./data/full-benchmark-update/merged_data/metrics_name_processed.json"
+new_json_filename = "./data/full-benchmark-update/merged_data/metrics_stepcount_processed.json"
+
+data = load_json(json_filename)
+processed_data = interpolate_steps(data)
+save_json(new_json_filename, processed_data)
diff --git a/process_step_counts_mabrax.py b/process_step_counts_mabrax.py
new file mode 100644
index 00000000..2b4e3a4e
--- /dev/null
+++ b/process_step_counts_mabrax.py
@@ -0,0 +1,94 @@
+import json
+
+import numpy as np
+
+
+def load_json(filename):
+    with open(filename) as file:
+        data = json.load(file)
+    return data
+
+
+# Save the modified data back into JSON
+def save_json(filename, data):
+    with open(filename, "w") as file:
+        json.dump(data, file, indent=4)
+
+
+def interpolate_steps(data):
+    for env_name, task_data in data.items():
+        for task_name, alg_data in task_data.items():
+            for algorithm_name, seed_data in alg_data.items():
+                for seed_key, metrics in seed_data.items():
+                    if seed_key == "absolute_metrics":
+                        continue  # Skip absolute metrics
+
+                    step_keys = sorted(
+                        [key for key in metrics.keys() if key.startswith("step_")],
+                        key=lambda x: int(x.split("_")[1]),
+                    )
+                    max_step_index = max(int(key.split("_")[1]) for key in step_keys)
+
+                    # if max_step_index < 121:
+                    if max_step_index < 1830:
+                        # Interpolation
+                        x = np.array([int(k.split("_")[1]) for k in step_keys])
+                        y_step_count = np.array(
+                            [metrics[k]["step_count"] for k in step_keys]
+                        )
+                        y_elapsed_time = np.array(
+                            [metrics[k]["elapsed_time"] for k in step_keys]
+                        )
+
+                        metric_keys = [
+                            k
+                            for k in metrics[step_keys[0]].keys()
+                            if k
+                            not in ["step_count", "elapsed_time", "steps_per_second"]
+                        ]
+                        y_metrics = {
+                            metric: np.array([metrics[k][metric][0] for k in step_keys])
+                            for metric in metric_keys
+                        }
+
+                        # x_new = np.linspace(
+                        #     0, max_step_index, 122
+                        # )  # Ensure covering up to step_121
+                        x_new = np.linspace(
+                            0, max_step_index, 1831
+                        )  # Ensure covering up to step_1830
+                        step_count_interp = np.interp(x_new, x, y_step_count)
+                        elapsed_time_interp = np.interp(x_new, x, y_elapsed_time)
+                        metrics_interp = {
+                            metric: np.interp(x_new, x, y)
+                            for metric, y in y_metrics.items()
+                        }
+
+                        # Update the data with interpolated values
+                        for i in range(
+                            len(x_new)
+                        ):  # Adjusted to iterate over the new range
+                            # Now directly using i to ensure step_1830 is included
+                            step_key = f"step_{i}"
+                            metrics[step_key] = {
+                                "step_count": int(step_count_interp[i]),
+                                "elapsed_time": elapsed_time_interp[i],
+                                "steps_per_second": metrics[step_keys[-1]][
+                                    "steps_per_second"
+                                ],  # Duplicating the last value
+                            }
+                            for metric, y in metrics_interp.items():
+                                metrics[step_key][metric] = [y[i]]
+
+    return data
+
+
+# Replace 'your_file.json' with your actual JSON file name
+json_filename = "data/full_benchmark/mabrax/merged_data/metrics.json"
+new_json_filename = (
+    "data/full_benchmark/mabrax/merged_data/metrics_stepcount_processed.json"
+)
+
+data = load_json(json_filename)
+processed_data = interpolate_steps(data)
+save_json(new_json_filename, processed_data)
diff --git a/process_win_rate.py b/process_win_rate.py
new file mode 100644
index 00000000..8b86451a
--- /dev/null
+++ b/process_win_rate.py
@@ -0,0 +1,38 @@
+import json
+
+
+def load_json(filename):
+    with open(filename) as file:
+        data = json.load(file)
+    return data
+
+
+# Save the modified data back into JSON
+def save_json(filename, data):
+    with open(filename, "w") as file:
+        json.dump(data, file, indent=4)
+
+
+def adjust_win_rate(data):
+    """Divide all win_rate metrics by 100."""
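+    # Win rates appear to be logged as percentages (0-100), while the
+    # downstream plotting tools expect rates in [0, 1], hence the division.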
+    for env_name, task_data in data.items():
+        for task_name, alg_data in task_data.items():
+            for algorithm_name, seed_data in alg_data.items():
+                for seed_key, metrics in seed_data.items():
+                    for key, step_data in metrics.items():
+                        if "win_rate" in step_data:
+                            # Divide the win_rate values by 100
+                            step_data["win_rate"] = [
+                                value / 100 for value in step_data["win_rate"]
+                            ]
+
+    return data
+
+
+# Replace 'your_file.json' with your actual JSON file name
+json_filename = "./data/full-benchmark-update/merged_data/metrics_stepcount_processed.json"
+new_json_filename = "./data/full-benchmark-update/merged_data/metrics_winrate_processed.json"
+
+data = load_json(json_filename)
+processed_data = adjust_win_rate(data)
+save_json(new_json_filename, processed_data)
diff --git a/pull_data.py b/pull_data.py
new file mode 100644
index 00000000..54d05e88
--- /dev/null
+++ b/pull_data.py
@@ -0,0 +1,37 @@
+from marl_eval.json_tools import concatenate_json_files, pull_neptune_data
+
+neptune_tags = [
+    "mat-measure-set-benchmark-lbf",
+    "mat-measure-set-benchmark-rware",
+    "mat-measure-set-benchmark-smax",
+    # "retmat-h2-benchmark-no-brax",
+    # "retmat-simple-rewrite-h1-benchmark-no-xpos-no-brax",
+    "ruan-measure-set-smax-benchmark",
+    "rerun-mava-rec-systems-smax",
+    "rware-measure-set-benchmark-small-lr",
+    "rerun-mava-rec-systems-rware",
+    # "lbf_best_hyperparams",
+    # "retmat-h2-first-benchmark",
+    # "mat-measure-set-benchmark-mabrax",
+    # "vector-cleaner-measure-set-benchmark",
+    "vector-connector-measure-set-benchmark",
+    # "mat-measure-set-benchmark-vector-cleaner",
+    "mat-measure-set-benchmark-vector-connector",
+    # "retmat-simple-rewrite-hypothesis-1-benchmark-no-xpos",
+    # "retmat-h2-first-benchmark",
+    # "mat-measure-set-benchmark-mabrax",
+    # "liam-mabrax-benchmark-ppo-2",
+    "retmat-new-20M-sweep-benchmark",
+]
+
+for tag in neptune_tags:
+    pull_neptune_data(
+        project_name="InstaDeep/Mava",
+        tags=[tag],
+        store_directory="./data/full-benchmark-update",
+    )
+
+concatenate_json_files(
+    input_directory="./data/full-benchmark-update",
+    output_json_path="./data/full-benchmark-update/merged_data",
+)
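+
+# The merged JSON written to merged_data/ (presumably the metrics.json used
+# above) is the input consumed by process_env_name.py and the other
+# process_*.py scripts.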
diff --git a/remove_algo_data.py b/remove_algo_data.py
new file mode 100644
index 00000000..0fc9d2ab
--- /dev/null
+++ b/remove_algo_data.py
@@ -0,0 +1,53 @@
+import json
+
+
+# Load your JSON data
+def load_json(filename):
+    with open(filename) as file:
+        data = json.load(file)
+    return data
+
+
+# Save the modified data back into JSON
+def save_json(filename, data):
+    with open(filename, "w") as file:
+        json.dump(data, file, indent=4)
+
+
+def remove_retmat(data):
+    # Iterate through each env_name
+    for env_name, tasks in list(
+        data.items()
+    ):  # list() to allow modification during iteration
+        # Iterate through each task_name
+        for task_name, algorithms in list(tasks.items()):
+            # If "retmat" or "retmat_cont" is an algorithm under the current task, remove it
+            if "retmat" in algorithms:
+                del data[env_name][task_name]["retmat"]
+
+            if "retmat_cont" in algorithms:
+                del data[env_name][task_name]["retmat_cont"]
+            # If the task becomes empty after removing "retmat", consider removing the task
+            # if not data[env_name][task_name]:
+            #     del data[env_name][task_name]
+        # Optionally, remove the env_name if it becomes empty
+        # if not data[env_name]:
+        #     del data[env_name]
+    return data
+
+
+# Main function to load, process, and save the JSON data
+def main(json_filename, new_json_filename):
+    data = load_json(json_filename)
+    data = remove_retmat(data)
+    save_json(new_json_filename, data)
+
+
+# Replace 'your_file.json' with your actual JSON file name
+# json_filename = "./data/full-benchmark-update/merged_data/metrics_winrate_processed.json"
+# new_json_filename = "./data/full-benchmark-update/merged_data/metrics_winrate_processed_no_retmat.json"
+
+# Note: input and output are the same file here, so the filtered data
+# overwrites interim_seed_duplicated.json in place.
+json_filename = "./data/full-benchmark-update/merged_data/interim_seed_duplicated.json"
+new_json_filename = "./data/full-benchmark-update/merged_data/interim_seed_duplicated.json"
+
+main(json_filename, new_json_filename)
diff --git a/remove_certain_tasks.py b/remove_certain_tasks.py
new file mode 100644
index 00000000..5d5c66ee
--- /dev/null
+++ b/remove_certain_tasks.py
@@ -0,0 +1,33 @@
+import json
+
+
+def filter_json(data, tasks_to_remove):
+    filtered_data = {}
+    for env_name, env_tasks in data.items():
+        filtered_env_tasks = {}
+        for task_name, task_algos in env_tasks.items():
+            if task_name not in tasks_to_remove:
+                filtered_env_tasks[task_name] = task_algos
+        if filtered_env_tasks:
+            filtered_data[env_name] = filtered_env_tasks
+    return filtered_data
+
+
+# Example usage:
+input_file = "data/full-benchmark-update/merged_data/master_norm_episode_return.json"
+output_file = "data/full-benchmark-update/merged_data/master_norm_episode_return.json"
+tasks_to_remove = [
+    "clean-20x20x15a",
+    "clean-15x15x6a",
+    "clean-10x10x3a",
+]  # Replace with your list of tasks to remove
+
+# Read the input JSON file
+with open(input_file, "r") as f:
+    data = json.load(f)
+
+# Filter the data
+filtered_data = filter_json(data, tasks_to_remove)
+
+# Write the filtered data to the output JSON file
+with open(output_file, "w") as f:
+    json.dump(filtered_data, f, indent=2)
\ No newline at end of file
diff --git a/rename_algos.py b/rename_algos.py
new file mode 100644
index 00000000..f8a8bca8
--- /dev/null
+++ b/rename_algos.py
@@ -0,0 +1,31 @@
+import json
+
+
+def rename_algorithms(data):
+    rename_map = {
+        "retmat_cont": "retmat",
+        "retmat_cont_memory": "retmat_memory",
+        "mat_cont": "mat",
+    }
+
+    # Renaming happens in place; the caller relies on `data` being mutated.
+    for env_name in data:
+        for task_name in data[env_name]:
+            algos_to_rename = list(
+                set(data[env_name][task_name].keys()) & set(rename_map.keys())
+            )
+            for old_name in algos_to_rename:
+                new_name = rename_map[old_name]
+                data[env_name][task_name][new_name] = data[env_name][task_name].pop(
+                    old_name
+                )
+                print(f"Renamed {old_name} to {new_name} in {env_name}/{task_name}")
+
+
+# Load the JSON file
+file_path = "data/full-benchmark-update/merged_data/interim_seed_duplicated.json"  # Replace with your actual file path
+with open(file_path, "r") as file:
+    data = json.load(file)
+
+# Rename the algorithms
+rename_algorithms(data)
+
+# Save the modified data back to the JSON file
+with open(file_path, "w") as file:
+    json.dump(data, file, indent=2)
+
+print("JSON file has been updated with renamed algorithms.")
\ No newline at end of file
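-- 
The input and output paths above imply a processing order. The sketch below is
one plausible end-to-end run; it is indicative only, since the seed-duplication,
algorithm-removal, and renaming steps were evidently run in several passes over
intermediate files, and the final plotting script (SMAX here) is one of several
plot_data_*.py variants. The orchestration script itself is hypothetical:

    # run_pipeline.py (hypothetical orchestration sketch)
    import subprocess

    scripts = [
        "pull_data.py",            # pull Neptune data and merge the raw JSON
        "process_env_name.py",     # merge duplicated environment names
        "process_step_counts.py",  # resample logs onto a common step grid
        "process_win_rate.py",     # rescale win rates to [0, 1]
        "duplicate_seed_data.py",  # patch in missing seeds
        "remove_algo_data.py",     # drop unused retmat variants
        "rename_algos.py",         # normalise *_cont algorithm names
        "plot_data_smax.py",       # produce the SMAX plots
    ]
    for script in scripts:
        subprocess.run(["python", script], check=True)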