From b85d8221c8a702814d3c34e1d8264d6500d2ba0b Mon Sep 17 00:00:00 2001 From: RuanJohn Date: Wed, 28 Aug 2024 10:39:34 +0200 Subject: [PATCH] feat: restructure plotting scripts --- .../plot_data.py | 0 .../plot_data_all_envs.py | 0 .../plot_data_cleaner.py | 0 .../plot_data_connector.py | 39 +++- .../plot_data_lbf.py | 13 +- .../plot_data_mabrax.py | 26 ++- .../plot_data_rware.py | 13 +- .../plot_data_smax.py | 13 +- .../check_for_absolutes.py | 29 +++ .../duplicate_seed_data.py | 0 data_processing_scripts/keep_certain_tasks.py | 39 ++++ .../master_episode_norm.py | 0 .../process_env_name.py | 8 +- .../process_step_counts.py | 61 +++++- .../process_step_counts_new.py | 119 +++++++++++ .../process_win_rate.py | 7 +- data_processing_scripts/pull_data.py | 48 +++++ .../pull_data_ablations.py | 188 ++++++++++++++++++ .../pull_data_merge_neptune_tag.py | 176 ++++++++++++++++ .../remove_algo_data.py | 0 .../remove_certain_tasks.py | 22 +- data_processing_scripts/remove_win_rate.py | 44 ++++ .../rename_algos.py | 21 +- .../set_same_seed_number.py | 57 ++++++ keep_certain_tasks.py | 37 ---- marl_eval/utils/data_processing_utils.py | 9 +- process_step_counts_mabrax.py | 94 --------- pull_data.py | 37 ---- 28 files changed, 876 insertions(+), 224 deletions(-) rename plot_data.py => data_plotting_scripts/plot_data.py (100%) rename plot_data_all_envs.py => data_plotting_scripts/plot_data_all_envs.py (100%) rename plot_data_cleaner.py => data_plotting_scripts/plot_data_cleaner.py (100%) rename plot_data_connector.py => data_plotting_scripts/plot_data_connector.py (81%) rename plot_data_lbf.py => data_plotting_scripts/plot_data_lbf.py (93%) rename plot_data_mabrax.py => data_plotting_scripts/plot_data_mabrax.py (81%) rename plot_data_rware.py => data_plotting_scripts/plot_data_rware.py (93%) rename plot_data_smax.py => data_plotting_scripts/plot_data_smax.py (93%) create mode 100644 data_processing_scripts/check_for_absolutes.py rename duplicate_seed_data.py => 
data_processing_scripts/duplicate_seed_data.py (100%) create mode 100644 data_processing_scripts/keep_certain_tasks.py rename master_episode_norm.py => data_processing_scripts/master_episode_norm.py (100%) rename process_env_name.py => data_processing_scripts/process_env_name.py (94%) rename process_step_counts.py => data_processing_scripts/process_step_counts.py (56%) create mode 100644 data_processing_scripts/process_step_counts_new.py rename process_win_rate.py => data_processing_scripts/process_win_rate.py (82%) create mode 100644 data_processing_scripts/pull_data.py create mode 100644 data_processing_scripts/pull_data_ablations.py create mode 100644 data_processing_scripts/pull_data_merge_neptune_tag.py rename remove_algo_data.py => data_processing_scripts/remove_algo_data.py (100%) rename remove_certain_tasks.py => data_processing_scripts/remove_certain_tasks.py (60%) create mode 100644 data_processing_scripts/remove_win_rate.py rename rename_algos.py => data_processing_scripts/rename_algos.py (57%) create mode 100644 data_processing_scripts/set_same_seed_number.py delete mode 100644 keep_certain_tasks.py delete mode 100644 process_step_counts_mabrax.py delete mode 100644 pull_data.py diff --git a/plot_data.py b/data_plotting_scripts/plot_data.py similarity index 100% rename from plot_data.py rename to data_plotting_scripts/plot_data.py diff --git a/plot_data_all_envs.py b/data_plotting_scripts/plot_data_all_envs.py similarity index 100% rename from plot_data_all_envs.py rename to data_plotting_scripts/plot_data_all_envs.py diff --git a/plot_data_cleaner.py b/data_plotting_scripts/plot_data_cleaner.py similarity index 100% rename from plot_data_cleaner.py rename to data_plotting_scripts/plot_data_cleaner.py diff --git a/plot_data_connector.py b/data_plotting_scripts/plot_data_connector.py similarity index 81% rename from plot_data_connector.py rename to data_plotting_scripts/plot_data_connector.py index 3ea066be..80b28d25 100644 --- a/plot_data_connector.py 
+++ b/data_plotting_scripts/plot_data_connector.py @@ -30,14 +30,16 @@ data_process_pipeline, ) +base_folder_name = "biggest-benchmark" + ENV_NAME = "MaConnector" SAVE_PDF = False -data_dir = "data/full-benchmark-update/merged_data/interim_seed_duplicated.json" -png_plot_dir = "plots/full-benchmark-update/connector_no_retmat/png/" -pdf_plot_dir = "plots/full-benchmark-update/connector_no_retmat/pdf/" +data_dir = f"data/{base_folder_name}/merged_data/metrics_seed_processed.json" +png_plot_dir = f"plots/{base_folder_name}/connector_no_retmat/png/" +pdf_plot_dir = f"plots/{base_folder_name}/connector_no_retmat/pdf/" -PLOT_METRIC = "win_rate" # "mean_episode_return" +PLOT_METRIC = "mean_episode_return" # "mean_episode_return" "win_rate" legend_map = { "rec_mappo": "Rec MAPPO", @@ -45,8 +47,10 @@ "ff_mappo": "FF MAPPO", "ff_ippo": "FF IPPO", "mat": "MAT", - # "retmat": "RetMAT", + "retmat": "RetMAT", "retmat_memory": "RetMAT Memory", + "ff_happo": "FF HAPPO", + "rec_happo": "Rec HAPPO", # "retmat_main_memory": "RetMAT Main Memory", # "retmat_yarn_memory": "RetMAT Yarn Memory", } @@ -106,9 +110,13 @@ ], legend_map=legend_map, ) -fig.figure.savefig(f"{png_plot_dir}_{PLOT_METRIC}_prob_of_improvement.png", bbox_inches="tight") +fig.figure.savefig( + f"{png_plot_dir}_{PLOT_METRIC}_prob_of_improvement.png", bbox_inches="tight" +) if SAVE_PDF: - fig.figure.savefig(f"{pdf_plot_dir}_{PLOT_METRIC}_prob_of_improvement.pdf", bbox_inches="tight") + fig.figure.savefig( + f"{pdf_plot_dir}_{PLOT_METRIC}_prob_of_improvement.pdf", bbox_inches="tight" + ) # aggregate scores fig, _, _ = aggregate_scores( # type: ignore @@ -117,10 +125,15 @@ metrics_to_normalize=METRICS_TO_NORMALIZE, save_tabular_as_latex=True, legend_map=legend_map, + tabular_results_file_path=f"{png_plot_dir[:-4]}aggregated_score", +) +fig.figure.savefig( + f"{png_plot_dir}_{PLOT_METRIC}_aggregate_scores.png", bbox_inches="tight" ) -fig.figure.savefig(f"{png_plot_dir}_{PLOT_METRIC}_aggregate_scores.png", 
bbox_inches="tight") if SAVE_PDF: - fig.figure.savefig(f"{pdf_plot_dir}_{PLOT_METRIC}_aggregate_scores.pdf", bbox_inches="tight") + fig.figure.savefig( + f"{pdf_plot_dir}_{PLOT_METRIC}_aggregate_scores.pdf", bbox_inches="tight" + ) # performance profiles fig = performance_profiles( @@ -129,9 +142,13 @@ metrics_to_normalize=METRICS_TO_NORMALIZE, legend_map=legend_map, ) -fig.figure.savefig(f"{png_plot_dir}_{PLOT_METRIC}_performance_profile.png", bbox_inches="tight") +fig.figure.savefig( + f"{png_plot_dir}_{PLOT_METRIC}_performance_profile.png", bbox_inches="tight" +) if SAVE_PDF: - fig.figure.savefig(f"{pdf_plot_dir}_{PLOT_METRIC}_performance_profile.pdf", bbox_inches="tight") + fig.figure.savefig( + f"{pdf_plot_dir}_{PLOT_METRIC}_performance_profile.pdf", bbox_inches="tight" + ) ############################## diff --git a/plot_data_lbf.py b/data_plotting_scripts/plot_data_lbf.py similarity index 93% rename from plot_data_lbf.py rename to data_plotting_scripts/plot_data_lbf.py index c61d13ad..be654cb1 100644 --- a/plot_data_lbf.py +++ b/data_plotting_scripts/plot_data_lbf.py @@ -30,12 +30,14 @@ data_process_pipeline, ) +base_folder_name = "biggest-benchmark" + ENV_NAME = "LevelBasedForaging" SAVE_PDF = False -data_dir = "data/full-benchmark-update/merged_data/interim_seed_duplicated.json" -png_plot_dir = "plots/full-benchmark-update/lbf_no_retmat/png/" -pdf_plot_dir = "plots/full-benchmark-update/lbf_no_retmat/pdf/" +data_dir = f"data/{base_folder_name}/merged_data/metrics_seed_processed.json" +png_plot_dir = f"plots/{base_folder_name}/lbf_no_retmat/png/" +pdf_plot_dir = f"plots/{base_folder_name}/lbf_no_retmat/pdf/" legend_map = { "rec_mappo": "Rec MAPPO", @@ -43,8 +45,10 @@ "ff_mappo": "FF MAPPO", "ff_ippo": "FF IPPO", "mat": "MAT", - # "retmat": "RetMAT", + "retmat": "RetMAT", "retmat_memory": "RetMAT Memory", + "ff_happo": "FF HAPPO", + "rec_happo": "Rec HAPPO", # "retmat_main_memory": "RetMAT Main Memory", # "retmat_yarn_memory": "RetMAT Yarn Memory", } @@ 
-115,6 +119,7 @@ metrics_to_normalize=METRICS_TO_NORMALIZE, save_tabular_as_latex=True, legend_map=legend_map, + tabular_results_file_path=f"{png_plot_dir[:-4]}aggregated_score", ) fig.figure.savefig(f"{png_plot_dir}aggregate_scores.png", bbox_inches="tight") if SAVE_PDF: diff --git a/plot_data_mabrax.py b/data_plotting_scripts/plot_data_mabrax.py similarity index 81% rename from plot_data_mabrax.py rename to data_plotting_scripts/plot_data_mabrax.py index fda090a8..11c85461 100644 --- a/plot_data_mabrax.py +++ b/data_plotting_scripts/plot_data_mabrax.py @@ -30,12 +30,14 @@ data_process_pipeline, ) +base_folder_name = "biggest-benchmark-sac" + ENV_NAME = "MaBrax" SAVE_PDF = False -data_dir = "data/full-benchmark-update/merged_data/interim_seed_duplicated.json" -png_plot_dir = "plots/full-benchmark-update/mabrax_no_retmat/png/" -pdf_plot_dir = "plots/full-benchmark-update/mabrax_no_retmat/pdf/" +data_dir = f"data/{base_folder_name}/merged_data/metrics_seed_processed.json" +png_plot_dir = f"plots/{base_folder_name}/mabrax/png/" +pdf_plot_dir = f"plots/{base_folder_name}/mabrax/pdf/" legend_map = { "rec_mappo": "Rec MAPPO", @@ -43,11 +45,25 @@ "ff_mappo": "FF MAPPO", "ff_ippo": "FF IPPO", "mat": "MAT", - # "retmat": "RetMAT", + "retmat": "RetMAT", "retmat_memory": "RetMAT Memory", + "ff_happo": "FF HAPPO", + "rec_happo": "Rec HAPPO", + "ff_masac": "FF MASAC", + "ff_hasac": "FF HASAC", # "retmat_main_memory": "RetMAT Main Memory", # "retmat_yarn_memory": "RetMAT Yarn Memory", } +# legend_map = { +# "retmat_cont_memory_single-device-64-envs_mava-cont-system-lr-decay": "mava-nets-tpu-64-envs", +# "retmat_cont_memory_no-lr-decay-base": "default-no-lr-decay", +# "retmat_cont_memory_mava-cont-system_no-lr-decay": "mava-nets-no-lr-decay", +# "retmat_cont_memory_mava-cont-system_lr-decay": "mava-nets", +# "retmat_cont_memory_increase-epochs_mava-cont-system-lr-decay": "mava-nets-increase-epochs", +# "retmat_cont_memory_double-lr_mava-cont-system-lr-decay": 
"mava-nets-double-lr", +# "retmat_cont_memory_on-gpu-64-envs_mava-cont-system-lr-decay": "mava-nets-gpu-64-envs", +# } +# base_algo = "retmat_cont_memory_on-gpu-64-envs_mava-cont-system-lr-decay" ############################## # Read in and process data @@ -102,6 +118,7 @@ # ["retmat_memory", "retmat"], # ["retmat_yarn_memory", "mat"], ], + # algorithms_to_compare = [[base_algo, other_algo] for other_algo in legend_map.keys() if other_algo != base_algo], legend_map=legend_map, ) fig.figure.savefig(f"{png_plot_dir}prob_of_improvement.png", bbox_inches="tight") @@ -115,6 +132,7 @@ metrics_to_normalize=METRICS_TO_NORMALIZE, save_tabular_as_latex=True, legend_map=legend_map, + tabular_results_file_path=f"{png_plot_dir[:-4]}aggregated_score", ) fig.figure.savefig(f"{png_plot_dir}aggregate_scores.png", bbox_inches="tight") if SAVE_PDF: diff --git a/plot_data_rware.py b/data_plotting_scripts/plot_data_rware.py similarity index 93% rename from plot_data_rware.py rename to data_plotting_scripts/plot_data_rware.py index 6cce150c..91781ead 100644 --- a/plot_data_rware.py +++ b/data_plotting_scripts/plot_data_rware.py @@ -30,12 +30,14 @@ data_process_pipeline, ) +base_folder_name = "biggest-benchmark" + ENV_NAME = "RobotWarehouse" SAVE_PDF = False -data_dir = "data/full-benchmark-update/merged_data/interim_seed_duplicated.json" -png_plot_dir = "plots/full-benchmark-update/rware_no_retmat/png/" -pdf_plot_dir = "plots/full-benchmark-update/rware_no_retmat/pdf/" +data_dir = f"data/{base_folder_name}/merged_data/metrics_seed_processed.json" +png_plot_dir = f"plots/{base_folder_name}/rware_no_retmat/png/" +pdf_plot_dir = f"plots/{base_folder_name}/rware_no_retmat/pdf/" legend_map = { "rec_mappo": "Rec MAPPO", @@ -43,8 +45,10 @@ "ff_mappo": "FF MAPPO", "ff_ippo": "FF IPPO", "mat": "MAT", - # "retmat": "RetMAT", + "retmat": "RetMAT", "retmat_memory": "RetMAT Memory", + "ff_happo": "FF HAPPO", + "rec_happo": "Rec HAPPO", # "retmat_main_memory": "RetMAT Main Memory", # 
"retmat_yarn_memory": "RetMAT Yarn Memory", } @@ -115,6 +119,7 @@ metrics_to_normalize=METRICS_TO_NORMALIZE, save_tabular_as_latex=True, legend_map=legend_map, + tabular_results_file_path=f"{png_plot_dir[:-4]}aggregated_score", ) fig.figure.savefig(f"{png_plot_dir}aggregate_scores.png", bbox_inches="tight") if SAVE_PDF: diff --git a/plot_data_smax.py b/data_plotting_scripts/plot_data_smax.py similarity index 93% rename from plot_data_smax.py rename to data_plotting_scripts/plot_data_smax.py index 82a210eb..8b61ca24 100644 --- a/plot_data_smax.py +++ b/data_plotting_scripts/plot_data_smax.py @@ -30,12 +30,14 @@ data_process_pipeline, ) +base_folder_name = "biggest-benchmark" + ENV_NAME = "Smax" SAVE_PDF = False -data_dir = "data/full-benchmark-update/merged_data/interim_seed_duplicated.json" -png_plot_dir = "plots/full-benchmark-update/smax_no_retmat/png/" -pdf_plot_dir = "plots/full-benchmark-update/smax_no_retmat/pdf/" +data_dir = f"data/{base_folder_name}/merged_data/metrics_seed_processed.json" +png_plot_dir = f"plots/{base_folder_name}/smax_no_retmat/png/" +pdf_plot_dir = f"plots/{base_folder_name}/smax_no_retmat/pdf/" legend_map = { "rec_mappo": "Rec MAPPO", @@ -43,8 +45,10 @@ "ff_mappo": "FF MAPPO", "ff_ippo": "FF IPPO", "mat": "MAT", - # "retmat": "RetMAT", + "retmat": "RetMAT", "retmat_memory": "RetMAT Memory", + "ff_happo": "FF HAPPO", + "rec_happo": "Rec HAPPO", # "retmat_main_memory": "RetMAT Main Memory", # "retmat_yarn_memory": "RetMAT Yarn Memory", } @@ -115,6 +119,7 @@ metrics_to_normalize=METRICS_TO_NORMALIZE, save_tabular_as_latex=True, legend_map=legend_map, + tabular_results_file_path=f"{png_plot_dir[:-4]}aggregated_score", ) fig.figure.savefig(f"{png_plot_dir}aggregate_scores.png", bbox_inches="tight") if SAVE_PDF: diff --git a/data_processing_scripts/check_for_absolutes.py b/data_processing_scripts/check_for_absolutes.py new file mode 100644 index 00000000..9dda61e7 --- /dev/null +++ b/data_processing_scripts/check_for_absolutes.py @@ -0,0 
+1,29 @@ +import json + + +def check_for_absolutes(data): + for env_name in data: + for task_name in data[env_name]: + for algo_name in data[env_name][task_name]: + for seed in data[env_name][task_name][algo_name]: + if ( + "absolute_metrics" + not in data[env_name][task_name][algo_name][seed] + ): + print( + f"Found absolute metrics not in {env_name}/{task_name}/{algo_name}/{seed}" + ) + elif ( + "absolute_metrics" in data[env_name][task_name][algo_name][seed] + # and "sac" in algo_name + ): + print( + f"Found absolute metrics in {env_name}/{task_name}/{algo_name}/{seed}" + ) + + +in_file_path = "data/biggest-benchmark-sac/merged_data/metrics_winrate_processed.json" +with open(in_file_path, "r") as file: + data = json.load(file) + +check_for_absolutes(data) diff --git a/duplicate_seed_data.py b/data_processing_scripts/duplicate_seed_data.py similarity index 100% rename from duplicate_seed_data.py rename to data_processing_scripts/duplicate_seed_data.py diff --git a/data_processing_scripts/keep_certain_tasks.py b/data_processing_scripts/keep_certain_tasks.py new file mode 100644 index 00000000..d92afc73 --- /dev/null +++ b/data_processing_scripts/keep_certain_tasks.py @@ -0,0 +1,39 @@ +import json + + +def filter_json(data, tasks_to_keep): + filtered_data = {} + for env_name, env_tasks in data.items(): + kept_tasks = { + task: info for task, info in env_tasks.items() if task in tasks_to_keep + } + if kept_tasks: + filtered_data[env_name] = kept_tasks + return filtered_data + + +base_folder_name = "biggest-benchmark-sac" + +# Example usage: +input_file = f"./data/{base_folder_name}/merged_data/metrics_winrate_processed.json" +output_file = f"./data/{base_folder_name}/merged_data/metrics_seed_processed.json" +tasks_to_keep = [ + "hopper_3x1", + "halfcheetah_6x1", + "walker2d_2x3", + "ant_4x2", + # "humanoid_9|8", +] # Replace with your list of tasks to keep + +# Read the input JSON file +with open(input_file, "r") as f: + data = json.load(f) + +# Filter the data 
+filtered_data = filter_json(data, tasks_to_keep) + +# Write the filtered data to the output JSON file +with open(output_file, "w") as f: + json.dump(filtered_data, f, indent=2) + +print(f"Filtered data has been written to {output_file}") diff --git a/master_episode_norm.py b/data_processing_scripts/master_episode_norm.py similarity index 100% rename from master_episode_norm.py rename to data_processing_scripts/master_episode_norm.py diff --git a/process_env_name.py b/data_processing_scripts/process_env_name.py similarity index 94% rename from process_env_name.py rename to data_processing_scripts/process_env_name.py index 1c6e229b..6b426341 100644 --- a/process_env_name.py +++ b/data_processing_scripts/process_env_name.py @@ -88,9 +88,9 @@ def main(json_filename, new_json_filename): save_json(new_json_filename, data) +base_folder_name = "biggest-benchmark-sac" + # Replace 'your_file.json' with your actual JSON file name -json_filename = ( - "./data/full-benchmark-update/merged_data/metrics.json" -) -new_json_filename = "./data/full-benchmark-update/merged_data/metrics_name_processed.json" +json_filename = f"./data/{base_folder_name}/merged_data/metrics.json" +new_json_filename = f"./data/{base_folder_name}/merged_data/metrics_name_processed.json" main(json_filename, new_json_filename) diff --git a/process_step_counts.py b/data_processing_scripts/process_step_counts.py similarity index 56% rename from process_step_counts.py rename to data_processing_scripts/process_step_counts.py index e8f0b953..c8ae5554 100644 --- a/process_step_counts.py +++ b/data_processing_scripts/process_step_counts.py @@ -30,7 +30,7 @@ def interpolate_steps(data): max_step_index = max(int(key.split("_")[1]) for key in step_keys) if max_step_index < 121: - # if max_step_index < 199: + print("INTERP UP") # Interpolation x = np.array([int(k.split("_")[1]) for k in step_keys]) y_step_count = np.array( @@ -54,9 +54,54 @@ def interpolate_steps(data): x_new = np.linspace( 0, max_step_index, 122 ) # 
Ensure covering up to step_121 - # x_new = np.linspace( - # 0, max_step_index, 200 - # ) # Ensure covering up to step_199 + step_count_interp = np.interp(x_new, x, y_step_count) + elapsed_time_interp = np.interp(x_new, x, y_elapsed_time) + metrics_interp = { + metric: np.interp(x_new, x, y) + for metric, y in y_metrics.items() + } + + # Update the data with interpolated values + for i in range( + len(x_new) + ): # Adjusted to iterate over the new range + # Now directly using i to ensure step_121 is included + step_key = f"step_{i}" + metrics[step_key] = { + "step_count": int(step_count_interp[i]), + "elapsed_time": elapsed_time_interp[i], + "steps_per_second": metrics[step_keys[-1]][ + "steps_per_second" + ], # Duplicating the last value + } + for metric, y in metrics_interp.items(): + metrics[step_key][metric] = [y[i]] + + if max_step_index > 121: + print("INTERP DOWN") + # Interpolation + x = np.array([int(k.split("_")[1]) for k in step_keys]) + y_step_count = np.array( + [metrics[k]["step_count"] for k in step_keys] + ) + y_elapsed_time = np.array( + [metrics[k]["elapsed_time"] for k in step_keys] + ) + + metric_keys = [ + k + for k in metrics[step_keys[0]].keys() + if k + not in ["step_count", "elapsed_time", "steps_per_second"] + ] + y_metrics = { + metric: np.array([metrics[k][metric][0] for k in step_keys]) + for metric in metric_keys + } + + x_new = np.linspace( + 0, 122, 122 + ) # Ensure covering up to step_121 step_count_interp = np.interp(x_new, x, y_step_count) elapsed_time_interp = np.interp(x_new, x, y_elapsed_time) metrics_interp = { @@ -83,9 +128,13 @@ def interpolate_steps(data): return data +base_folder_name = "biggest-benchmark-sac" + # Replace 'your_file.json' with your actual JSON file name -json_filename = "./data/full-benchmark-update/merged_data/metrics_name_processed.json" -new_json_filename = "./data/full-benchmark-update/merged_data/metrics_stepcount_processed.json" +json_filename = 
f"./data/{base_folder_name}/merged_data/metrics_name_processed.json" +new_json_filename = ( + f"./data/{base_folder_name}/merged_data/metrics_stepcount_processed_old_update.json" +) data = load_json(json_filename) processed_data = interpolate_steps(data) diff --git a/data_processing_scripts/process_step_counts_new.py b/data_processing_scripts/process_step_counts_new.py new file mode 100644 index 00000000..5d5ba017 --- /dev/null +++ b/data_processing_scripts/process_step_counts_new.py @@ -0,0 +1,119 @@ +import json +from collections import OrderedDict + +import numpy as np + + +def load_json(filename): + with open(filename) as file: + data = json.load(file) + return data + + +def save_json(filename, data): + with open(filename, "w") as file: + json.dump(data, file, indent=4) + + +def interpolate_steps(data): + for env_name, env_data in data.items(): + for task_name, task_data in env_data.items(): + for algo_name, algo_data in task_data.items(): + for seed_key, seed_data in algo_data.items(): + # Preserve absolute_metrics + absolute_metrics = seed_data["absolute_metrics"] + + step_keys = sorted( + [k for k in seed_data.keys() if k.startswith("step_")], + key=lambda x: int(x.split("_")[1]), + ) + + if len(step_keys) == 122: + continue # Already has 122 steps, no interpolation needed + + # Prepare data for interpolation + x = np.array([int(k.split("_")[1]) for k in step_keys]) + y_step_count = np.array( + [seed_data[k]["step_count"] for k in step_keys] + ) + y_elapsed_time = np.array( + [seed_data[k]["elapsed_time"] for k in step_keys] + ) + + metric_keys = [ + k + for k in seed_data[step_keys[0]].keys() + if k not in ["step_count", "elapsed_time", "steps_per_second"] + ] + y_metrics = { + metric: np.array([seed_data[k][metric][0] for k in step_keys]) + for metric in metric_keys + } + + # New x values for interpolation + x_new = np.linspace(0, 121, 122) + + # Interpolate + step_count_interp = np.interp(x_new, x, y_step_count) + elapsed_time_interp = np.interp(x_new, x, 
y_elapsed_time) + metrics_interp = { + metric: np.interp(x_new, x, y) + for metric, y in y_metrics.items() + } + + # Preserve first and last values + step_count_interp[0], step_count_interp[-1] = ( + y_step_count[0], + y_step_count[-1], + ) + elapsed_time_interp[0], elapsed_time_interp[-1] = ( + y_elapsed_time[0], + y_elapsed_time[-1], + ) + for metric in metrics_interp: + metrics_interp[metric][0] = y_metrics[metric][0] + metrics_interp[metric][-1] = y_metrics[metric][-1] + + # Create new seed data structure + new_seed_data = OrderedDict() + new_seed_data["absolute_metrics"] = absolute_metrics + + for i in range(122): + step_key = f"step_{i}" + new_seed_data[step_key] = { + "step_count": int(step_count_interp[i]), + "elapsed_time": elapsed_time_interp[i], + "steps_per_second": [ + seed_data[step_keys[-1]]["steps_per_second"][0] + ], # Duplicate last value + } + for metric in metric_keys: + new_seed_data[step_key][metric] = [ + metrics_interp[metric][i] + ] + + # Replace the old seed data with the new one + algo_data[seed_key] = new_seed_data + + if len(step_keys) < 122: + print( + f"INTERP UP: {env_name}, {task_name}, {algo_name}, {seed_key}" + ) + else: + print( + f"INTERP DOWN: {env_name}, {task_name}, {algo_name}, {seed_key}" + ) + + return data + + +base_folder_name = "biggest-benchmark-sac" + +json_filename = f"./data/{base_folder_name}/merged_data/metrics_name_processed.json" +new_json_filename = ( + f"./data/{base_folder_name}/merged_data/metrics_stepcount_processed.json" +) + +data = load_json(json_filename) +processed_data = interpolate_steps(data) +save_json(new_json_filename, processed_data) diff --git a/process_win_rate.py b/data_processing_scripts/process_win_rate.py similarity index 82% rename from process_win_rate.py rename to data_processing_scripts/process_win_rate.py index 8b86451a..5419bfc8 100644 --- a/process_win_rate.py +++ b/data_processing_scripts/process_win_rate.py @@ -29,9 +29,12 @@ def adjust_win_rate(data): return data 
+base_folder_name = "biggest-benchmark-sac" # Replace 'your_file.json' with your actual JSON file name -json_filename = "./data/full-benchmark-update/merged_data/metrics_stepcount_processed.json" -new_json_filename = "./data/full-benchmark-update/merged_data/metrics_winrate_processed.json" +json_filename = f"./data/{base_folder_name}/merged_data/metrics_stepcount_processed.json" +new_json_filename = ( + f"./data/{base_folder_name}/merged_data/metrics_winrate_processed.json" +) data = load_json(json_filename) processed_data = adjust_win_rate(data) diff --git a/data_processing_scripts/pull_data.py b/data_processing_scripts/pull_data.py new file mode 100644 index 00000000..19aaec6d --- /dev/null +++ b/data_processing_scripts/pull_data.py @@ -0,0 +1,48 @@ +import time + +from marl_eval.json_tools import concatenate_json_files, pull_neptune_data + +neptune_tags = [ + "mat-measure-set-benchmark-lbf", + "mat-measure-set-benchmark-rware", + "mat-measure-set-benchmark-smax", + # "retmat-h2-benchmark-no-brax", + # "retmat-simple-rewrite-h1-benchmark-no-xpos-no-brax", + "ruan-measure-set-smax-benchmark", + "rerun-mava-rec-systems-smax", + "rware-measure-set-benchmark-small-lr", + "rerun-mava-rec-systems-rware", + # "lbf_best_hyperparams", + # "retmat-h2-first-benchmark", + # "mat-measure-set-benchmark-mabrax", + # "vector-cleaner-measure-set-benchmark", + "vector-connector-measure-set-benchmark", + # "mat-measure-set-benchmark-vector-cleaner", + "mat-measure-set-benchmark-vector-connector", + # "retmat-simple-rewrite-hypothesis-1-benchmark-no-xpos", + # "retmat-h2-first-benchmark", + # "mat-measure-set-benchmark-mabrax", + # "liam-mabrax-benchmark-ppo-2", + "retmat-new-20M-sweep-benchmark", + "happo-measure-set-benchmark", + "ppo-systems-benchmark-more-rware-scenarios", + # "qmix-v2-benchmark-2", + "sac-mabrax-benchmark", + "retmat-new-20M-sweep-benchmark-relaunches", +] + +base_folder_name = "biggest-benchmark-sac" + +for tag in neptune_tags: + pull_neptune_data( + 
project_name="InstaDeep/Mava", + tags=[tag], + store_directory=f"./data/{base_folder_name}", + ) + +time.sleep(2) + +concatenate_json_files( + input_directory=f"./data/{base_folder_name}", + output_json_path=f"./data/{base_folder_name}/merged_data", +) diff --git a/data_processing_scripts/pull_data_ablations.py b/data_processing_scripts/pull_data_ablations.py new file mode 100644 index 00000000..0be60a7a --- /dev/null +++ b/data_processing_scripts/pull_data_ablations.py @@ -0,0 +1,188 @@ +import json +import logging +import os +import zipfile +from concurrent.futures import ThreadPoolExecutor, as_completed +from pathlib import Path +from typing import Any, Dict, List + +import neptune +from colorama import Fore, Style +from tqdm import tqdm + +from marl_eval.json_tools import concatenate_json_files + + +def pull_neptune_data( + project_name: str, + tag: str, + columns: List[str], + store_directory: str = "./downloaded_json_data", + neptune_data_key: str = "metrics", + disable_progress_bar: bool = False, +) -> None: + """Downloads logs from a Neptune project based on provided tag and processes JSON files. + + Args: + project_name (str): Name of the Neptune project. + tag (str): Tag associated with the desired experiments. + columns (List[str]): List of column names to include in the algorithm name. + store_directory (str, optional): Directory to store the downloaded logs. + Default is "./downloaded_json_data". + neptune_data_key (str, optional): Key for the Neptune data to download. + Default is "metrics". + disable_progress_bar (bool, optional): Whether to hide a progress bar. + Default is False. + + Raises: + ValueError: If the provided project name or tag is invalid. 
+ """ + # Create the log directory if it doesn't exist + os.makedirs(store_directory, exist_ok=True) + + # Disable Neptune logging + neptune_logger = logging.getLogger("neptune") + neptune_logger.setLevel(logging.ERROR) + + # Initialize the Neptune project + try: + project = neptune.init_project(project=project_name) + except Exception as e: + raise ValueError(f"Invalid project name '{project_name}': {e}") + + # Fetch runs based on provided tag + try: + runs_table_df = project.fetch_runs_table( + state="inactive", columns=["sys/id"] + columns, tag=tag, sort_by="sys/id" + ).to_pandas() + except Exception as e: + raise ValueError(f"Invalid tag {tag}: {e}") + + run_ids = runs_table_df["sys/id"].values.tolist() + + # Download logs and process JSON files concurrently + with ThreadPoolExecutor() as executor: + futures = [ + executor.submit( + _download_and_process_data, + project_name, + run_id, + store_directory, + neptune_data_key, + runs_table_df.loc[runs_table_df["sys/id"] == run_id, columns] + .iloc[0] + .to_dict(), + ) + for run_id in run_ids + ] + for future in tqdm( + as_completed(futures), + total=len(futures), + desc="Downloading and processing data", + disable=disable_progress_bar, + ): + future.result() + + # Restore neptune logger level + neptune_logger.setLevel(logging.INFO) + print( + f"{Fore.CYAN}{Style.BRIGHT}Data downloaded and processed successfully!{Style.RESET_ALL}" + ) + + +def _download_and_process_data( + project_name: str, + run_id: str, + store_directory: str, + neptune_data_key: str, + column_values: Dict[str, Any], +) -> None: + """Downloads and processes data for a single Neptune run.""" + try: + with neptune.init_run( + project=project_name, with_id=run_id, mode="read-only" + ) as run: + for j, data_key in enumerate( + run.get_structure()[neptune_data_key].keys(), start=1 + ): + file_path = f"{store_directory}/{run_id}" + if j > 1: + file_path += f"_{j}" + run[f"{neptune_data_key}/{data_key}"].download(destination=file_path) + 
_extract_and_process_zip_file(file_path, column_values) + except Exception as e: + print(f"Error processing data for run {run_id}: {e}") + + +def _extract_and_process_zip_file( + file_path: str, column_values: Dict[str, Any] +) -> None: + """Extracts a zip file and processes any JSON files within it.""" + try: + with zipfile.ZipFile(file_path, "r") as zip_ref: + for member in zip_ref.infolist(): + if not member.is_dir(): + target_path = Path(f"{file_path}{Path(member.filename).suffix}") + target_path.parent.mkdir(parents=True, exist_ok=True) + with zip_ref.open(member) as src, target_path.open("wb") as dest: + dest.write(src.read()) + + # Process the JSON file after extraction + if target_path.suffix == ".json": + process_json_file(str(target_path), column_values) + + # Remove the zip file + os.remove(file_path) + except zipfile.BadZipFile: + # If the file is not zipped, no action is required + pass + except Exception as e: + print(f"Error while unzipping or processing data at path {file_path}: {e}") + + +def process_json_file(file_path: str, column_values: Dict[str, Any]) -> None: + """Processes a single JSON file, updating algo_names with additional details.""" + with open(file_path, "r") as f: + data = json.load(f) + + # Update algo_name in the JSON structure + for env_name, tasks in data.items(): + if isinstance(tasks, dict): + for task_name, algos in tasks.items(): + if isinstance(algos, dict): + updated_algos = {} + for algo_name, algo_data in algos.items(): + # Create new algorithm name with additional details + new_algo_name = algo_name + for i, (column, value) in enumerate( + column_values.items(), start=1 + ): + new_algo_name += f"-{value}" + updated_algos[new_algo_name] = algo_data + tasks[task_name] = updated_algos + + # Write the updated data back to the file + with open(file_path, "w") as f: + json.dump(data, f, indent=2) + + +# Main execution +if __name__ == "__main__": + neptune_tag = "retmat-memory-timestep-chunkwise-ablation" + columns_to_include 
= [ + "config/network/chunkwise/chunk_size", + ] # Add the columns you want to include + + base_folder_name = "retmat-timestep-chunkwise-ablation" + + pull_neptune_data( + project_name="InstaDeep/Mava", + tag=neptune_tag, + columns=columns_to_include, + store_directory=f"./data/{base_folder_name}", + ) + + concatenate_json_files( + input_directory=f"./data/{base_folder_name}", + output_json_path=f"./data/{base_folder_name}/merged_data", + ) diff --git a/data_processing_scripts/pull_data_merge_neptune_tag.py b/data_processing_scripts/pull_data_merge_neptune_tag.py new file mode 100644 index 00000000..fa88ee71 --- /dev/null +++ b/data_processing_scripts/pull_data_merge_neptune_tag.py @@ -0,0 +1,176 @@ +import json +import logging +import os +import zipfile +from collections import defaultdict +from concurrent.futures import ThreadPoolExecutor, as_completed +from pathlib import Path +from typing import Any, Dict, List, Tuple + +import neptune +from colorama import Fore, Style +from tqdm import tqdm + +from marl_eval.json_tools import concatenate_json_files + +neptune_tags = [ + "retmat-cont-systems-debug" +] + +def pull_neptune_data( + project_name: str, + tags: List[str], + store_directory: str = "./downloaded_json_data", + neptune_data_key: str = "metrics", + disable_progress_bar: bool = False, +) -> None: + """Downloads logs from a Neptune project based on provided tags and processes JSON files. + + Args: + project_name (str): Name of the Neptune project. + tags (List[str]): List of tags associated with the desired experiments. + store_directory (str, optional): Directory to store the downloaded logs. + Default is "./downloaded_json_data". + neptune_data_key (str, optional): Key for the Neptune data to download. + Default is "metrics". + disable_progress_bar (bool, optional): Whether to hide a progress bar. + Default is False. + + Raises: + ValueError: If the provided project name or tags are invalid. 
+ """ + # Create the log directory if it doesn't exist + os.makedirs(store_directory, exist_ok=True) + + # Disable Neptune logging + neptune_logger = logging.getLogger("neptune") + neptune_logger.setLevel(logging.ERROR) + + # Initialize the Neptune project + try: + project = neptune.init_project(project=project_name) + except Exception as e: + raise ValueError(f"Invalid project name '{project_name}': {e}") + + # Fetch runs based on provided tags + try: + runs_table_df = project.fetch_runs_table( + state="inactive", columns=["sys/id"], tag=tags, sort_by="sys/id" + ).to_pandas() + except Exception as e: + raise ValueError(f"Invalid tags {tags}: {e}") + + run_ids = runs_table_df["sys/id"].values.tolist() + + # Download logs and process JSON files concurrently + with ThreadPoolExecutor() as executor: + futures = [ + executor.submit( + _download_and_process_data, + project_name, + run_id, + store_directory, + neptune_data_key, + ) + for run_id in run_ids + ] + for future in tqdm( + as_completed(futures), + total=len(futures), + desc="Downloading and processing data", + disable=disable_progress_bar, + ): + future.result() + + # Restore neptune logger level + neptune_logger.setLevel(logging.INFO) + print(f"{Fore.CYAN}{Style.BRIGHT}Data downloaded and processed successfully!{Style.RESET_ALL}") + +def _download_and_process_data( + project_name: str, run_id: str, store_directory: str, neptune_data_key: str +) -> None: + """Downloads and processes data for a single Neptune run.""" + try: + with neptune.init_run( + project=project_name, with_id=run_id, mode="read-only" + ) as run: + # Fetch Neptune tags + neptune_tag_string = run["config/logger/kwargs/neptune_tag"].fetch() + neptune_tags = eval(neptune_tag_string) + + # Extract relevant tags + try: + start_index = neptune_tags.index('retmat-cont-systems-debug') + additional_tags = neptune_tags[start_index + 1:] # Exclude 'retmat-cont-systems-debug' + except ValueError: + print(f"'retmat-cont-systems-debug' not found in Neptune 
tags for run {run_id}") + additional_tags = [] + + for j, data_key in enumerate( + run.get_structure()[neptune_data_key].keys(), start=1 + ): + file_path = f"{store_directory}/{run_id}" + if j > 1: + file_path += f"_{j}" + run[f"{neptune_data_key}/{data_key}"].download(destination=file_path) + _extract_and_process_zip_file(file_path, additional_tags) + except Exception as e: + print(f"Error processing data for run {run_id}: {e}") + +def _extract_and_process_zip_file(file_path: str, additional_tags: List[str]) -> None: + """Extracts a zip file and processes any JSON files within it.""" + try: + with zipfile.ZipFile(file_path, "r") as zip_ref: + for member in zip_ref.infolist(): + if not member.is_dir(): + target_path = Path(f"{file_path}{Path(member.filename).suffix}") + target_path.parent.mkdir(parents=True, exist_ok=True) + with zip_ref.open(member) as src, target_path.open("wb") as dest: + dest.write(src.read()) + + # Process the JSON file after extraction + if target_path.suffix == '.json': + process_json_file(str(target_path), additional_tags) + + # Remove the zip file + os.remove(file_path) + except zipfile.BadZipFile: + # If the file is not zipped, no action is required + pass + except Exception as e: + print(f"Error while unzipping or processing data at path {file_path}: {e}") + +def process_json_file(file_path: str, additional_tags: List[str]) -> None: + """Processes a single JSON file, updating algo_names with additional tags.""" + with open(file_path, 'r') as f: + data = json.load(f) + + # Update algo_name in the JSON structure + for env_name, tasks in data.items(): + if isinstance(tasks, dict): + for task_name, algos in tasks.items(): + if isinstance(algos, dict): + updated_algos = {} + for algo_name, algo_data in algos.items(): + # Only add additional tags, excluding 'retmat-cont-systems-debug' + new_algo_name = f"{algo_name}_{'_'.join(additional_tags)}" if additional_tags else algo_name + updated_algos[new_algo_name] = algo_data + tasks[task_name] = 
updated_algos + + # Write the updated data back to the file + with open(file_path, 'w') as f: + json.dump(data, f, indent=2) + +# Main execution +if __name__ == "__main__": + for tag in neptune_tags: + pull_neptune_data( + project_name="InstaDeep/Mava", + tags=[tag], + store_directory="./data/cont-update-benchmark", + ) + + concatenate_json_files( + input_directory="./data/cont-update-benchmark", + output_json_path="./data/cont-update-benchmark/merged_data", + ) \ No newline at end of file diff --git a/remove_algo_data.py b/data_processing_scripts/remove_algo_data.py similarity index 100% rename from remove_algo_data.py rename to data_processing_scripts/remove_algo_data.py diff --git a/remove_certain_tasks.py b/data_processing_scripts/remove_certain_tasks.py similarity index 60% rename from remove_certain_tasks.py rename to data_processing_scripts/remove_certain_tasks.py index 5d5c66ee..fb80ec5c 100644 --- a/remove_certain_tasks.py +++ b/data_processing_scripts/remove_certain_tasks.py @@ -12,22 +12,28 @@ def filter_json(data, tasks_to_remove): filtered_data[env_name] = filtered_env_tasks return filtered_data + +base_folder_name = "biggest-benchmark" + # Example usage: -input_file = 'data/full-benchmark-update/merged_data/master_norm_episode_return.json' -output_file = 'data/full-benchmark-update/merged_data/master_norm_episode_return.json' +input_file = f"./data/{base_folder_name}/merged_data/metrics_winrate_processed.json" +output_file = f"./data/{base_folder_name}/merged_data/metrics_winrate_processed.json" tasks_to_remove = [ - 'clean-20x20x15a', - 'clean-15x15x6a', - 'clean-10x10x3a' + "clean-20x20x15a", + "clean-15x15x6a", + "clean-10x10x3a", + "con-15x15x23a", + "medium-4ag-hard", + "xlarge-4ag", ] # Replace with your list of tasks to remove # Read the input JSON file -with open(input_file, 'r') as f: +with open(input_file, "r") as f: data = json.load(f) # Filter the data filtered_data = filter_json(data, tasks_to_remove) # Write the filtered data to the 
def remove_win_rate_from_maconnector(data, env_key="MaConnector"):
    """Strip the ``win_rate`` metric from every run of one environment.

    Removes ``win_rate`` from each seed's ``absolute_metrics`` dict and from
    every ``step_*`` entry under ``data[env_key]``; other environments and
    other keys are left untouched. The mapping is modified in place.

    Args:
        data: Nested metrics mapping ``env -> task -> algo -> seed -> ...``.
        env_key: Environment whose win rates are dropped. Defaults to
            ``"MaConnector"`` to match the original script's behaviour.

    Returns:
        The same ``data`` object, for chaining.
    """
    for task_data in data.get(env_key, {}).values():
        for algo_data in task_data.values():
            for seed_data in algo_data.values():
                # Remove from absolute_metrics, if present.
                if "absolute_metrics" in seed_data:
                    seed_data["absolute_metrics"].pop("win_rate", None)

                # Remove from every per-step metrics entry.
                for step_key in seed_data:
                    if step_key.startswith("step_"):
                        seed_data[step_key].pop("win_rate", None)

    return data


# Guarded so importing this module has no side effects; the original ran
# the file I/O at import time.
if __name__ == "__main__":
    base_folder_name = "biggest-benchmark"
    input_filename = (
        f"./data/{base_folder_name}/merged_data/metrics_seed_processed.json"
    )
    output_filename = (
        f"./data/{base_folder_name}/merged_data/metrics_seed_processed.json"
    )

    # Load the JSON file.
    print("Loading data...")
    with open(input_filename, "r") as f:
        data = json.load(f)

    # Process the data.
    print("Removing 'win_rate' metric from MaConnector environment...")
    processed_data = remove_win_rate_from_maconnector(data)

    # Save the result back to the JSON file.
    print("Saving processed data...")
    with open(output_filename, "w") as f:
        json.dump(processed_data, f, indent=2)

    print(f"Processing complete. Check '{output_filename}' for the result.")
def rename_algorithms(data):
    """Rename continuous-control algorithm keys to their canonical names.

    Applies ``retmat_cont -> retmat``, ``retmat_cont_memory ->
    retmat_memory`` and ``mat_cont -> mat`` in every env/task of ``data``,
    mutating the mapping in place and printing each rename.

    Args:
        data: Nested metrics mapping ``env -> task -> algo -> ...``.
    """
    rename_map = {
        "retmat_cont": "retmat",
        "retmat_cont_memory": "retmat_memory",
        "mat_cont": "mat",
    }

    for env_name, env_data in data.items():
        for task_name, task_data in env_data.items():
            # Iterate rename_map (not a set intersection) so the rename and
            # print order is deterministic across runs.
            for old_name in [n for n in rename_map if n in task_data]:
                new_name = rename_map[old_name]
                task_data[new_name] = task_data.pop(old_name)
                print(f"Renamed {old_name} to {new_name} in {env_name}/{task_name}")


# Guarded so importing this module has no side effects; the original ran
# the file I/O at import time.
if __name__ == "__main__":
    base_folder_name = "biggest-benchmark-sac"

    # Load the JSON file.
    file_path = f"./data/{base_folder_name}/merged_data/metrics_seed_processed.json"
    with open(file_path, "r") as file:
        data = json.load(file)

    # Rename the algorithms.
    rename_algorithms(data)

    # Save the modified data back to the JSON file.
    with open(file_path, "w") as file:
        json.dump(data, file, indent=2)

    print("JSON file has been updated with renamed algorithms.")
def standardize_seeds(data, num_seeds=5):
    """Cap every algorithm at ``num_seeds`` runs and rename them ``seed_i``.

    If an algorithm has more than ``num_seeds`` seeds, a random subset of
    ``num_seeds`` is kept (selection uses the unseeded global ``random``
    state, so it is not reproducible unless the caller seeds it — TODO
    confirm this is intended); otherwise all seeds are kept. Keys are
    rewritten to ``seed_0 ... seed_{k-1}`` in place and before/after counts
    are printed per algorithm.

    Args:
        data: Nested metrics mapping ``env -> task -> algo -> seed -> ...``.
        num_seeds: Maximum number of seeds to keep per algorithm.

    Returns:
        The same ``data`` object, for chaining.
    """
    for env_name, env_data in data.items():
        for task_name, task_data in env_data.items():
            for algo_name, algo_data in task_data.items():
                existing_seeds = list(algo_data.keys())

                # Print the original count (completed by the line below).
                print(
                    f"{env_name}/{task_name}/{algo_name}: "
                    f"Before: {len(existing_seeds)}",
                    end="",
                )

                # Randomly subsample only when there are too many seeds.
                if len(existing_seeds) > num_seeds:
                    selected_seeds = random.sample(existing_seeds, num_seeds)
                else:
                    selected_seeds = existing_seeds

                # Rebuild the mapping with standardized seed names.
                task_data[algo_name] = {
                    f"seed_{i}": algo_data[seed]
                    for i, seed in enumerate(selected_seeds)
                }

                print(f", After: {len(task_data[algo_name])}")

    return data


# Guarded so importing this module has no side effects; the original ran
# the file I/O at import time.
if __name__ == "__main__":
    base_folder_name = "biggest-benchmark-sac"

    json_filename = (
        f"./data/{base_folder_name}/merged_data/metrics_winrate_processed.json"
    )
    new_json_filename = (
        f"./data/{base_folder_name}/merged_data/metrics_seed_processed.json"
    )

    # Load the JSON file.
    with open(json_filename, "r") as f:
        data = json.load(f)

    print("Processing and standardizing seeds:")
    # Standardize the seeds.
    standardized_data = standardize_seeds(data)

    # Save the result to a new JSON file.
    with open(new_json_filename, "w") as f:
        json.dump(standardized_data, f, indent=2)

    print("\nProcessing complete. Check 'metrics_seed_processed.json' for the result.")
Check 'metrics_seed_processed.json' for the result.") diff --git a/keep_certain_tasks.py b/keep_certain_tasks.py deleted file mode 100644 index a80a59e9..00000000 --- a/keep_certain_tasks.py +++ /dev/null @@ -1,37 +0,0 @@ -import json - - -def filter_json(data, tasks_to_keep): - filtered_data = {} - for env_name, env_tasks in data.items(): - kept_tasks = {task: info for task, info in env_tasks.items() if task in tasks_to_keep} - if kept_tasks: - filtered_data[env_name] = kept_tasks - return filtered_data - -# Example usage: -input_file = 'data/limited_benchmark/retmat-mat-ppo/merged_data/metrics_winrate_processed.json' -output_file = 'data/limited_benchmark/retmat-mat-ppo/merged_data/task_name_processed.json' -tasks_to_keep = [ - 'tiny-4ag', - 'small-4ag', - '5m_vs_6m', - '27m_vs_30m', - 'smacv2_10_units', - '15x15-3p-5f', - '15x15-4p-5f', - '6h_vs_8z', -] # Replace with your list of tasks to keep - -# Read the input JSON file -with open(input_file, 'r') as f: - data = json.load(f) - -# Filter the data -filtered_data = filter_json(data, tasks_to_keep) - -# Write the filtered data to the output JSON file -with open(output_file, 'w') as f: - json.dump(filtered_data, f, indent=2) - -print(f"Filtered data has been written to {output_file}") \ No newline at end of file diff --git a/marl_eval/utils/data_processing_utils.py b/marl_eval/utils/data_processing_utils.py index a87f472d..32aae285 100644 --- a/marl_eval/utils/data_processing_utils.py +++ b/marl_eval/utils/data_processing_utils.py @@ -14,7 +14,7 @@ # limitations under the License. 
import copy -from typing import Any, Dict, List, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, Union import numpy as np from colorama import Fore, Style @@ -72,7 +72,7 @@ def check_comma_in_algo_names( def lower_case_dictionary_keys( - dictionary: Dict[str, Dict[str, Any]] + dictionary: Dict[str, Dict[str, Any]], ) -> Dict[str, Dict[str, Any]]: """Recursively make all keys in a nested dictionary lower case.""" @@ -314,6 +314,7 @@ def create_matrices_for_rliable( # noqa: C901 data_dictionary: Dict[str, Dict[str, Any]], environment_name: str, metrics_to_normalize: List[str], + task_name: Optional[str] = None, ) -> Tuple[Dict[str, Dict[str, Any]], Dict[str, Dict[str, Any]]]: """Creates two dictionaries containing arrays required for using the rliable tools. @@ -365,6 +366,10 @@ def create_matrices_for_rliable( # noqa: C901 # environment will have the same number of steps, same number of tasks # and same number of. tasks = list(data_env.keys()) + + if task_name is not None: + tasks = [task_name] + algorithms = list(data_env[tasks[0]].keys()) runs = list(data_env[tasks[0]][algorithms[0]].keys()) steps = list(data_env[tasks[0]][algorithms[0]][runs[0]].keys()) diff --git a/process_step_counts_mabrax.py b/process_step_counts_mabrax.py deleted file mode 100644 index 2b4e3a4e..00000000 --- a/process_step_counts_mabrax.py +++ /dev/null @@ -1,94 +0,0 @@ -import json - -import numpy as np - - -def load_json(filename): - with open(filename) as file: - data = json.load(file) - return data - - -# Save the modified data back into JSON -def save_json(filename, data): - with open(filename, "w") as file: - json.dump(data, file, indent=4) - - -def interpolate_steps(data): - for env_name, task_data in data.items(): - for task_name, alg_data in task_data.items(): - for algorithm_name, seed_data in alg_data.items(): - for seed_key, metrics in seed_data.items(): - if seed_key == "absolute_metrics": - continue # Skip absolute metrics - - step_keys = sorted( - [key for 
key in metrics.keys() if key.startswith("step_")], - key=lambda x: int(x.split("_")[1]), - ) - max_step_index = max(int(key.split("_")[1]) for key in step_keys) - - # if max_step_index < 121: - if max_step_index < 1830: - # Interpolation - x = np.array([int(k.split("_")[1]) for k in step_keys]) - y_step_count = np.array( - [metrics[k]["step_count"] for k in step_keys] - ) - y_elapsed_time = np.array( - [metrics[k]["elapsed_time"] for k in step_keys] - ) - - metric_keys = [ - k - for k in metrics[step_keys[0]].keys() - if k - not in ["step_count", "elapsed_time", "steps_per_second"] - ] - y_metrics = { - metric: np.array([metrics[k][metric][0] for k in step_keys]) - for metric in metric_keys - } - - # x_new = np.linspace( - # 0, max_step_index, 122 - # ) # Ensure covering up to step_121 - x_new = np.linspace( - 0, max_step_index, 1831 - ) # Ensure covering up to step_1830 - step_count_interp = np.interp(x_new, x, y_step_count) - elapsed_time_interp = np.interp(x_new, x, y_elapsed_time) - metrics_interp = { - metric: np.interp(x_new, x, y) - for metric, y in y_metrics.items() - } - - # Update the data with interpolated values - for i in range( - len(x_new) - ): # Adjusted to iterate over the new range - # Now directly using i to ensure step_121 is included - step_key = f"step_{i}" - metrics[step_key] = { - "step_count": int(step_count_interp[i]), - "elapsed_time": elapsed_time_interp[i], - "steps_per_second": metrics[step_keys[-1]][ - "steps_per_second" - ], # Duplicating the last value - } - for metric, y in metrics_interp.items(): - metrics[step_key][metric] = [y[i]] - - return data - - -# Replace 'your_file.json' with your actual JSON file name -json_filename = "data/full_benchmark/mabrax/merged_data/metrics.json" -new_json_filename = ( - "data/full_benchmark/mabrax/merged_data/metrics_stepcount_processed.json" -) - -data = load_json(json_filename) -processed_data = interpolate_steps(data) -save_json(new_json_filename, processed_data) diff --git a/pull_data.py 
b/pull_data.py deleted file mode 100644 index 54d05e88..00000000 --- a/pull_data.py +++ /dev/null @@ -1,37 +0,0 @@ -from marl_eval.json_tools import concatenate_json_files, pull_neptune_data - -neptune_tags = [ - "mat-measure-set-benchmark-lbf", - "mat-measure-set-benchmark-rware", - "mat-measure-set-benchmark-smax", - # "retmat-h2-benchmark-no-brax", - # "retmat-simple-rewrite-h1-benchmark-no-xpos-no-brax", - "ruan-measure-set-smax-benchmark", - "rerun-mava-rec-systems-smax", - "rware-measure-set-benchmark-small-lr", - "rerun-mava-rec-systems-rware", - # "lbf_best_hyperparams", - # "retmat-h2-first-benchmark", - # "mat-measure-set-benchmark-mabrax", - # "vector-cleaner-measure-set-benchmark", - "vector-connector-measure-set-benchmark", - # "mat-measure-set-benchmark-vector-cleaner", - "mat-measure-set-benchmark-vector-connector", - # "retmat-simple-rewrite-hypothesis-1-benchmark-no-xpos", - # "retmat-h2-first-benchmark", - # "mat-measure-set-benchmark-mabrax", - # "liam-mabrax-benchmark-ppo-2", - "retmat-new-20M-sweep-benchmark", - ] - -for tag in neptune_tags: - pull_neptune_data( - project_name="InstaDeep/Mava", - tags=[tag], - store_directory="./data/full-benchmark-update", - ) - -concatenate_json_files( - input_directory="./data/full-benchmark-update", - output_json_path="./data/full-benchmark-update/merged_data", -)