From bdfb86849a25c629b96625f5f200bf99a4293324 Mon Sep 17 00:00:00 2001
From: ashuaibi7 <ashuaibi@princeton.edu>
Date: Wed, 15 Jan 2025 15:55:40 -0500
Subject: [PATCH] Adapt epsilon threshold to the number of samples in the
 subtype

---
 src/dialect/utils/plotting.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/dialect/utils/plotting.py b/src/dialect/utils/plotting.py
index fbd33d1..6352f7a 100644
--- a/src/dialect/utils/plotting.py
+++ b/src/dialect/utils/plotting.py
@@ -34,7 +34,7 @@
 DECOY_GENE_COLOR = "#FFB3B3"  # Pastel red for decoy genes
 DRIVER_GENE_COLOR = "#A3C1DA"  # Blue-gray for driver genes
 EDGE_COLOR = "black"
-EPSILON = 0.05  # DIALECT Threshold for Tau_1X and Tau_X1
+EPSILON_MUTATION_COUNT = 20  # Mutation count; divided by num_samples to get the Tau_1X/Tau_X1 cutoff
 PVALUE_THRESHOLD = 1
 
 
@@ -154,6 +154,8 @@ def draw_network_gridplot_across_methods(
         "WeSME": "WeSME P-Val",
     }
 
+    num_samples = results_df.shape[0]
+
     fig, axes = plt.subplots(2, 3, figsize=(24, 16))
     fig.suptitle(f"Top 10 Ranked ME Pairs in {subtype}", fontsize=42, y=0.999)
     for idx, (method, col) in enumerate(methods.items()):
@@ -164,10 +166,12 @@ def draw_network_gridplot_across_methods(
         ).head(top_k)
 
         if method == "DIALECT":
+            # Set epsilon inversely proportional to the number of samples.
+            epsilon = EPSILON_MUTATION_COUNT / num_samples
             top_ranking_pairs = top_ranking_pairs[
                 (top_ranking_pairs["Rho"] < 0)
-                & (top_ranking_pairs["Tau_1X"] > EPSILON)
-                & (top_ranking_pairs["Tau_X1"] > EPSILON)
+                & (top_ranking_pairs["Tau_1X"] > epsilon)
+                & (top_ranking_pairs["Tau_X1"] > epsilon)
             ]
         elif method == "MEGSA":
             top_ranking_pairs = top_ranking_pairs[top_ranking_pairs["MEGSA S-Score (LRT)"] > 0]