From f51c396435897cd56777d86c83d365c47a54808e Mon Sep 17 00:00:00 2001 From: Sid Date: Mon, 14 Oct 2024 16:29:53 -0700 Subject: [PATCH 1/2] Fix spoiler model uploads. Refactor to use a single average loss per uid explicitly. Log using the competitive win rates and note if not competitive. Correctly remove models that never win beyond just comparing to 'top model'. Improve logging when no models are dropped. Add unit tests Fix case of duplicated model blocks Maintain behavior for how many models are kept in the eval set Amend logs Enumerate competitive_uids instead of uids Use uids instead of comp_weights to keep all arrays of the expected length --- neurons/validator.py | 43 ++++++-- pretrain/validation.py | 76 +++++++++++--- tests/pretrain/test_validation.py | 167 ++++++++++++++++++++++++++++++ 3 files changed, 258 insertions(+), 28 deletions(-) create mode 100644 tests/pretrain/test_validation.py diff --git a/neurons/validator.py b/neurons/validator.py index 936fbeb6..4681e477 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -799,14 +799,14 @@ async def run_step(self): tokenizer = pt.model.load_tokenizer( competition.constraints, cache_dir=self.config.model_dir ) - + if cur_block >= constants.sample_pack_block: pack_samples = True pages_per_eval = constants.pages_per_eval_pack else: pack_samples = False pages_per_eval = constants.pages_per_eval_unpack - + # If the option is set in the config, override pages_per_eval = ( self.config.pages_per_eval @@ -896,7 +896,7 @@ async def run_step(self): ) del model_i - + except Exception as e: bt.logging.error( f"Error in eval loop: {e}. Setting losses for uid: {uid_i} to infinity." @@ -914,14 +914,27 @@ async def run_step(self): # Compute wins and win rates per uid. # Take the average loss across all batches for comparison of best model. - # Keep it as a list of 1 for later calculations. - losses_per_uid = { - uid: [state.avg_loss()] for uid, state in uid_to_state.items() + uid_to_average_loss = { + uid: state.avg_loss() for uid, state in uid_to_state.items() } uid_to_block = {uid: state.block for uid, state in uid_to_state.items()} + + # Filter to the list of uids that may at one point be a top model. + competitive_uids = pt.validation.compute_competitive_uids( + uid_to_average_loss, uid_to_block, competition.constraints.epsilon_func + ) + + # Log which models got dropped for the second pass. + dropped_uids = [uid for uid in uids if uid not in competitive_uids] + if dropped_uids: + bt.logging.info( + f"The following uids were not included in the win rate calculation because they did not beat the fully decayed loss of any previously submitted model in this eval batch: {dropped_uids}." + ) + + # Calculate new wins and win_rate with only the competitive uids considered. wins, win_rate = pt.validation.compute_wins( - uids, - losses_per_uid, + competitive_uids, + uid_to_average_loss, uid_to_block, competition.constraints.epsilon_func, cur_block, @@ -932,7 +945,7 @@ async def run_step(self): # Compute softmaxed weights based on win rate. model_weights = torch.tensor( - [win_rate[uid] for uid in uids], dtype=torch.float32 + [win_rate.get(uid, 0) for uid in uids], dtype=torch.float32 ) step_weights = torch.softmax(model_weights / constants.temperature, dim=0) @@ -977,6 +990,13 @@ async def run_step(self): : self.config.sample_min ] ) + # Make sure we always keep around sample_min number of models to maintain previous behavior. 
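+        # If filtering out non-competitive uids leaves fewer than sample_min
+        # models, the loop below tops the set back up with the remaining
+        # lowest average loss uids.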
+ if len(models_to_keep) < self.config.sample_min: + for uid in sorted(uid_to_average_loss, key=uid_to_average_loss.get): + if len(models_to_keep) >= self.config.sample_min: + break + models_to_keep.add(uid) + self._update_uids_to_eval( competition.id, models_to_keep, active_competition_ids ) @@ -1102,8 +1122,9 @@ def log_step( "epsilon_adv": competition_epsilon_func.compute_epsilon( current_block, uid_to_state[uid].block ), - "win_rate": win_rate[uid], - "win_total": wins[uid], + # We use 0 in the case where a uid was not competitive and therefore not used in win rate calcs. + "win_rate": win_rate[uid] if uid in win_rate else 0, + "win_total": wins[uid] if uid in wins else 0, "weight": self.weights[uid].item(), "norm_weight": sub_competition_weights[idx].item(), } diff --git a/pretrain/validation.py b/pretrain/validation.py index b0aeaa76..a4b21868 100644 --- a/pretrain/validation.py +++ b/pretrain/validation.py @@ -66,7 +66,7 @@ def iswin( def compute_wins( uids: typing.List[int], - losses_per_uid: typing.Dict[int, typing.List[float]], + uid_to_average_loss: typing.Dict[int, float], uid_to_block: typing.Dict[int, int], epsilon_func: EpsilonFunc, current_block: int, @@ -76,7 +76,7 @@ def compute_wins( Parameters: uids (list): A list of uids to compare. - losses_per_uid (dict): A dictionary of losses for each uid by batch. + uid_to_average_loss (dict): A dictionary of average loss for each uid over all batches. uid_to_block (dict): A dictionary of blocks for each uid. epsilon_func (EpsilonFunc): Function that determines how much advantage to give to the earlier block. current_block: The current block. @@ -92,26 +92,68 @@ def compute_wins( if uid_i == uid_j: continue - for loss_i, loss_j in zip(losses_per_uid[uid_i], losses_per_uid[uid_j]): - wins[uid_i] += ( - 1 - if iswin( - loss_i, - loss_j, - uid_to_block[uid_i], - uid_to_block[uid_j], - epsilon_func, - current_block, - ) - else 0 + wins[uid_i] += ( + 1 + if iswin( + uid_to_average_loss[uid_i], + uid_to_average_loss[uid_j], + uid_to_block[uid_i], + uid_to_block[uid_j], + epsilon_func, + current_block, ) - total_matches += 1 - # Calculate win rate for uid i - win_rate[uid_i] = wins[uid_i] / total_matches if total_matches > 0 else 0 + else 0 + ) + total_matches += 1 + # Calculate win rate for uid i. Default win_rate to 1 for the case of no matches. + win_rate[uid_i] = wins[uid_i] / total_matches if total_matches > 0 else 1 return wins, win_rate +def compute_competitive_uids( + uid_to_average_loss: typing.Dict[int, float], + uid_to_block: typing.Dict[int, int], + epsilon_func: EpsilonFunc, +) -> typing.List[int]: + """ + Computes the list of any uids that may at one point be the top model. + + Parameters: + uid_to_average_loss (dict): A dictionary of average loss for each uid over all batches. + uid_to_block (dict): A dictionary of blocks for each uid. + epsilon_func (EpsilonFunc): Function that determines how much advantage to give to the earlier block. + + Returns: + list: A list of uids that may at one point be the top model. + """ + # Get fully decayed loss for every model. + fully_decayed_epsilon = 1 - epsilon_func.compute_epsilon( + current_block=math.inf, model_block=0 + ) + fully_decayed_losses = { + uid: uid_to_average_loss[uid] * fully_decayed_epsilon for uid in uid_to_block + } + + # Iterate through the models and only keep models who's loss is better than + # all models uploaded at an earlier block, after they've fully decayed. 
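+    # For example, if epsilon fully decays to 0.0001, an earlier model with an
+    # average loss of 0.5 sets a threshold of 0.5 * (1 - 0.0001) = 0.49995 that
+    # any later model must beat to remain competitive.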
+ # If the model cannot, then there exists at least one model at an earlier block which + # will always have a better epislon adjusted loss, thus it will never be the top model. + competitive_uids = [] + for uid, loss in uid_to_average_loss.items(): + # Check if the current UID beats all earlier (or same block) models at full decay. + # all([]) is true so we always keep the earliest model. + earlier_uids = [ + i + for i, block in uid_to_block.items() + if i != uid and block <= uid_to_block[uid] + ] + if all(loss < fully_decayed_losses[uid_other] for uid_other in earlier_uids): + competitive_uids.append(uid) + + return competitive_uids + + def check_for_reasonable_output( model, input1: torch.Tensor, input2: torch.Tensor, pad_token_id: int ) -> bool: diff --git a/tests/pretrain/test_validation.py b/tests/pretrain/test_validation.py new file mode 100644 index 00000000..3393bb7b --- /dev/null +++ b/tests/pretrain/test_validation.py @@ -0,0 +1,167 @@ +import unittest +import pretrain as pt +from taoverse.model.competition.epsilon import LinearDecay + + +class TestValidation(unittest.TestCase): + def test_compute_competitive_uids_filters_clones(self): + # Check that if the current top model submits a few clones, they are filtered out. + uid_to_average_loss = {100: 0.5, 50: 0.5, 75: 0.499} + uid_to_block = {100: 1000, 50: 2000, 75: 2001} + epsilon_func = LinearDecay(0.005, 0.0001, 1000) + self.assertEqual( + sorted( + pt.validation.compute_competitive_uids( + uid_to_average_loss, uid_to_block, epsilon_func + ) + ), + [75, 100], + ) + + # Check that if a new top model submits clones, they are filtered out. + uid_to_average_loss = {100: 0.5, 50: 0.499, 75: 0.499} + uid_to_block = {100: 1000, 50: 2000, 75: 2001} + epsilon_func = LinearDecay(0.005, 0.0001, 1000) + self.assertEqual( + sorted( + pt.validation.compute_competitive_uids( + uid_to_average_loss, uid_to_block, epsilon_func + ) + ), + [50, 100], + ) + + def test_compute_competitive_uids_better_models_sequentially_better(self): + # Each uploaded model is better than the previous. Expect to keep all of them. + uid_to_average_loss = {100: 0.5, 50: 0.499, 75: 0.498} + uid_to_block = {100: 1000, 50: 2000, 75: 2500} + epsilon_func = LinearDecay(0.005, 0.0001, 1000) + self.assertEqual( + sorted( + pt.validation.compute_competitive_uids( + uid_to_average_loss, uid_to_block, epsilon_func + ) + ), + [50, 75, 100], + ) + + def test_compute_competitive_uids_less_than_epsilon_better(self): + # Models are sequentially better, but less than epislon at full decay. Expect to only keep the first. + uid_to_average_loss = {100: 0.5, 50: 0.4999, 75: 0.499} + uid_to_block = {100: 1000, 50: 2000, 75: 2500} + epsilon_func = LinearDecay(0.005, 0.01, 1000) + self.assertEqual( + sorted( + pt.validation.compute_competitive_uids( + uid_to_average_loss, uid_to_block, epsilon_func + ) + ), + [100], + ) + + def test_compute_competitive_uids_later_worse_model_filtered(self): + # Models are sequentially better, but the last one is worse than a previous model. + uid_to_average_loss = {100: 0.5, 50: 0.498, 75: 0.499} + uid_to_block = {100: 1000, 50: 2000, 75: 2500} + epsilon_func = LinearDecay(0.005, 0.0001, 1000) + self.assertEqual( + sorted( + pt.validation.compute_competitive_uids( + uid_to_average_loss, uid_to_block, epsilon_func + ) + ), + [50, 100], + ) + + def test_compute_competitive_uids_few_models(self): + # Make sure the function works with none or only a few models. 
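+        # No uids should produce an empty list, a single uid is always kept
+        # (there are no earlier models it must beat), and a later uid with a
+        # strictly worse loss is dropped.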
+ epsilon_func = LinearDecay(0.005, 0.0001, 1000) + self.assertEqual( + pt.validation.compute_competitive_uids({}, {}, epsilon_func), [] + ) + self.assertEqual( + pt.validation.compute_competitive_uids( + {100: 0.5}, {100: 1000}, epsilon_func + ), + [100], + ) + self.assertEqual( + pt.validation.compute_competitive_uids( + {100: 0.5, 50: 0.7}, {100: 1000, 50: 2000}, epsilon_func + ), + [100], + ) + + def test_compute_competitive_uids_same_block(self): + # Make sure that if two models are uploaded at the same block, the one with the lower average loss is kept. + uid_to_average_loss = {100: 0.5, 50: 0.4} + uid_to_block = {100: 1000, 50: 1000} + epsilon_func = LinearDecay(0.005, 0.0001, 1000) + self.assertEqual( + sorted( + pt.validation.compute_competitive_uids( + uid_to_average_loss, uid_to_block, epsilon_func + ) + ), + [50], + ) + + # Check that if a new top model submits clones, they are filtered out. + uid_to_average_loss = {100: 0.5, 50: 0.499, 75: 0.499} + uid_to_block = {100: 1000, 50: 2000, 75: 2001} + epsilon_func = LinearDecay(0.005, 0.0001, 1000) + self.assertEqual( + sorted( + pt.validation.compute_competitive_uids( + uid_to_average_loss, uid_to_block, epsilon_func + ) + ), + [50, 100], + ) + + def test_compute_wins_one_uid(self): + # Verifies compute_wins works with a single uid. + wins, win_rates = pt.validation.compute_wins( + [100], {100: 0.5}, {100: 1000}, LinearDecay(0.005, 0.0001, 1000), 1000 + ) + self.assertEqual(wins, {100: 0}) + self.assertEqual(win_rates, {100: 1.0}) + + def test_compute_filtered_win_rates(self): + # Mimic what the validator does by first filtering the models and then computing win rates. + + # The current top model with submitted clones. + uid_to_average_loss = {100: 0.5, 50: 0.5, 75: 0.499} + uid_to_block = {100: 1000, 50: 2000, 75: 2001} + curr_block = 2001 + epsilon_func = LinearDecay(0.005, 0.0001, 2000) + competitive_uids = pt.validation.compute_competitive_uids( + uid_to_average_loss, uid_to_block, epsilon_func + ) + wins, win_rates = pt.validation.compute_wins( + competitive_uids, + uid_to_average_loss, + uid_to_block, + epsilon_func, + curr_block, + ) + self.assertEqual(wins, {100: 1, 75: 0}) + self.assertEqual(win_rates, {100: 1.0, 75: 0.0}) + + # Verify the case where a new top model submits a few clones. 
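+        # uid 50 beats uid 100's fully decayed loss (0.499 < 0.5 * 0.9999), but
+        # uids 75 and 80 only match uid 50's loss at later blocks, so they are
+        # filtered out before win rates are computed.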
+        uid_to_average_loss = {100: 0.5, 50: 0.499, 75: 0.499, 80: 0.499}
+        uid_to_block = {100: 1000, 50: 2000, 75: 2001, 80: 2002}
+        curr_block = 2002
+        epsilon_func = LinearDecay(0.005, 0.0001, 2000)
+        competitive_uids = pt.validation.compute_competitive_uids(
+            uid_to_average_loss, uid_to_block, epsilon_func
+        )
+        wins, win_rates = pt.validation.compute_wins(
+            competitive_uids,
+            uid_to_average_loss,
+            uid_to_block,
+            epsilon_func,
+            curr_block,
+        )
+        self.assertEqual(wins, {100: 1, 50: 0})
+        self.assertEqual(win_rates, {100: 1.0, 50: 0.0})

From e5c674f00716fec57164c4beecb25700117a3121 Mon Sep 17 00:00:00 2001
From: cryptal-mc
Date: Tue, 15 Oct 2024 10:35:55 +0000
Subject: [PATCH 2/2] Bumped version to 4.5.3 and validator to 3.4.0

---
 constants/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/constants/__init__.py b/constants/__init__.py
index 61c590d1..052f3c26 100644
--- a/constants/__init__.py
+++ b/constants/__init__.py
@@ -34,10 +34,10 @@

 # ---------------------------------
 # Release
-__version__ = "4.5.2"
+__version__ = "4.5.3"

 # Validator schema version
-__validator_version__ = "3.3.0"
+__validator_version__ = "3.4.0"
 version_split = __validator_version__.split(".")
 __spec_version__ = (
     (1000 * int(version_split[0]))