Merge pull request #186 from macrocosm-os/dev
Release 4.5.3
cryptal-mc authored Oct 15, 2024
2 parents 0637ee3 + b4e2fcc commit dd068a2
Showing 4 changed files with 260 additions and 30 deletions.
4 changes: 2 additions & 2 deletions constants/__init__.py
@@ -34,10 +34,10 @@
# ---------------------------------

# Release
-__version__ = "4.5.2"
+__version__ = "4.5.3"

# Validator schema version
-__validator_version__ = "3.3.0"
+__validator_version__ = "3.4.0"
version_split = __validator_version__.split(".")
__spec_version__ = (
    (1000 * int(version_split[0]))
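For reference, a minimal sketch of how the dotted validator version is packed into the integer __spec_version__. The hunk above is truncated after the major term, so the minor and patch terms below are an assumption based on the conventional 1000/10/1 weighting:

    version_split = "3.4.0".split(".")
    spec_version = (
        (1000 * int(version_split[0]))  # major
        + (10 * int(version_split[1]))  # minor (assumed term)
        + (1 * int(version_split[2]))  # patch (assumed term)
    )
    assert spec_version == 3040
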
43 changes: 32 additions & 11 deletions neurons/validator.py
@@ -799,14 +799,14 @@ async def run_step(self):
        tokenizer = pt.model.load_tokenizer(
            competition.constraints, cache_dir=self.config.model_dir
        )

        if cur_block >= constants.sample_pack_block:
            pack_samples = True
            pages_per_eval = constants.pages_per_eval_pack
        else:
            pack_samples = False
            pages_per_eval = constants.pages_per_eval_unpack

        # If the option is set in the config, override
        pages_per_eval = (
            self.config.pages_per_eval
@@ -896,7 +896,7 @@ async def run_step(self):
                )

                del model_i

            except Exception as e:
                bt.logging.error(
                    f"Error in eval loop: {e}. Setting losses for uid: {uid_i} to infinity."
@@ -914,14 +914,27 @@

        # Compute wins and win rates per uid.
        # Take the average loss across all batches for comparison of best model.
-        # Keep it as a list of 1 for later calculations.
-        losses_per_uid = {
-            uid: [state.avg_loss()] for uid, state in uid_to_state.items()
+        uid_to_average_loss = {
+            uid: state.avg_loss() for uid, state in uid_to_state.items()
        }
        uid_to_block = {uid: state.block for uid, state in uid_to_state.items()}

+        # Filter to the list of uids that may at one point be a top model.
+        competitive_uids = pt.validation.compute_competitive_uids(
+            uid_to_average_loss, uid_to_block, competition.constraints.epsilon_func
+        )
+
+        # Log which models got dropped for the second pass.
+        dropped_uids = [uid for uid in uids if uid not in competitive_uids]
+        if dropped_uids:
+            bt.logging.info(
+                f"The following uids were not included in the win rate calculation because they did not beat the fully decayed loss of any previously submitted model in this eval batch: {dropped_uids}."
+            )
+
+        # Calculate new wins and win_rate with only the competitive uids considered.
        wins, win_rate = pt.validation.compute_wins(
-            uids,
-            losses_per_uid,
+            competitive_uids,
+            uid_to_average_loss,
            uid_to_block,
            competition.constraints.epsilon_func,
            cur_block,
@@ -932,7 +945,7 @@

        # Compute softmaxed weights based on win rate.
        model_weights = torch.tensor(
-            [win_rate[uid] for uid in uids], dtype=torch.float32
+            [win_rate.get(uid, 0) for uid in uids], dtype=torch.float32
        )
        step_weights = torch.softmax(model_weights / constants.temperature, dim=0)

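A small sketch of why the lookup changed to win_rate.get(uid, 0): uids filtered out by compute_competitive_uids no longer have entries in win_rate, so they fall back to a win rate of 0 and receive a near-zero softmaxed weight. Toy values; the temperature here is an assumption standing in for constants.temperature:

    import torch

    uids = [100, 50, 75]
    win_rate = {100: 1.0, 75: 0.0}  # uid 50 was filtered out, so it has no entry

    model_weights = torch.tensor(
        [win_rate.get(uid, 0) for uid in uids], dtype=torch.float32
    )
    temperature = 0.04  # assumed value, for illustration only
    step_weights = torch.softmax(model_weights / temperature, dim=0)
    # Essentially all weight lands on uid 100; uids 50 and 75 get ~0.
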
@@ -977,6 +990,13 @@ async def run_step(self):
                : self.config.sample_min
            ]
        )
+        # Make sure we always keep around sample_min number of models to maintain previous behavior.
+        if len(models_to_keep) < self.config.sample_min:
+            for uid in sorted(uid_to_average_loss, key=uid_to_average_loss.get):
+                if len(models_to_keep) >= self.config.sample_min:
+                    break
+                models_to_keep.add(uid)
+
        self._update_uids_to_eval(
            competition.id, models_to_keep, active_competition_ids
        )
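A runnable sketch of the top-up added above, with toy values (sample_min is an assumed stand-in for self.config.sample_min): if too few models survive the earlier selection, the set is refilled with the lowest-average-loss uids:

    uid_to_average_loss = {100: 0.5, 50: 0.49, 75: 0.499}
    models_to_keep = {75}  # suppose only one uid survived the selection above
    sample_min = 2  # assumed config value

    if len(models_to_keep) < sample_min:
        for uid in sorted(uid_to_average_loss, key=uid_to_average_loss.get):
            if len(models_to_keep) >= sample_min:
                break
            models_to_keep.add(uid)

    assert models_to_keep == {50, 75}  # uid 50 has the lowest average loss
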
@@ -1102,8 +1122,9 @@ def log_step(
                "epsilon_adv": competition_epsilon_func.compute_epsilon(
                    current_block, uid_to_state[uid].block
                ),
-                "win_rate": win_rate[uid],
-                "win_total": wins[uid],
+                # We use 0 in the case where a uid was not competitive and therefore not used in win rate calcs.
+                "win_rate": win_rate[uid] if uid in win_rate else 0,
+                "win_total": wins[uid] if uid in wins else 0,
                "weight": self.weights[uid].item(),
                "norm_weight": sub_competition_weights[idx].item(),
            }
76 changes: 59 additions & 17 deletions pretrain/validation.py
@@ -66,7 +66,7 @@ def iswin(

def compute_wins(
    uids: typing.List[int],
-    losses_per_uid: typing.Dict[int, typing.List[float]],
+    uid_to_average_loss: typing.Dict[int, float],
    uid_to_block: typing.Dict[int, int],
    epsilon_func: EpsilonFunc,
    current_block: int,
@@ -76,7 +76,7 @@ def compute_wins(
    Parameters:
        uids (list): A list of uids to compare.
-        losses_per_uid (dict): A dictionary of losses for each uid by batch.
+        uid_to_average_loss (dict): A dictionary of average loss for each uid over all batches.
        uid_to_block (dict): A dictionary of blocks for each uid.
        epsilon_func (EpsilonFunc): Function that determines how much advantage to give to the earlier block.
        current_block: The current block.
@@ -92,26 +92,68 @@
            if uid_i == uid_j:
                continue

-            for loss_i, loss_j in zip(losses_per_uid[uid_i], losses_per_uid[uid_j]):
-                wins[uid_i] += (
-                    1
-                    if iswin(
-                        loss_i,
-                        loss_j,
-                        uid_to_block[uid_i],
-                        uid_to_block[uid_j],
-                        epsilon_func,
-                        current_block,
-                    )
-                    else 0
-                )
-                total_matches += 1
-        # Calculate win rate for uid i
-        win_rate[uid_i] = wins[uid_i] / total_matches if total_matches > 0 else 0
+            wins[uid_i] += (
+                1
+                if iswin(
+                    uid_to_average_loss[uid_i],
+                    uid_to_average_loss[uid_j],
+                    uid_to_block[uid_i],
+                    uid_to_block[uid_j],
+                    epsilon_func,
+                    current_block,
+                )
+                else 0
+            )
+            total_matches += 1
+        # Calculate win rate for uid i. Default win_rate to 1 for the case of no matches.
+        win_rate[uid_i] = wins[uid_i] / total_matches if total_matches > 0 else 1

    return wins, win_rate


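A usage sketch mirroring the values in the new tests below: at block 2001, epsilon has only partially decayed, so uid 75's 0.2% improvement over the earlier uid 100 is not yet enough to win:

    import pretrain as pt
    from taoverse.model.competition.epsilon import LinearDecay

    wins, win_rate = pt.validation.compute_wins(
        [100, 75],
        {100: 0.5, 75: 0.499},
        {100: 1000, 75: 2001},
        LinearDecay(0.005, 0.0001, 2000),
        2001,
    )
    # wins == {100: 1, 75: 0}; win_rate == {100: 1.0, 75: 0.0}
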
+def compute_competitive_uids(
+    uid_to_average_loss: typing.Dict[int, float],
+    uid_to_block: typing.Dict[int, int],
+    epsilon_func: EpsilonFunc,
+) -> typing.List[int]:
+    """
+    Computes the list of uids that could, at some point, be the top model.
+    Parameters:
+        uid_to_average_loss (dict): A dictionary of average loss for each uid over all batches.
+        uid_to_block (dict): A dictionary of blocks for each uid.
+        epsilon_func (EpsilonFunc): Function that determines how much advantage to give to the earlier block.
+    Returns:
+        list: A list of uids that could at some point be the top model.
+    """
+    # Get the fully decayed loss for every model.
+    fully_decayed_epsilon = 1 - epsilon_func.compute_epsilon(
+        current_block=math.inf, model_block=0
+    )
+    fully_decayed_losses = {
+        uid: uid_to_average_loss[uid] * fully_decayed_epsilon for uid in uid_to_block
+    }
+
+    # Iterate through the models and only keep models whose loss is better than
+    # that of every model uploaded at an earlier block, after epsilon has fully decayed.
+    # If a model cannot do that, then there is at least one earlier model that will
+    # always have a better epsilon-adjusted loss, so it can never be the top model.
+    competitive_uids = []
+    for uid, loss in uid_to_average_loss.items():
+        # Check if the current UID beats all earlier (or same-block) models at full decay.
+        # all([]) is True, so we always keep the earliest model.
+        earlier_uids = [
+            i
+            for i, block in uid_to_block.items()
+            if i != uid and block <= uid_to_block[uid]
+        ]
+        if all(loss < fully_decayed_losses[uid_other] for uid_other in earlier_uids):
+            competitive_uids.append(uid)
+
+    return competitive_uids


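For intuition, a minimal sketch grounded in the first test case below: at full decay the epsilon advantage is 0.0001, so a later model that merely ties the top model's loss can never win and is filtered out, while a genuine improvement survives:

    import pretrain as pt
    from taoverse.model.competition.epsilon import LinearDecay

    uid_to_average_loss = {100: 0.5, 50: 0.5, 75: 0.499}
    uid_to_block = {100: 1000, 50: 2000, 75: 2001}
    epsilon_func = LinearDecay(0.005, 0.0001, 1000)

    # uid 50 cannot beat uid 100's fully decayed loss (0.5 * (1 - 0.0001) = 0.49995),
    # so it is dropped; uid 75's 0.499 beats it, so it stays.
    competitive = pt.validation.compute_competitive_uids(
        uid_to_average_loss, uid_to_block, epsilon_func
    )
    assert sorted(competitive) == [75, 100]
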
def check_for_reasonable_output(
    model, input1: torch.Tensor, input2: torch.Tensor, pad_token_id: int
) -> bool:
167 changes: 167 additions & 0 deletions tests/pretrain/test_validation.py
@@ -0,0 +1,167 @@
import unittest
import pretrain as pt
from taoverse.model.competition.epsilon import LinearDecay


class TestValidation(unittest.TestCase):
    def test_compute_competitive_uids_filters_clones(self):
        # Check that if the current top model submits a few clones, they are filtered out.
        uid_to_average_loss = {100: 0.5, 50: 0.5, 75: 0.499}
        uid_to_block = {100: 1000, 50: 2000, 75: 2001}
        epsilon_func = LinearDecay(0.005, 0.0001, 1000)
        self.assertEqual(
            sorted(
                pt.validation.compute_competitive_uids(
                    uid_to_average_loss, uid_to_block, epsilon_func
                )
            ),
            [75, 100],
        )

        # Check that if a new top model submits clones, they are filtered out.
        uid_to_average_loss = {100: 0.5, 50: 0.499, 75: 0.499}
        uid_to_block = {100: 1000, 50: 2000, 75: 2001}
        epsilon_func = LinearDecay(0.005, 0.0001, 1000)
        self.assertEqual(
            sorted(
                pt.validation.compute_competitive_uids(
                    uid_to_average_loss, uid_to_block, epsilon_func
                )
            ),
            [50, 100],
        )

    def test_compute_competitive_uids_better_models_sequentially_better(self):
        # Each uploaded model is better than the previous. Expect to keep all of them.
        uid_to_average_loss = {100: 0.5, 50: 0.499, 75: 0.498}
        uid_to_block = {100: 1000, 50: 2000, 75: 2500}
        epsilon_func = LinearDecay(0.005, 0.0001, 1000)
        self.assertEqual(
            sorted(
                pt.validation.compute_competitive_uids(
                    uid_to_average_loss, uid_to_block, epsilon_func
                )
            ),
            [50, 75, 100],
        )

    def test_compute_competitive_uids_less_than_epsilon_better(self):
        # Models are sequentially better, but by less than epsilon at full decay. Expect to keep only the first.
        uid_to_average_loss = {100: 0.5, 50: 0.4999, 75: 0.499}
        uid_to_block = {100: 1000, 50: 2000, 75: 2500}
        epsilon_func = LinearDecay(0.005, 0.01, 1000)
        self.assertEqual(
            sorted(
                pt.validation.compute_competitive_uids(
                    uid_to_average_loss, uid_to_block, epsilon_func
                )
            ),
            [100],
        )

    def test_compute_competitive_uids_later_worse_model_filtered(self):
        # Models are sequentially better, but the last one is worse than a previous model.
        uid_to_average_loss = {100: 0.5, 50: 0.498, 75: 0.499}
        uid_to_block = {100: 1000, 50: 2000, 75: 2500}
        epsilon_func = LinearDecay(0.005, 0.0001, 1000)
        self.assertEqual(
            sorted(
                pt.validation.compute_competitive_uids(
                    uid_to_average_loss, uid_to_block, epsilon_func
                )
            ),
            [50, 100],
        )

    def test_compute_competitive_uids_few_models(self):
        # Make sure the function works with no models or only a few.
        epsilon_func = LinearDecay(0.005, 0.0001, 1000)
        self.assertEqual(
            pt.validation.compute_competitive_uids({}, {}, epsilon_func), []
        )
        self.assertEqual(
            pt.validation.compute_competitive_uids(
                {100: 0.5}, {100: 1000}, epsilon_func
            ),
            [100],
        )
        self.assertEqual(
            pt.validation.compute_competitive_uids(
                {100: 0.5, 50: 0.7}, {100: 1000, 50: 2000}, epsilon_func
            ),
            [100],
        )

    def test_compute_competitive_uids_same_block(self):
        # Make sure that if two models are uploaded at the same block, the one with the lower average loss is kept.
        uid_to_average_loss = {100: 0.5, 50: 0.4}
        uid_to_block = {100: 1000, 50: 1000}
        epsilon_func = LinearDecay(0.005, 0.0001, 1000)
        self.assertEqual(
            sorted(
                pt.validation.compute_competitive_uids(
                    uid_to_average_loss, uid_to_block, epsilon_func
                )
            ),
            [50],
        )

        # Check that if a new top model submits clones, they are filtered out.
        uid_to_average_loss = {100: 0.5, 50: 0.499, 75: 0.499}
        uid_to_block = {100: 1000, 50: 2000, 75: 2001}
        epsilon_func = LinearDecay(0.005, 0.0001, 1000)
        self.assertEqual(
            sorted(
                pt.validation.compute_competitive_uids(
                    uid_to_average_loss, uid_to_block, epsilon_func
                )
            ),
            [50, 100],
        )

    def test_compute_wins_one_uid(self):
        # Verifies compute_wins works with a single uid.
        wins, win_rates = pt.validation.compute_wins(
            [100], {100: 0.5}, {100: 1000}, LinearDecay(0.005, 0.0001, 1000), 1000
        )
        self.assertEqual(wins, {100: 0})
        self.assertEqual(win_rates, {100: 1.0})

    def test_compute_filtered_win_rates(self):
        # Mimic what the validator does by first filtering the models and then computing win rates.

        # The current top model with submitted clones.
        uid_to_average_loss = {100: 0.5, 50: 0.5, 75: 0.499}
        uid_to_block = {100: 1000, 50: 2000, 75: 2001}
        curr_block = 2001
        epsilon_func = LinearDecay(0.005, 0.0001, 2000)
        competitive_uids = pt.validation.compute_competitive_uids(
            uid_to_average_loss, uid_to_block, epsilon_func
        )
        wins, win_rates = pt.validation.compute_wins(
            competitive_uids,
            uid_to_average_loss,
            uid_to_block,
            epsilon_func,
            curr_block,
        )
        self.assertEqual(wins, {100: 1, 75: 0})
        self.assertEqual(win_rates, {100: 1.0, 75: 0.0})

        # Verify the case where a new top model submits a few clones.
        uid_to_average_loss = {100: 0.5, 50: 0.499, 75: 0.499, 80: 0.499}
        uid_to_block = {100: 1000, 50: 2000, 75: 2001, 80: 2002}
        curr_block = 2002
        epsilon_func = LinearDecay(0.005, 0.0001, 2000)
        competitive_uids = pt.validation.compute_competitive_uids(
            uid_to_average_loss, uid_to_block, epsilon_func
        )
        wins, win_rates = pt.validation.compute_wins(
            competitive_uids,
            uid_to_average_loss,
            uid_to_block,
            epsilon_func,
            curr_block,
        )
        self.assertEqual(wins, {100: 1, 50: 0})
        self.assertEqual(win_rates, {100: 1.0, 50: 0.0})
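
These tests should run with the standard unittest runner from the repository root, e.g. python -m unittest tests.pretrain.test_validation.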
