diff --git a/constants/__init__.py b/constants/__init__.py
index 61c590d1..052f3c26 100644
--- a/constants/__init__.py
+++ b/constants/__init__.py
@@ -34,10 +34,10 @@
 # ---------------------------------
 
 # Release
-__version__ = "4.5.2"
+__version__ = "4.5.3"
 
 # Validator schema version
-__validator_version__ = "3.3.0"
+__validator_version__ = "3.4.0"
 version_split = __validator_version__.split(".")
 __spec_version__ = (
     (1000 * int(version_split[0]))
diff --git a/neurons/validator.py b/neurons/validator.py
index 936fbeb6..4681e477 100644
--- a/neurons/validator.py
+++ b/neurons/validator.py
@@ -799,14 +799,14 @@ async def run_step(self):
             tokenizer = pt.model.load_tokenizer(
                 competition.constraints, cache_dir=self.config.model_dir
             )
-
+
             if cur_block >= constants.sample_pack_block:
                 pack_samples = True
                 pages_per_eval = constants.pages_per_eval_pack
             else:
                 pack_samples = False
                 pages_per_eval = constants.pages_per_eval_unpack
-
+
             # If the option is set in the config, override
             pages_per_eval = (
                 self.config.pages_per_eval
@@ -896,7 +896,7 @@ async def run_step(self):
                     )
 
                     del model_i
-
+
                 except Exception as e:
                     bt.logging.error(
                         f"Error in eval loop: {e}. Setting losses for uid: {uid_i} to infinity."
@@ -914,14 +914,27 @@ async def run_step(self):
 
         # Compute wins and win rates per uid.
        # Take the average loss across all batches for comparison of best model.
-        # Keep it as a list of 1 for later calculations.
-        losses_per_uid = {
-            uid: [state.avg_loss()] for uid, state in uid_to_state.items()
+        uid_to_average_loss = {
+            uid: state.avg_loss() for uid, state in uid_to_state.items()
         }
         uid_to_block = {uid: state.block for uid, state in uid_to_state.items()}
+
+        # Filter to the list of uids that may at one point be a top model.
+        competitive_uids = pt.validation.compute_competitive_uids(
+            uid_to_average_loss, uid_to_block, competition.constraints.epsilon_func
+        )
+
+        # Log which models got dropped for the second pass.
+        dropped_uids = [uid for uid in uids if uid not in competitive_uids]
+        if dropped_uids:
+            bt.logging.info(
+                f"The following uids were not included in the win rate calculation because they did not beat the fully decayed loss of any previously submitted model in this eval batch: {dropped_uids}."
+            )
+
+        # Calculate new wins and win_rate with only the competitive uids considered.
         wins, win_rate = pt.validation.compute_wins(
-            uids,
-            losses_per_uid,
+            competitive_uids,
+            uid_to_average_loss,
             uid_to_block,
             competition.constraints.epsilon_func,
             cur_block,
@@ -932,7 +945,7 @@ async def run_step(self):
 
         # Compute softmaxed weights based on win rate.
         model_weights = torch.tensor(
-            [win_rate[uid] for uid in uids], dtype=torch.float32
+            [win_rate.get(uid, 0) for uid in uids], dtype=torch.float32
         )
         step_weights = torch.softmax(model_weights / constants.temperature, dim=0)
@@ -977,6 +990,13 @@ async def run_step(self):
                 : self.config.sample_min
             ]
         )
+        # Make sure we always keep around sample_min number of models to maintain previous behavior.
+        if len(models_to_keep) < self.config.sample_min:
+            for uid in sorted(uid_to_average_loss, key=uid_to_average_loss.get):
+                if len(models_to_keep) >= self.config.sample_min:
+                    break
+                models_to_keep.add(uid)
+
         self._update_uids_to_eval(
             competition.id, models_to_keep, active_competition_ids
         )
@@ -1102,8 +1122,9 @@ def log_step(
                 "epsilon_adv": competition_epsilon_func.compute_epsilon(
                     current_block, uid_to_state[uid].block
                 ),
-                "win_rate": win_rate[uid],
-                "win_total": wins[uid],
+                # We use 0 in the case where a uid was not competitive and therefore not used in win rate calcs.
+                "win_rate": win_rate[uid] if uid in win_rate else 0,
+                "win_total": wins[uid] if uid in wins else 0,
                 "weight": self.weights[uid].item(),
                 "norm_weight": sub_competition_weights[idx].item(),
             }
diff --git a/pretrain/validation.py b/pretrain/validation.py
index b0aeaa76..a4b21868 100644
--- a/pretrain/validation.py
+++ b/pretrain/validation.py
@@ -66,7 +66,7 @@ def iswin(
 
 def compute_wins(
     uids: typing.List[int],
-    losses_per_uid: typing.Dict[int, typing.List[float]],
+    uid_to_average_loss: typing.Dict[int, float],
     uid_to_block: typing.Dict[int, int],
     epsilon_func: EpsilonFunc,
     current_block: int,
@@ -76,7 +76,7 @@
 
     Parameters:
         uids (list): A list of uids to compare.
-        losses_per_uid (dict): A dictionary of losses for each uid by batch.
+        uid_to_average_loss (dict): A dictionary of average loss for each uid over all batches.
         uid_to_block (dict): A dictionary of blocks for each uid.
         epsilon_func (EpsilonFunc): Function that determines how much advantage to give to the earlier block.
         current_block: The current block.
@@ -92,26 +92,68 @@
             if uid_i == uid_j:
                 continue
 
-            for loss_i, loss_j in zip(losses_per_uid[uid_i], losses_per_uid[uid_j]):
-                wins[uid_i] += (
-                    1
-                    if iswin(
-                        loss_i,
-                        loss_j,
-                        uid_to_block[uid_i],
-                        uid_to_block[uid_j],
-                        epsilon_func,
-                        current_block,
-                    )
-                    else 0
+            wins[uid_i] += (
+                1
+                if iswin(
+                    uid_to_average_loss[uid_i],
+                    uid_to_average_loss[uid_j],
+                    uid_to_block[uid_i],
+                    uid_to_block[uid_j],
+                    epsilon_func,
+                    current_block,
                 )
-                total_matches += 1
-        # Calculate win rate for uid i
-        win_rate[uid_i] = wins[uid_i] / total_matches if total_matches > 0 else 0
+                else 0
+            )
+            total_matches += 1
+        # Calculate win rate for uid i. Default win_rate to 1 for the case of no matches.
+        win_rate[uid_i] = wins[uid_i] / total_matches if total_matches > 0 else 1
 
     return wins, win_rate
 
 
+def compute_competitive_uids(
+    uid_to_average_loss: typing.Dict[int, float],
+    uid_to_block: typing.Dict[int, int],
+    epsilon_func: EpsilonFunc,
+) -> typing.List[int]:
+    """
+    Computes the list of any uids that may at one point be the top model.
+
+    Parameters:
+        uid_to_average_loss (dict): A dictionary of average loss for each uid over all batches.
+        uid_to_block (dict): A dictionary of blocks for each uid.
+        epsilon_func (EpsilonFunc): Function that determines how much advantage to give to the earlier block.
+
+    Returns:
+        list: A list of uids that may at one point be the top model.
+    """
+    # Get fully decayed loss for every model.
+    fully_decayed_epsilon = 1 - epsilon_func.compute_epsilon(
+        current_block=math.inf, model_block=0
+    )
+    fully_decayed_losses = {
+        uid: uid_to_average_loss[uid] * fully_decayed_epsilon for uid in uid_to_block
+    }
+
+    # Iterate through the models and only keep models whose loss is better than
+    # all models uploaded at an earlier block, after they've fully decayed.
+    # If the model cannot, then there exists at least one model at an earlier block which
+    # will always have a better epsilon-adjusted loss, thus it will never be the top model.
+    competitive_uids = []
+    for uid, loss in uid_to_average_loss.items():
+        # Check if the current UID beats all earlier (or same block) models at full decay.
+        # all([]) is true so we always keep the earliest model.
+        earlier_uids = [
+            i
+            for i, block in uid_to_block.items()
+            if i != uid and block <= uid_to_block[uid]
+        ]
+        if all(loss < fully_decayed_losses[uid_other] for uid_other in earlier_uids):
+            competitive_uids.append(uid)
+
+    return competitive_uids
+
+
 def check_for_reasonable_output(
     model, input1: torch.Tensor, input2: torch.Tensor, pad_token_id: int
 ) -> bool:
diff --git a/tests/pretrain/test_validation.py b/tests/pretrain/test_validation.py
new file mode 100644
index 00000000..3393bb7b
--- /dev/null
+++ b/tests/pretrain/test_validation.py
@@ -0,0 +1,167 @@
+import unittest
+
+import pretrain as pt
+from taoverse.model.competition.epsilon import LinearDecay
+
+
+class TestValidation(unittest.TestCase):
+    def test_compute_competitive_uids_filters_clones(self):
+        # Check that if the current top model submits a few clones, they are filtered out.
+        uid_to_average_loss = {100: 0.5, 50: 0.5, 75: 0.499}
+        uid_to_block = {100: 1000, 50: 2000, 75: 2001}
+        epsilon_func = LinearDecay(0.005, 0.0001, 1000)
+        self.assertEqual(
+            sorted(
+                pt.validation.compute_competitive_uids(
+                    uid_to_average_loss, uid_to_block, epsilon_func
+                )
+            ),
+            [75, 100],
+        )
+
+        # Check that if a new top model submits clones, they are filtered out.
+        uid_to_average_loss = {100: 0.5, 50: 0.499, 75: 0.499}
+        uid_to_block = {100: 1000, 50: 2000, 75: 2001}
+        epsilon_func = LinearDecay(0.005, 0.0001, 1000)
+        self.assertEqual(
+            sorted(
+                pt.validation.compute_competitive_uids(
+                    uid_to_average_loss, uid_to_block, epsilon_func
+                )
+            ),
+            [50, 100],
+        )
+
+    def test_compute_competitive_uids_better_models_sequentially_better(self):
+        # Each uploaded model is better than the previous. Expect to keep all of them.
+        uid_to_average_loss = {100: 0.5, 50: 0.499, 75: 0.498}
+        uid_to_block = {100: 1000, 50: 2000, 75: 2500}
+        epsilon_func = LinearDecay(0.005, 0.0001, 1000)
+        self.assertEqual(
+            sorted(
+                pt.validation.compute_competitive_uids(
+                    uid_to_average_loss, uid_to_block, epsilon_func
+                )
+            ),
+            [50, 75, 100],
+        )
+
+    def test_compute_competitive_uids_less_than_epsilon_better(self):
+        # Models are sequentially better, but by less than epsilon at full decay. Expect to only keep the first.
+        uid_to_average_loss = {100: 0.5, 50: 0.4999, 75: 0.499}
+        uid_to_block = {100: 1000, 50: 2000, 75: 2500}
+        epsilon_func = LinearDecay(0.005, 0.01, 1000)
+        self.assertEqual(
+            sorted(
+                pt.validation.compute_competitive_uids(
+                    uid_to_average_loss, uid_to_block, epsilon_func
+                )
+            ),
+            [100],
+        )
+
+    def test_compute_competitive_uids_later_worse_model_filtered(self):
+        # Models are sequentially better, but the last one is worse than a previous model.
+        uid_to_average_loss = {100: 0.5, 50: 0.498, 75: 0.499}
+        uid_to_block = {100: 1000, 50: 2000, 75: 2500}
+        epsilon_func = LinearDecay(0.005, 0.0001, 1000)
+        self.assertEqual(
+            sorted(
+                pt.validation.compute_competitive_uids(
+                    uid_to_average_loss, uid_to_block, epsilon_func
+                )
+            ),
+            [50, 100],
+        )
+
+    def test_compute_competitive_uids_few_models(self):
+        # Make sure the function works with no models or only a few models.
+        epsilon_func = LinearDecay(0.005, 0.0001, 1000)
+        self.assertEqual(
+            pt.validation.compute_competitive_uids({}, {}, epsilon_func), []
+        )
+        self.assertEqual(
+            pt.validation.compute_competitive_uids(
+                {100: 0.5}, {100: 1000}, epsilon_func
+            ),
+            [100],
+        )
+        self.assertEqual(
+            pt.validation.compute_competitive_uids(
+                {100: 0.5, 50: 0.7}, {100: 1000, 50: 2000}, epsilon_func
+            ),
+            [100],
+        )
+
+    def test_compute_competitive_uids_same_block(self):
+        # Make sure that if two models are uploaded at the same block, the one with the lower average loss is kept.
+        uid_to_average_loss = {100: 0.5, 50: 0.4}
+        uid_to_block = {100: 1000, 50: 1000}
+        epsilon_func = LinearDecay(0.005, 0.0001, 1000)
+        self.assertEqual(
+            sorted(
+                pt.validation.compute_competitive_uids(
+                    uid_to_average_loss, uid_to_block, epsilon_func
+                )
+            ),
+            [50],
+        )
+
+        # Check that if a new top model submits clones, they are filtered out.
+        uid_to_average_loss = {100: 0.5, 50: 0.499, 75: 0.499}
+        uid_to_block = {100: 1000, 50: 2000, 75: 2001}
+        epsilon_func = LinearDecay(0.005, 0.0001, 1000)
+        self.assertEqual(
+            sorted(
+                pt.validation.compute_competitive_uids(
+                    uid_to_average_loss, uid_to_block, epsilon_func
+                )
+            ),
+            [50, 100],
+        )
+
+    def test_compute_wins_one_uid(self):
+        # Verifies compute_wins works with a single uid.
+        wins, win_rates = pt.validation.compute_wins(
+            [100], {100: 0.5}, {100: 1000}, LinearDecay(0.005, 0.0001, 1000), 1000
+        )
+        self.assertEqual(wins, {100: 0})
+        self.assertEqual(win_rates, {100: 1.0})
+
+    def test_compute_filtered_win_rates(self):
+        # Mimic what the validator does by first filtering the models and then computing win rates.
+
+        # The current top model with submitted clones.
+        uid_to_average_loss = {100: 0.5, 50: 0.5, 75: 0.499}
+        uid_to_block = {100: 1000, 50: 2000, 75: 2001}
+        curr_block = 2001
+        epsilon_func = LinearDecay(0.005, 0.0001, 2000)
+        competitive_uids = pt.validation.compute_competitive_uids(
+            uid_to_average_loss, uid_to_block, epsilon_func
+        )
+        wins, win_rates = pt.validation.compute_wins(
+            competitive_uids,
+            uid_to_average_loss,
+            uid_to_block,
+            epsilon_func,
+            curr_block,
+        )
+        self.assertEqual(wins, {100: 1, 75: 0})
+        self.assertEqual(win_rates, {100: 1.0, 75: 0.0})
+
+        # Verify the case where a new top model submits a few clones.
+        uid_to_average_loss = {100: 0.5, 50: 0.499, 75: 0.499, 80: 0.499}
+        uid_to_block = {100: 1000, 50: 2000, 75: 2001, 80: 2002}
+        curr_block = 2002
+        epsilon_func = LinearDecay(0.005, 0.0001, 2000)
+        competitive_uids = pt.validation.compute_competitive_uids(
+            uid_to_average_loss, uid_to_block, epsilon_func
+        )
+        wins, win_rates = pt.validation.compute_wins(
+            competitive_uids,
+            uid_to_average_loss,
+            uid_to_block,
+            epsilon_func,
+            curr_block,
+        )
+        self.assertEqual(wins, {100: 1, 50: 0})
+        self.assertEqual(win_rates, {100: 1.0, 50: 0.0})
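A minimal sketch (not part of the patch) of how the validator now composes the two calls. It reuses the inputs and expected results from test_compute_filtered_win_rates above; the module paths and LinearDecay arguments are taken from that test file.

import pretrain as pt
from taoverse.model.competition.epsilon import LinearDecay

uid_to_average_loss = {100: 0.5, 50: 0.5, 75: 0.499}
uid_to_block = {100: 1000, 50: 2000, 75: 2001}
epsilon_func = LinearDecay(0.005, 0.0001, 2000)

# First pass: drop any uid that cannot beat every earlier (or same block) model's
# loss even after epsilon has fully decayed.
competitive_uids = pt.validation.compute_competitive_uids(
    uid_to_average_loss, uid_to_block, epsilon_func
)

# Second pass: wins and win rates are computed over the surviving uids only; filtered
# uids never appear in the returned dicts, and the validator treats them as 0.
wins, win_rate = pt.validation.compute_wins(
    competitive_uids, uid_to_average_loss, uid_to_block, epsilon_func, 2001
)

# Per the test above, uid 50 (a same-loss clone at a later block) is dropped,
# leaving wins == {100: 1, 75: 0} and win_rate == {100: 1.0, 75: 0.0}.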