Merge pull request #186 from macrocosm-os/dev
Release 4.5.3
cryptal-mc authored Oct 15, 2024
2 parents 0637ee3 + b4e2fcc commit dd068a2
Showing 4 changed files with 260 additions and 30 deletions.
4 changes: 2 additions & 2 deletions constants/__init__.py
@@ -34,10 +34,10 @@
# ---------------------------------

# Release
-__version__ = "4.5.2"
+__version__ = "4.5.3"

# Validator schema version
-__validator_version__ = "3.3.0"
+__validator_version__ = "3.4.0"
version_split = __validator_version__.split(".")
__spec_version__ = (
    (1000 * int(version_split[0]))
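For reference, a minimal sketch of how the dotted validator version is packed into the integer __spec_version__. The hunk above is truncated after the major term, so the minor and patch terms below are an assumption based on the conventional 1000/10/1 weighting:

    version_split = "3.4.0".split(".")
    spec_version = (
        (1000 * int(version_split[0]))  # major
        + (10 * int(version_split[1]))  # minor (assumed term)
        + (1 * int(version_split[2]))  # patch (assumed term)
    )
    assert spec_version == 3040
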
43 changes: 32 additions & 11 deletions neurons/validator.py
@@ -799,14 +799,14 @@ async def run_step(self):
        tokenizer = pt.model.load_tokenizer(
            competition.constraints, cache_dir=self.config.model_dir
        )

        if cur_block >= constants.sample_pack_block:
            pack_samples = True
            pages_per_eval = constants.pages_per_eval_pack
        else:
            pack_samples = False
            pages_per_eval = constants.pages_per_eval_unpack

        # If the option is set in the config, override
        pages_per_eval = (
            self.config.pages_per_eval
@@ -896,7 +896,7 @@ async def run_step(self):
                )

                del model_i

            except Exception as e:
                bt.logging.error(
                    f"Error in eval loop: {e}. Setting losses for uid: {uid_i} to infinity."
@@ -914,14 +914,27 @@

        # Compute wins and win rates per uid.
        # Take the average loss across all batches for comparison of best model.
-        # Keep it as a list of 1 for later calculations.
-        losses_per_uid = {
-            uid: [state.avg_loss()] for uid, state in uid_to_state.items()
+        uid_to_average_loss = {
+            uid: state.avg_loss() for uid, state in uid_to_state.items()
        }
        uid_to_block = {uid: state.block for uid, state in uid_to_state.items()}

+        # Filter to the list of uids that may at one point be a top model.
+        competitive_uids = pt.validation.compute_competitive_uids(
+            uid_to_average_loss, uid_to_block, competition.constraints.epsilon_func
+        )
+
+        # Log which models got dropped for the second pass.
+        dropped_uids = [uid for uid in uids if uid not in competitive_uids]
+        if dropped_uids:
+            bt.logging.info(
+                f"The following uids were not included in the win rate calculation because they did not beat the fully decayed loss of any previously submitted model in this eval batch: {dropped_uids}."
+            )
+
+        # Calculate new wins and win_rate with only the competitive uids considered.
        wins, win_rate = pt.validation.compute_wins(
-            uids,
-            losses_per_uid,
+            competitive_uids,
+            uid_to_average_loss,
            uid_to_block,
            competition.constraints.epsilon_func,
            cur_block,
@@ -932,7 +945,7 @@

        # Compute softmaxed weights based on win rate.
        model_weights = torch.tensor(
-            [win_rate[uid] for uid in uids], dtype=torch.float32
+            [win_rate.get(uid, 0) for uid in uids], dtype=torch.float32
        )
        step_weights = torch.softmax(model_weights / constants.temperature, dim=0)

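A small sketch of why the lookup changed to win_rate.get(uid, 0): uids filtered out by compute_competitive_uids no longer have entries in win_rate, so they fall back to a win rate of 0 and receive a near-zero softmaxed weight. Toy values; the temperature here is an assumption standing in for constants.temperature:

    import torch

    uids = [100, 50, 75]
    win_rate = {100: 1.0, 75: 0.0}  # uid 50 was filtered out, so it has no entry

    model_weights = torch.tensor(
        [win_rate.get(uid, 0) for uid in uids], dtype=torch.float32
    )
    temperature = 0.04  # assumed value, for illustration only
    step_weights = torch.softmax(model_weights / temperature, dim=0)
    # Essentially all weight lands on uid 100; uids 50 and 75 get ~0.
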
@@ -977,6 +990,13 @@ async def run_step(self):
                : self.config.sample_min
            ]
        )
+        # Make sure we always keep around sample_min number of models to maintain previous behavior.
+        if len(models_to_keep) < self.config.sample_min:
+            for uid in sorted(uid_to_average_loss, key=uid_to_average_loss.get):
+                if len(models_to_keep) >= self.config.sample_min:
+                    break
+                models_to_keep.add(uid)
+
        self._update_uids_to_eval(
            competition.id, models_to_keep, active_competition_ids
        )
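A runnable sketch of the top-up added above, with toy values (sample_min is an assumed stand-in for self.config.sample_min): if too few models survive the earlier selection, the set is refilled with the lowest-average-loss uids:

    uid_to_average_loss = {100: 0.5, 50: 0.49, 75: 0.499}
    models_to_keep = {75}  # suppose only one uid survived the selection above
    sample_min = 2  # assumed config value

    if len(models_to_keep) < sample_min:
        for uid in sorted(uid_to_average_loss, key=uid_to_average_loss.get):
            if len(models_to_keep) >= sample_min:
                break
            models_to_keep.add(uid)

    assert models_to_keep == {50, 75}  # uid 50 has the lowest average loss
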
@@ -1102,8 +1122,9 @@ def log_step(
                "epsilon_adv": competition_epsilon_func.compute_epsilon(
                    current_block, uid_to_state[uid].block
                ),
-                "win_rate": win_rate[uid],
-                "win_total": wins[uid],
+                # We use 0 in the case where a uid was not competitive and therefore not used in win rate calcs.
+                "win_rate": win_rate[uid] if uid in win_rate else 0,
+                "win_total": wins[uid] if uid in wins else 0,
                "weight": self.weights[uid].item(),
                "norm_weight": sub_competition_weights[idx].item(),
            }
76 changes: 59 additions & 17 deletions pretrain/validation.py
@@ -66,7 +66,7 @@ def iswin(

def compute_wins(
    uids: typing.List[int],
-    losses_per_uid: typing.Dict[int, typing.List[float]],
+    uid_to_average_loss: typing.Dict[int, float],
    uid_to_block: typing.Dict[int, int],
    epsilon_func: EpsilonFunc,
    current_block: int,
@@ -76,7 +76,7 @@ def compute_wins(
    Parameters:
        uids (list): A list of uids to compare.
-        losses_per_uid (dict): A dictionary of losses for each uid by batch.
+        uid_to_average_loss (dict): A dictionary of average loss for each uid over all batches.
        uid_to_block (dict): A dictionary of blocks for each uid.
        epsilon_func (EpsilonFunc): Function that determines how much advantage to give to the earlier block.
        current_block: The current block.
@@ -92,26 +92,68 @@
            if uid_i == uid_j:
                continue

-            for loss_i, loss_j in zip(losses_per_uid[uid_i], losses_per_uid[uid_j]):
-                wins[uid_i] += (
-                    1
-                    if iswin(
-                        loss_i,
-                        loss_j,
-                        uid_to_block[uid_i],
-                        uid_to_block[uid_j],
-                        epsilon_func,
-                        current_block,
-                    )
-                    else 0
-                )
-                total_matches += 1
-        # Calculate win rate for uid i
-        win_rate[uid_i] = wins[uid_i] / total_matches if total_matches > 0 else 0
+            wins[uid_i] += (
+                1
+                if iswin(
+                    uid_to_average_loss[uid_i],
+                    uid_to_average_loss[uid_j],
+                    uid_to_block[uid_i],
+                    uid_to_block[uid_j],
+                    epsilon_func,
+                    current_block,
+                )
+                else 0
+            )
+            total_matches += 1
+        # Calculate win rate for uid i. Default win_rate to 1 for the case of no matches.
+        win_rate[uid_i] = wins[uid_i] / total_matches if total_matches > 0 else 1

    return wins, win_rate


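A usage sketch mirroring the values in the new tests below: at block 2001, epsilon has only partially decayed, so uid 75's 0.2% improvement over the earlier uid 100 is not yet enough to win:

    import pretrain as pt
    from taoverse.model.competition.epsilon import LinearDecay

    wins, win_rate = pt.validation.compute_wins(
        [100, 75],
        {100: 0.5, 75: 0.499},
        {100: 1000, 75: 2001},
        LinearDecay(0.005, 0.0001, 2000),
        2001,
    )
    # wins == {100: 1, 75: 0}; win_rate == {100: 1.0, 75: 0.0}
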
+def compute_competitive_uids(
+    uid_to_average_loss: typing.Dict[int, float],
+    uid_to_block: typing.Dict[int, int],
+    epsilon_func: EpsilonFunc,
+) -> typing.List[int]:
+    """
+    Computes the list of uids that could, at some point, be the top model.
+    Parameters:
+        uid_to_average_loss (dict): A dictionary of average loss for each uid over all batches.
+        uid_to_block (dict): A dictionary of blocks for each uid.
+        epsilon_func (EpsilonFunc): Function that determines how much advantage to give to the earlier block.
+    Returns:
+        list: A list of uids that could at some point be the top model.
+    """
+    # Get the fully decayed loss for every model.
+    fully_decayed_epsilon = 1 - epsilon_func.compute_epsilon(
+        current_block=math.inf, model_block=0
+    )
+    fully_decayed_losses = {
+        uid: uid_to_average_loss[uid] * fully_decayed_epsilon for uid in uid_to_block
+    }
+
+    # Iterate through the models and only keep models whose loss is better than
+    # that of every model uploaded at an earlier block, after epsilon has fully decayed.
+    # If a model cannot do that, then there is at least one earlier model that will
+    # always have a better epsilon-adjusted loss, so it can never be the top model.
+    competitive_uids = []
+    for uid, loss in uid_to_average_loss.items():
+        # Check if the current UID beats all earlier (or same-block) models at full decay.
+        # all([]) is True, so we always keep the earliest model.
+        earlier_uids = [
+            i
+            for i, block in uid_to_block.items()
+            if i != uid and block <= uid_to_block[uid]
+        ]
+        if all(loss < fully_decayed_losses[uid_other] for uid_other in earlier_uids):
+            competitive_uids.append(uid)
+
+    return competitive_uids


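For intuition, a minimal sketch grounded in the first test case below: at full decay the epsilon advantage is 0.0001, so a later model that merely ties the top model's loss can never win and is filtered out, while a genuine improvement survives:

    import pretrain as pt
    from taoverse.model.competition.epsilon import LinearDecay

    uid_to_average_loss = {100: 0.5, 50: 0.5, 75: 0.499}
    uid_to_block = {100: 1000, 50: 2000, 75: 2001}
    epsilon_func = LinearDecay(0.005, 0.0001, 1000)

    # uid 50 cannot beat uid 100's fully decayed loss (0.5 * (1 - 0.0001) = 0.49995),
    # so it is dropped; uid 75's 0.499 beats it, so it stays.
    competitive = pt.validation.compute_competitive_uids(
        uid_to_average_loss, uid_to_block, epsilon_func
    )
    assert sorted(competitive) == [75, 100]
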
def check_for_reasonable_output(
    model, input1: torch.Tensor, input2: torch.Tensor, pad_token_id: int
) -> bool:
167 changes: 167 additions & 0 deletions tests/pretrain/test_validation.py
@@ -0,0 +1,167 @@
import unittest
import pretrain as pt
from taoverse.model.competition.epsilon import LinearDecay


class TestValidation(unittest.TestCase):
    def test_compute_competitive_uids_filters_clones(self):
        # Check that if the current top model submits a few clones, they are filtered out.
        uid_to_average_loss = {100: 0.5, 50: 0.5, 75: 0.499}
        uid_to_block = {100: 1000, 50: 2000, 75: 2001}
        epsilon_func = LinearDecay(0.005, 0.0001, 1000)
        self.assertEqual(
            sorted(
                pt.validation.compute_competitive_uids(
                    uid_to_average_loss, uid_to_block, epsilon_func
                )
            ),
            [75, 100],
        )

        # Check that if a new top model submits clones, they are filtered out.
        uid_to_average_loss = {100: 0.5, 50: 0.499, 75: 0.499}
        uid_to_block = {100: 1000, 50: 2000, 75: 2001}
        epsilon_func = LinearDecay(0.005, 0.0001, 1000)
        self.assertEqual(
            sorted(
                pt.validation.compute_competitive_uids(
                    uid_to_average_loss, uid_to_block, epsilon_func
                )
            ),
            [50, 100],
        )

    def test_compute_competitive_uids_better_models_sequentially_better(self):
        # Each uploaded model is better than the previous. Expect to keep all of them.
        uid_to_average_loss = {100: 0.5, 50: 0.499, 75: 0.498}
        uid_to_block = {100: 1000, 50: 2000, 75: 2500}
        epsilon_func = LinearDecay(0.005, 0.0001, 1000)
        self.assertEqual(
            sorted(
                pt.validation.compute_competitive_uids(
                    uid_to_average_loss, uid_to_block, epsilon_func
                )
            ),
            [50, 75, 100],
        )

    def test_compute_competitive_uids_less_than_epsilon_better(self):
        # Models are sequentially better, but by less than epsilon at full decay. Expect to keep only the first.
        uid_to_average_loss = {100: 0.5, 50: 0.4999, 75: 0.499}
        uid_to_block = {100: 1000, 50: 2000, 75: 2500}
        epsilon_func = LinearDecay(0.005, 0.01, 1000)
        self.assertEqual(
            sorted(
                pt.validation.compute_competitive_uids(
                    uid_to_average_loss, uid_to_block, epsilon_func
                )
            ),
            [100],
        )

    def test_compute_competitive_uids_later_worse_model_filtered(self):
        # Models are sequentially better, but the last one is worse than a previous model.
        uid_to_average_loss = {100: 0.5, 50: 0.498, 75: 0.499}
        uid_to_block = {100: 1000, 50: 2000, 75: 2500}
        epsilon_func = LinearDecay(0.005, 0.0001, 1000)
        self.assertEqual(
            sorted(
                pt.validation.compute_competitive_uids(
                    uid_to_average_loss, uid_to_block, epsilon_func
                )
            ),
            [50, 100],
        )

    def test_compute_competitive_uids_few_models(self):
        # Make sure the function works with no models or only a few.
        epsilon_func = LinearDecay(0.005, 0.0001, 1000)
        self.assertEqual(
            pt.validation.compute_competitive_uids({}, {}, epsilon_func), []
        )
        self.assertEqual(
            pt.validation.compute_competitive_uids(
                {100: 0.5}, {100: 1000}, epsilon_func
            ),
            [100],
        )
        self.assertEqual(
            pt.validation.compute_competitive_uids(
                {100: 0.5, 50: 0.7}, {100: 1000, 50: 2000}, epsilon_func
            ),
            [100],
        )

    def test_compute_competitive_uids_same_block(self):
        # Make sure that if two models are uploaded at the same block, the one with the lower average loss is kept.
        uid_to_average_loss = {100: 0.5, 50: 0.4}
        uid_to_block = {100: 1000, 50: 1000}
        epsilon_func = LinearDecay(0.005, 0.0001, 1000)
        self.assertEqual(
            sorted(
                pt.validation.compute_competitive_uids(
                    uid_to_average_loss, uid_to_block, epsilon_func
                )
            ),
            [50],
        )

        # Check that if a new top model submits clones, they are filtered out.
        uid_to_average_loss = {100: 0.5, 50: 0.499, 75: 0.499}
        uid_to_block = {100: 1000, 50: 2000, 75: 2001}
        epsilon_func = LinearDecay(0.005, 0.0001, 1000)
        self.assertEqual(
            sorted(
                pt.validation.compute_competitive_uids(
                    uid_to_average_loss, uid_to_block, epsilon_func
                )
            ),
            [50, 100],
        )

    def test_compute_wins_one_uid(self):
        # Verifies compute_wins works with a single uid.
        wins, win_rates = pt.validation.compute_wins(
            [100], {100: 0.5}, {100: 1000}, LinearDecay(0.005, 0.0001, 1000), 1000
        )
        self.assertEqual(wins, {100: 0})
        self.assertEqual(win_rates, {100: 1.0})

    def test_compute_filtered_win_rates(self):
        # Mimic what the validator does by first filtering the models and then computing win rates.

        # The current top model with submitted clones.
        uid_to_average_loss = {100: 0.5, 50: 0.5, 75: 0.499}
        uid_to_block = {100: 1000, 50: 2000, 75: 2001}
        curr_block = 2001
        epsilon_func = LinearDecay(0.005, 0.0001, 2000)
        competitive_uids = pt.validation.compute_competitive_uids(
            uid_to_average_loss, uid_to_block, epsilon_func
        )
        wins, win_rates = pt.validation.compute_wins(
            competitive_uids,
            uid_to_average_loss,
            uid_to_block,
            epsilon_func,
            curr_block,
        )
        self.assertEqual(wins, {100: 1, 75: 0})
        self.assertEqual(win_rates, {100: 1.0, 75: 0.0})

        # Verify the case where a new top model submits a few clones.
        uid_to_average_loss = {100: 0.5, 50: 0.499, 75: 0.499, 80: 0.499}
        uid_to_block = {100: 1000, 50: 2000, 75: 2001, 80: 2002}
        curr_block = 2002
        epsilon_func = LinearDecay(0.005, 0.0001, 2000)
        competitive_uids = pt.validation.compute_competitive_uids(
            uid_to_average_loss, uid_to_block, epsilon_func
        )
        wins, win_rates = pt.validation.compute_wins(
            competitive_uids,
            uid_to_average_loss,
            uid_to_block,
            epsilon_func,
            curr_block,
        )
        self.assertEqual(wins, {100: 1, 50: 0})
        self.assertEqual(win_rates, {100: 1.0, 50: 0.0})
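
These tests should run with the standard unittest runner from the repository root, e.g. python -m unittest tests.pretrain.test_validation.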
