diff --git a/constants/__init__.py b/constants/__init__.py index 623bfde1..37d2664a 100644 --- a/constants/__init__.py +++ b/constants/__init__.py @@ -24,10 +24,7 @@ ModelConstraints, NormValidationConstraints, ) -from taoverse.model.competition.epsilon import ( - FixedEpsilon, - LinearDecay -) +from taoverse.model.competition.epsilon import FixedEpsilon, LinearDecay from competitions.data import CompetitionId from typing import Dict, List, Tuple @@ -37,7 +34,7 @@ # --------------------------------- # Release -__version__ = "4.5.0" +__version__ = "4.5.1" # Validator schema version __validator_version__ = "3.2.0" @@ -98,7 +95,7 @@ DATASET_BY_COMPETITION_ID: Dict[CompetitionId, str] = { CompetitionId.M772_MODEL: pt.dataset.SubsetFalconLoader, CompetitionId.B3_MODEL: pt.dataset.SubsetFalconLoader, - CompetitionId.B7_MODEL: pt.dataset.SubsetFineWebEdu2Loader, + CompetitionId.B7_MODEL: pt.dataset.SubsetFineWebEdu2Loader, CompetitionId.B14_MODEL: pt.dataset.SubsetFineWebEdu2Loader, } @@ -159,7 +156,9 @@ } # Defined model constraints by competition id with decaying epsilon -MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY: Dict[CompetitionId, ModelConstraints] = { +MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY: Dict[ + CompetitionId, ModelConstraints +] = { CompetitionId.M772_MODEL: ModelConstraints( max_model_parameter_size=772_000_000, min_model_parameter_size=572_000_000, @@ -215,7 +214,9 @@ } # Defined model constraints by competition id with decaying epsilon -MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2: Dict[CompetitionId, ModelConstraints] = { +MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2: Dict[ + CompetitionId, ModelConstraints +] = { CompetitionId.M772_MODEL: ModelConstraints( max_model_parameter_size=772_000_000, min_model_parameter_size=572_000_000, @@ -251,7 +252,7 @@ "attn_implementation": "flash_attention_2", }, eval_block_delay=0, - epsilon_func=LinearDecay(0.005, 0.0001, 100800), + epsilon_func=LinearDecay(0.005, 0.0001, 50400), max_bytes=29 * 1024 * 1024 * 1024, ), } @@ -309,22 +310,30 @@ [ Competition( CompetitionId.M772_MODEL, - MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[CompetitionId.M772_MODEL], + MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[ + CompetitionId.M772_MODEL + ], 0.14, ), Competition( CompetitionId.B3_MODEL, - MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[CompetitionId.B3_MODEL], + MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[ + CompetitionId.B3_MODEL + ], 0.29, ), Competition( CompetitionId.B7_MODEL, - MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[CompetitionId.B7_MODEL], + MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[ + CompetitionId.B7_MODEL + ], 0.15, ), Competition( CompetitionId.B14_MODEL, - MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[CompetitionId.B14_MODEL], + MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[ + CompetitionId.B14_MODEL + ], 0.42, ), ], @@ -334,17 +343,23 @@ [ Competition( CompetitionId.M772_MODEL, - MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[CompetitionId.M772_MODEL], + MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[ + CompetitionId.M772_MODEL + ], 0.14, ), Competition( CompetitionId.B3_MODEL, - MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[CompetitionId.B3_MODEL], + MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[ + CompetitionId.B3_MODEL + ], 0.29, ), Competition( CompetitionId.B14_MODEL, - MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[CompetitionId.B14_MODEL], + MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[ + CompetitionId.B14_MODEL + ], 0.57, ), ], @@ -354,23 +369,27 @@ [ Competition( CompetitionId.M772_MODEL, - MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2[CompetitionId.M772_MODEL], + MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2[ + CompetitionId.M772_MODEL + ], 0.14, ), Competition( CompetitionId.B3_MODEL, - MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2[CompetitionId.B3_MODEL], + MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2[ + CompetitionId.B3_MODEL + ], 0.29, ), Competition( CompetitionId.B14_MODEL, - MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2[CompetitionId.B14_MODEL], + MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2[ + CompetitionId.B14_MODEL + ], 0.57, ), ], ), - - ] for block_and_competitions in COMPETITION_SCHEDULE_BY_BLOCK: @@ -400,7 +419,7 @@ # validators number of pages to eval over miners on each step. pages_per_eval_unpack = 5 # With sample unpacking -pages_per_eval_pack = 18 +pages_per_eval_pack = 11 # validator eval batch size. batch_size = 1 @@ -408,7 +427,9 @@ sample_min = 5 # Max number of uids that can be either pending eval or currently being evaluated. # We allow the sample_min per competition + 10 additional models to be held at any one time. -updated_models_limit = sample_min * len(MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2) + 10 +updated_models_limit = ( + sample_min * len(MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2) + 10 +) # time required between updates to the chain. chain_update_cadence = dt.timedelta(minutes=20) # Number of blocks required between retrying evaluation of a model. diff --git a/neurons/miner.py b/neurons/miner.py index f96c4b9c..06deea04 100644 --- a/neurons/miner.py +++ b/neurons/miner.py @@ -278,7 +278,7 @@ async def main(config: bt.config): # Init model. # Init model. - tokenizer = ft.model.load_tokenizer(model_constraints, cache_dir=config.model_dir) + tokenizer = pt.model.load_tokenizer(model_constraints, cache_dir=config.model_dir) model = await load_starting_model(config, metagraph, chain_metadata_store, kwargs) model = model.train() model = model.to(config.device) @@ -410,11 +410,11 @@ async def main(config: bt.config): ) # First, reload the best model from the training run. - model_to_upload = ft.mining.load_local_model( + model_to_upload = pt.mining.load_local_model( model_dir, model_constraints.kwargs ) - await ft.mining.push( + await pt.mining.push( model_to_upload, config.hf_repo_id, wallet, diff --git a/neurons/validator.py b/neurons/validator.py index 0b2a37d8..936fbeb6 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -648,7 +648,7 @@ def clean_models(self): self.local_store.delete_unreferenced_models( valid_models_by_hotkey=evaluated_hotkeys_to_model_id, - grace_period_seconds=300, + grace_period_seconds=600, ) except Exception as e: bt.logging.error(f"Error in clean loop: {e}") @@ -891,11 +891,12 @@ async def run_step(self): tokenizer.eos_token_id, pack_samples, ), - ttl=400, + ttl=430, mode="spawn", ) del model_i + except Exception as e: bt.logging.error( f"Error in eval loop: {e}. Setting losses for uid: {uid_i} to infinity." diff --git a/pretrain/mining.py b/pretrain/mining.py index 3632ae4a..1f0eb668 100644 --- a/pretrain/mining.py +++ b/pretrain/mining.py @@ -46,6 +46,7 @@ from competitions.data import CompetitionId + def model_path(base_dir: str, run_id: str) -> str: """ Constructs a file path for storing the model relating to a training run. @@ -96,7 +97,8 @@ async def push( bt.logging.debug("Started uploading model to hugging face...") model_id = await remote_model_store.upload_model( - Model(id=model_id, pt_model=model), model_constraints) + Model(id=model_id, pt_model=model), model_constraints + ) bt.logging.success("Uploaded model to hugging face.") @@ -190,6 +192,7 @@ def load_local_model(model_dir: str, kwargs: Dict[str, Any]) -> PreTrainedModel: **kwargs, ) + async def load_remote_model( uid: int, download_dir: str, @@ -245,7 +248,7 @@ async def load_best_model( remote_model_store: Optional[RemoteModelStore] = None, ) -> PreTrainedModel: """Loads the model from the best performing miner to download_dir""" - best_uid = ft.graph.best_uid(competition_id=competition_id) + best_uid = pt.graph.best_uid(competition_id=competition_id) if best_uid is None: raise ValueError(f"No best models found for {competition_id}") diff --git a/requirements.txt b/requirements.txt index f4cde013..4725d2c6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,4 +11,4 @@ transformers==4.44.1 wandb datasets flash-attn -taoverse==1.0.5 +taoverse==1.0.6