Skip to content

Commit

Permalink
Merge pull request #181 from macrocosm-os/dev
Browse files Browse the repository at this point in the history
Release 4.5.1
  • Loading branch information
cryptal-mc authored Oct 10, 2024
2 parents fde1e89 + 521d012 commit 8468a04
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 31 deletions.
67 changes: 44 additions & 23 deletions constants/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,7 @@
ModelConstraints,
NormValidationConstraints,
)
from taoverse.model.competition.epsilon import (
FixedEpsilon,
LinearDecay
)
from taoverse.model.competition.epsilon import FixedEpsilon, LinearDecay
from competitions.data import CompetitionId

from typing import Dict, List, Tuple
Expand All @@ -37,7 +34,7 @@
# ---------------------------------

# Release
__version__ = "4.5.0"
__version__ = "4.5.1"

# Validator schema version
__validator_version__ = "3.2.0"
Expand Down Expand Up @@ -98,7 +95,7 @@
DATASET_BY_COMPETITION_ID: Dict[CompetitionId, str] = {
CompetitionId.M772_MODEL: pt.dataset.SubsetFalconLoader,
CompetitionId.B3_MODEL: pt.dataset.SubsetFalconLoader,
CompetitionId.B7_MODEL: pt.dataset.SubsetFineWebEdu2Loader,
CompetitionId.B7_MODEL: pt.dataset.SubsetFineWebEdu2Loader,
CompetitionId.B14_MODEL: pt.dataset.SubsetFineWebEdu2Loader,
}

Expand Down Expand Up @@ -159,7 +156,9 @@
}

# Defined model constraints by competition id with decaying epsilon
MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY: Dict[CompetitionId, ModelConstraints] = {
MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY: Dict[
CompetitionId, ModelConstraints
] = {
CompetitionId.M772_MODEL: ModelConstraints(
max_model_parameter_size=772_000_000,
min_model_parameter_size=572_000_000,
Expand Down Expand Up @@ -215,7 +214,9 @@
}

# Defined model constraints by competition id with decaying epsilon
MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2: Dict[CompetitionId, ModelConstraints] = {
MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2: Dict[
CompetitionId, ModelConstraints
] = {
CompetitionId.M772_MODEL: ModelConstraints(
max_model_parameter_size=772_000_000,
min_model_parameter_size=572_000_000,
Expand Down Expand Up @@ -251,7 +252,7 @@
"attn_implementation": "flash_attention_2",
},
eval_block_delay=0,
epsilon_func=LinearDecay(0.005, 0.0001, 100800),
epsilon_func=LinearDecay(0.005, 0.0001, 50400),
max_bytes=29 * 1024 * 1024 * 1024,
),
}
Expand Down Expand Up @@ -309,22 +310,30 @@
[
Competition(
CompetitionId.M772_MODEL,
MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[CompetitionId.M772_MODEL],
MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[
CompetitionId.M772_MODEL
],
0.14,
),
Competition(
CompetitionId.B3_MODEL,
MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[CompetitionId.B3_MODEL],
MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[
CompetitionId.B3_MODEL
],
0.29,
),
Competition(
CompetitionId.B7_MODEL,
MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[CompetitionId.B7_MODEL],
MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[
CompetitionId.B7_MODEL
],
0.15,
),
Competition(
CompetitionId.B14_MODEL,
MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[CompetitionId.B14_MODEL],
MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[
CompetitionId.B14_MODEL
],
0.42,
),
],
Expand All @@ -334,17 +343,23 @@
[
Competition(
CompetitionId.M772_MODEL,
MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[CompetitionId.M772_MODEL],
MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[
CompetitionId.M772_MODEL
],
0.14,
),
Competition(
CompetitionId.B3_MODEL,
MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[CompetitionId.B3_MODEL],
MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[
CompetitionId.B3_MODEL
],
0.29,
),
Competition(
CompetitionId.B14_MODEL,
MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[CompetitionId.B14_MODEL],
MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY[
CompetitionId.B14_MODEL
],
0.57,
),
],
Expand All @@ -354,23 +369,27 @@
[
Competition(
CompetitionId.M772_MODEL,
MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2[CompetitionId.M772_MODEL],
MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2[
CompetitionId.M772_MODEL
],
0.14,
),
Competition(
CompetitionId.B3_MODEL,
MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2[CompetitionId.B3_MODEL],
MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2[
CompetitionId.B3_MODEL
],
0.29,
),
Competition(
CompetitionId.B14_MODEL,
MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2[CompetitionId.B14_MODEL],
MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2[
CompetitionId.B14_MODEL
],
0.57,
),
],
),


]

for block_and_competitions in COMPETITION_SCHEDULE_BY_BLOCK:
Expand Down Expand Up @@ -400,15 +419,17 @@

# Number of pages a validator evaluates miners over on each step.
pages_per_eval_unpack = 5 # With sample unpacking
pages_per_eval_pack = 18
pages_per_eval_pack = 11

# validator eval batch size.
batch_size = 1
# validator eval batch min to keep for next loop.
sample_min = 5
# Max number of uids that can be either pending eval or currently being evaluated.
# We allow the sample_min per competition + 10 additional models to be held at any one time.
updated_models_limit = sample_min * len(MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2) + 10
updated_models_limit = (
sample_min * len(MODEL_CONSTRAINTS_BY_COMPETITION_ID_LINEAR_DECAY_2) + 10
)
# time required between updates to the chain.
chain_update_cadence = dt.timedelta(minutes=20)
# Number of blocks required between retrying evaluation of a model.
Expand Down
6 changes: 3 additions & 3 deletions neurons/miner.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ async def main(config: bt.config):

# Init model.
# Init model.
tokenizer = ft.model.load_tokenizer(model_constraints, cache_dir=config.model_dir)
tokenizer = pt.model.load_tokenizer(model_constraints, cache_dir=config.model_dir)
model = await load_starting_model(config, metagraph, chain_metadata_store, kwargs)
model = model.train()
model = model.to(config.device)
Expand Down Expand Up @@ -410,11 +410,11 @@ async def main(config: bt.config):
)

# First, reload the best model from the training run.
model_to_upload = ft.mining.load_local_model(
model_to_upload = pt.mining.load_local_model(
model_dir, model_constraints.kwargs
)

await ft.mining.push(
await pt.mining.push(
model_to_upload,
config.hf_repo_id,
wallet,
Expand Down
5 changes: 3 additions & 2 deletions neurons/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,7 @@ def clean_models(self):

self.local_store.delete_unreferenced_models(
valid_models_by_hotkey=evaluated_hotkeys_to_model_id,
grace_period_seconds=300,
grace_period_seconds=600,
)
except Exception as e:
bt.logging.error(f"Error in clean loop: {e}")
Expand Down Expand Up @@ -891,11 +891,12 @@ async def run_step(self):
tokenizer.eos_token_id,
pack_samples,
),
ttl=400,
ttl=430,
mode="spawn",
)

del model_i

except Exception as e:
bt.logging.error(
f"Error in eval loop: {e}. Setting losses for uid: {uid_i} to infinity."
Expand Down
7 changes: 5 additions & 2 deletions pretrain/mining.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@

from competitions.data import CompetitionId


def model_path(base_dir: str, run_id: str) -> str:
"""
Constructs a file path for storing the model relating to a training run.
Expand Down Expand Up @@ -96,7 +97,8 @@ async def push(

bt.logging.debug("Started uploading model to hugging face...")
model_id = await remote_model_store.upload_model(
Model(id=model_id, pt_model=model), model_constraints)
Model(id=model_id, pt_model=model), model_constraints
)

bt.logging.success("Uploaded model to hugging face.")

Expand Down Expand Up @@ -190,6 +192,7 @@ def load_local_model(model_dir: str, kwargs: Dict[str, Any]) -> PreTrainedModel:
**kwargs,
)


async def load_remote_model(
uid: int,
download_dir: str,
Expand Down Expand Up @@ -245,7 +248,7 @@ async def load_best_model(
remote_model_store: Optional[RemoteModelStore] = None,
) -> PreTrainedModel:
"""Loads the model from the best performing miner to download_dir"""
best_uid = ft.graph.best_uid(competition_id=competition_id)
best_uid = pt.graph.best_uid(competition_id=competition_id)
if best_uid is None:
raise ValueError(f"No best models found for {competition_id}")

Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ transformers==4.44.1
wandb
datasets
flash-attn
taoverse==1.0.5
taoverse==1.0.6

0 comments on commit 8468a04

Please sign in to comment.