Skip to content

Commit

Permalink
Add model type to results (#26)
Browse files · Browse the repository at this point in the history
* up

* style

* add to scores
Branch information
natolambert authored Feb 15, 2024
1 parent 84c0a9b commit 060d9c2
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 2 deletions.
5 changes: 5 additions & 0 deletions scripts/run_dpo.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,8 @@ def main():

results_grouped = {}
results_grouped["model"] = args.model
results_grouped["ref_model"] = args.ref_model
results_grouped["model_type"] = "DPO" # TODO add options for references free, DPO-ref-free, or DPO-normalized
results_grouped["chat_template"] = args.chat_template
# print per subset and log into results_grouped file
present_subsets = np.unique(subsets)
Expand All @@ -210,6 +212,9 @@ def main():
# upload chosen-rejected with scores
# create new json with scores and upload
scores_dict = out_dataset.to_dict()
scores_dict["model"] = args.model
scores_dict["model_type"] = "DPO"
scores_dict["chat_template"] = args.chat_template
sub_path_scores = "eval-set-scores/" if not args.pref_sets else "pref-sets-scores/"

scores_url = save_to_hub(scores_dict, args.model, sub_path_scores, args.debug)
Expand Down
12 changes: 10 additions & 2 deletions scripts/run_rm.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def main():
args = get_args()
quantized = True # only Starling isn't quantized for now
custom_dialogue = False
model_type = "Seq. Classifier"
# some models need custom code to be run
if "oasst" in args.model or "oasst" in args.chat_template:
from herm.models import openassistant # noqa
Expand All @@ -92,12 +93,14 @@ def main():
custom_dialogue = True
model_builder = DebertaV2PairRM.from_pretrained
pipeline_builder = PairRMPipeline
model_type = "Custom Classifier"
elif "SteamSHP" in args.model or "SteamSHP" in args.chat_template:
from herm.models.shp import SHPPipeline

custom_dialogue = True
model_builder = T5ForConditionalGeneration.from_pretrained
pipeline_builder = SHPPipeline
model_type = "Custom Classifier"
elif "beaver" in args.model or "pku-align" in args.chat_template:
from herm.models.beaver import BeaverPipeline, LlamaForScore

Expand Down Expand Up @@ -256,7 +259,7 @@ def custom_collate_fn(batch):
for step, batch in enumerate(tqdm(dataloader, desc="RM batch steps")):
logger.info(f"RM inference step {step}/{len(dataloader)}")

if "PairRM" in args.model or "SteamSHP" in args.model:
if model_type == "Custom Classifier":
text_rejected = [b["text_rejected"] for b in batch]
text_chosen = [b["text_chosen"] for b in batch]
results_sub = reward_pipe(text_chosen, text_rejected, **reward_pipeline_kwargs)
Expand Down Expand Up @@ -303,6 +306,7 @@ def custom_collate_fn(batch):
# get core dataset
results_grouped = {}
results_grouped["model"] = args.model
results_grouped["model_type"] = model_type
results_grouped["chat_template"] = args.chat_template

# print per subset and log into results_grouped file
Expand All @@ -323,9 +327,13 @@ def custom_collate_fn(batch):
logger.info(f"Uploaded reward model results to {results_url}")

# upload chosen-rejected with scores
if not ("PairRM" in args.model or "SteamSHP" in args.model):
if not model_type == "Custom Classifier": # custom classifiers do not return scores
# create new json with scores and upload
scores_dict = out_dataset.to_dict()
scores_dict["model"] = args.model
scores_dict["model_type"] = model_type
scores_dict["chat_template"] = args.chat_template

sub_path_scores = "eval-set-scores/" if not args.pref_sets else "pref-sets-scores/"

scores_url = save_to_hub(scores_dict, args.model, sub_path_scores, args.debug)
Expand Down

0 comments on commit 060d9c2

Please sign in to comment.