Skip to content

Commit

Permalink
Add model type to results (#26)
Browse files · Browse the repository at this point in the history
* up

* style

* add to scores
Branch information
natolambert authored Feb 15, 2024
1 parent 84c0a9b commit 060d9c2
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 2 deletions.
5 changes: 5 additions & 0 deletions scripts/run_dpo.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,8 @@ def main():

results_grouped = {}
results_grouped["model"] = args.model
results_grouped["ref_model"] = args.ref_model
results_grouped["model_type"] = "DPO" # TODO add options for references free, DPO-ref-free, or DPO-normalized
results_grouped["chat_template"] = args.chat_template
# print per subset and log into results_grouped file
present_subsets = np.unique(subsets)
Expand All @@ -210,6 +212,9 @@ def main():
# upload chosen-rejected with scores
# create new json with scores and upload
scores_dict = out_dataset.to_dict()
scores_dict["model"] = args.model
scores_dict["model_type"] = "DPO"
scores_dict["chat_template"] = args.chat_template
sub_path_scores = "eval-set-scores/" if not args.pref_sets else "pref-sets-scores/"

scores_url = save_to_hub(scores_dict, args.model, sub_path_scores, args.debug)
Expand Down
12 changes: 10 additions & 2 deletions scripts/run_rm.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def main():
args = get_args()
quantized = True # only Starling isn't quantized for now
custom_dialogue = False
model_type = "Seq. Classifier"
# some models need custom code to be run
if "oasst" in args.model or "oasst" in args.chat_template:
from herm.models import openassistant # noqa
Expand All @@ -92,12 +93,14 @@ def main():
custom_dialogue = True
model_builder = DebertaV2PairRM.from_pretrained
pipeline_builder = PairRMPipeline
model_type = "Custom Classifier"
elif "SteamSHP" in args.model or "SteamSHP" in args.chat_template:
from herm.models.shp import SHPPipeline

custom_dialogue = True
model_builder = T5ForConditionalGeneration.from_pretrained
pipeline_builder = SHPPipeline
model_type = "Custom Classifier"
elif "beaver" in args.model or "pku-align" in args.chat_template:
from herm.models.beaver import BeaverPipeline, LlamaForScore

Expand Down Expand Up @@ -256,7 +259,7 @@ def custom_collate_fn(batch):
for step, batch in enumerate(tqdm(dataloader, desc="RM batch steps")):
logger.info(f"RM inference step {step}/{len(dataloader)}")

if "PairRM" in args.model or "SteamSHP" in args.model:
if model_type == "Custom Classifier":
text_rejected = [b["text_rejected"] for b in batch]
text_chosen = [b["text_chosen"] for b in batch]
results_sub = reward_pipe(text_chosen, text_rejected, **reward_pipeline_kwargs)
Expand Down Expand Up @@ -303,6 +306,7 @@ def custom_collate_fn(batch):
# get core dataset
results_grouped = {}
results_grouped["model"] = args.model
results_grouped["model_type"] = model_type
results_grouped["chat_template"] = args.chat_template

# print per subset and log into results_grouped file
Expand All @@ -323,9 +327,13 @@ def custom_collate_fn(batch):
logger.info(f"Uploaded reward model results to {results_url}")

# upload chosen-rejected with scores
if not ("PairRM" in args.model or "SteamSHP" in args.model):
if not model_type == "Custom Classifier": # custom classifiers do not return scores
# create new json with scores and upload
scores_dict = out_dataset.to_dict()
scores_dict["model"] = args.model
scores_dict["model_type"] = model_type
scores_dict["chat_template"] = args.chat_template

sub_path_scores = "eval-set-scores/" if not args.pref_sets else "pref-sets-scores/"

scores_url = save_to_hub(scores_dict, args.model, sub_path_scores, args.debug)
Expand Down

0 comments on commit 060d9c2

Please sign in to comment.