Skip to content

Commit

Permalink
init
Browse files: browse the repository at this point in the history
  • Loading branch information
natolambert committed Sep 17, 2024
1 parent 82bd441 commit 0362d99
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 3 deletions.
17 changes: 14 additions & 3 deletions rewardbench/generative.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@
"gpt-4o-2024-05-13",
"gpt-4o-mini-2024-07-18",
"gpt-4o-2024-08-06",
"o1-preview-2024-09-12",
"o1-mini-2024-09-12",
)

# feel free to add more models to this list via PR
Expand All @@ -78,6 +80,7 @@
"gemini-1.5-pro-exp-0827",
"gemini-1.5-flash-exp-0827",
"gemini-1.5-flash-8b",
"gemini-1.5-flash-8b-exp-0827",
)

API_MODEL_LIST = OPENAI_MODEL_LIST + ANTHROPIC_MODEL_LIST + TOGETHER_MODEL_LIST + GEMINI_MODEL_LIST
Expand Down Expand Up @@ -470,9 +473,17 @@ def chat_completion_openai(model, conv, temperature, max_tokens, api_dict=None):
for _ in range(API_MAX_RETRY):
try:
messages = conv.to_openai_api_messages()
response = client.chat.completions.create(
model=model, messages=messages, n=1, temperature=temperature, max_tokens=max_tokens
)
# o1 models: drop the leading message (assumed to be the system prompt) and call the API with a fixed temperature=1 and no max_tokens
if "o1-" in model:
messages = messages[1:]
response = client.chat.completions.create(
model=model, messages=messages, n=1, temperature=1
)
else:
response = client.chat.completions.create(
model=model, messages=messages, n=1, temperature=temperature, max_tokens=max_tokens
)

output = response.choices[0].message.content
break
except openai.APIError as e:
Expand Down
24 changes: 24 additions & 0 deletions scripts/configs/eval_configs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -759,4 +759,28 @@ LxzGordon/URM-LLaMa-3-8B:
# dpo: False
# generative: True
# num_gpus: 1
Ray2333/GRM-Gemma-2B-rewardmodel-ft:
model: Ray2333/GRM-Gemma-2B-rewardmodel-ft
tokenizer: Ray2333/GRM-Gemma-2B-rewardmodel-ft
chat_template: # none for tokenizer
batch_size: 16
trust_remote_code: False
dpo: False
quantized: False
Ray2333/Gemma-2B-rewardmodel-ft:
model: Ray2333/Gemma-2B-rewardmodel-ft
tokenizer: Ray2333/Gemma-2B-rewardmodel-ft
chat_template: # none for tokenizer
batch_size: 16
trust_remote_code: False
dpo: False
quantized: False
Ray2333/GRM-Llama3-8B-rewardmodel-ft:
model: Ray2333/GRM-Llama3-8B-rewardmodel-ft
tokenizer: Ray2333/GRM-Llama3-8B-rewardmodel-ft
chat_template: # none for tokenizer
batch_size: 8
trust_remote_code: False
dpo: False
quantized: False

0 comments on commit 0362d99

Please sign in to comment.