Merge pull request #46 from opentensor/staging
2.0.2 Release
steffencruz authored Oct 25, 2023
2 parents 01f1572 + 26dd7b7 commit 94b19ec
Showing 4 changed files with 23 additions and 4 deletions.
2 changes: 1 addition & 1 deletion prompting/validators/__init__.py
@@ -27,7 +27,7 @@
 from . import event
 from . import dataset

-__version__ = "2.0.1"
+__version__ = "2.0.2"
 version_split = __version__.split(".")
 __spec_version__ = (
     (1000 * int(version_split[0]))
16 changes: 16 additions & 0 deletions prompting/validators/forward.py
@@ -98,6 +98,22 @@ async def run_step(
         timeout=timeout,
     )

+    # Restrict the format of acceptable followup completions.
+    for response in responses:
+        # remove leading and trailing periods
+        completion = response.completion.strip(".")
+
+        if "followup" in name and len(completion) > 0:
+            if "?" in completion:
+                # take first question that is found and only use the sentence before the question mark
+                completion = completion.split("?")[0].split(".")[-1]
+            else:
+                # otherwise take the last sentence
+                completion = completion.split(".")[-1].split(".")[-1]
+
+            # take maximum of 40 words
+            response.completion = " ".join(completion.split(" ")[-40:]) + "?"
+
     # Compute the rewards for the responses given the prompt.
     rewards: torch.FloatTensor = torch.zeros(len(responses), dtype=torch.float32).to(
         self.device
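For illustration, here is a minimal, self-contained sketch of the trimming behavior the new block enforces. The function name and sample string are hypothetical stand-ins, not part of the commit:

def trim_followup(completion: str) -> str:
    # Remove leading and trailing periods, as the new block does.
    completion = completion.strip(".")
    if "?" in completion:
        # Keep only the sentence immediately preceding the first question mark.
        completion = completion.split("?")[0].split(".")[-1]
    else:
        # Otherwise keep the last sentence.
        completion = completion.split(".")[-1]
    # Cap at the last 40 words and force a trailing question mark.
    return " ".join(completion.split(" ")[-40:]) + "?"

print(trim_followup("Some context. What is your name? Ignore this."))
# Prints " What is your name?" (note that a leading space survives the split)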
4 changes: 2 additions & 2 deletions prompting/validators/reward/config.py
@@ -38,8 +38,8 @@ class DefaultRewardFrameworkConfig:
     Note: All the weights should add up to 1.0.
     """

-    dpo_model_weight: float = 0.3
+    dpo_model_weight: float = 0.2
     rlhf_model_weight: float = 0.4
-    reciprocate_model_weight: float = 0.3
+    reciprocate_model_weight: float = 0.4
     dahoas_model_weight: float = 0
     prompt_model_weight: float = 0
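As the docstring notes, the default weights should add up to 1.0, which the new values still do (0.2 + 0.4 + 0.4 = 1.0). Below is a hypothetical sketch of how per-model rewards might be combined under these defaults, assuming a simple weighted sum; this is a simplification for illustration, not the framework's exact code:

import torch

weights = {"dpo": 0.2, "rlhf": 0.4, "reciprocate": 0.4, "dahoas": 0.0, "prompt": 0.0}
assert abs(sum(weights.values()) - 1.0) < 1e-9  # weights must add up to 1.0

# Made-up per-model reward vectors for three responses.
model_rewards = {name: torch.rand(3) for name in weights}
combined = sum(w * model_rewards[name] for name, w in weights.items())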
5 changes: 4 additions & 1 deletion prompting/validators/reward/diversity.py
@@ -68,6 +68,7 @@ def __init__(self, device: str):
         self.history_reward_bottom_k = 2
         self.historic_embeddings = torch.tensor([]).to(self.device)
         self.history_range = (500, 15500)
+        self.boundary = 0.5

     def get_embeddings(self, sentences: List[str]) -> "torch.FloatTensor":
         """Runs a forward pass through the model.
@@ -176,5 +177,7 @@ def get_rewards(
         else:
             return batch_rewards

-    def normalize_rewards(self, rewards: torch.FloatTensor) -> torch.FloatTensor:
+    def normalize_rewards(self, raw_rewards: torch.FloatTensor) -> torch.FloatTensor:
+        # Applies binarization on the rewards.
+        rewards = (raw_rewards > self.boundary).float()
         return rewards
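The effect of the new normalize_rewards is to binarize diversity rewards around the 0.5 boundary. A quick sketch of the same operation with made-up reward values:

import torch

boundary = 0.5  # mirrors the new self.boundary default
raw_rewards = torch.tensor([0.1, 0.5, 0.7, 0.9])
rewards = (raw_rewards > boundary).float()
print(rewards)  # tensor([0., 0., 1., 1.]) -- 0.5 itself fails the strict > test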
