
Commit

Check DPO min completion len
opentaco committed Aug 25, 2023
1 parent 6286e9c commit 877177f
Showing 1 changed file with 7 additions and 0 deletions.
7 changes: 7 additions & 0 deletions openvalidators/reward/dpo.py
@@ -32,6 +32,7 @@ def name(self) -> str: return RewardModelType.dpo.value
 
     def __init__(self, device: str):
         super().__init__()
+        self.min_completion_len = 5  # minimum number of tokens expected in each completion part.
         self.device = device
         self.tokenizer = AutoTokenizer.from_pretrained(DirectPreferenceRewardModel.reward_model_name)
         self.model = AutoModelForCausalLM.from_pretrained(DirectPreferenceRewardModel.reward_model_name,
@@ -49,6 +50,12 @@ def reward_single(self, prompt: str, completion: str, name: str) -> float:
             # Tokenize only the prompt, to help determine prompt token length.
             prompt_part = self.tokenizer(prompt, return_tensors="pt").input_ids[0].to(self.device)  # [prompt_len]
 
+            # Number of tokens in the completion part.
+            completion_len = len(combined) - len(prompt_part)
+            # Completion part is less than the minimum allowed length.
+            if completion_len < self.min_completion_len:
+                return -11.  # exp(-11)=1.67e-5 < 2e-5=1/50257 (typical vocab size)
+
             # Completion doesn't fit into model sequence, so return lowest reward.
             if self.tokenizer.model_max_length <= len(prompt_part):
                 return -11.  # exp(-11)=1.67e-5 < 2e-5=1/50257 (typical vocab size)
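For readers tracing the logic, the new guard can be exercised outside the class. The sketch below is a reconstruction under stated assumptions, not part of the commit: it substitutes the gpt2 tokenizer (vocab size 50257) for the reward model's tokenizer, and the prompt/completion strings are hypothetical. It also verifies the arithmetic behind the -11. sentinel.

import math

from transformers import AutoTokenizer

# Stand-in for the reward model's tokenizer (assumption: a GPT-2-style
# tokenizer with a 50257-token vocabulary).
tokenizer = AutoTokenizer.from_pretrained("gpt2")

min_completion_len = 5  # mirrors the new self.min_completion_len

prompt = "Q: What is the capital of France?\nA:"  # hypothetical inputs
completion = " Paris."

# Same length computation as the committed check: tokenize prompt+completion
# and the prompt alone, then take the difference in token counts.
combined = tokenizer(prompt + completion, return_tensors="pt").input_ids[0]
prompt_part = tokenizer(prompt, return_tensors="pt").input_ids[0]
completion_len = len(combined) - len(prompt_part)

if completion_len < min_completion_len:
    print(f"short completion ({completion_len} tokens) -> reward -11.")

# The sentinel is chosen so that exp(-11) falls below the probability of any
# token under a uniform distribution over the vocabulary: 1.67e-5 < 1.99e-5.
assert math.exp(-11) < 1 / 50257

Reading the in-code comment this way: -11 sits just below log(1/50257) ≈ -10.82, the per-token log-probability of a uniform distribution over a typical vocabulary, so the sentinel ranks below that baseline rather than below every possible model score.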
