Merge pull request #125 from opentensor/staging
v1.1.6
Eugene-hu authored Aug 10, 2023
2 parents 41038d6 + 83b0fee commit a32cde7
Showing 4 changed files with 17 additions and 7 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -1,4 +1,7 @@
 # Changelog
+## 1.1.6 / 2023-08-10
+### What’s Changed
+- Diversity regularization by @isabella618033 in https://github.com/opentensor/validators/pull/124
 
 ## 1.1.5 / 2023-08-08
 ### What’s Changed
2 changes: 1 addition & 1 deletion openvalidators/__init__.py
@@ -28,6 +28,6 @@
 from . import weights
 from . import event
 
-__version__ = "1.1.5"
+__version__ = "1.1.6"
 version_split = __version__.split(".")
 __spec_version__ = (1000 * int(version_split[0])) + (10 * int(version_split[1])) + (1 * int(version_split[2]))
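
For reference, a quick standalone check of what the bumped version string encodes under the __spec_version__ formula above (a sketch run outside the package, not part of the commit):

version_split = "1.1.6".split(".")
spec_version = (1000 * int(version_split[0])) + (10 * int(version_split[1])) + (1 * int(version_split[2]))
print(spec_version)  # 1016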
17 changes: 12 additions & 5 deletions openvalidators/reward/diversity.py
@@ -55,7 +55,7 @@ def __init__( self, device: str ):
         self.device = device
         self.tokenizer = AutoTokenizer.from_pretrained( DiversityRewardModel.diversity_model_path )
         self.model = AutoModel.from_pretrained( DiversityRewardModel.diversity_model_path ).to(self.device)
-        self.reward_quantile = torch.tensor(0.1).to(self.device)
+        self.reward_bottom_k = 3
         self.history_reward_bottom_k = 2
         self.historic_embeddings = torch.tensor([]).to(self.device)
         self.history_range = (500, 15500)
@@ -116,18 +116,22 @@ def regularise( rewards ):
         similarity = pairwise_cosine_similarity( embeddings, self.historic_embeddings[self.history_range[0]:] )
 
         # Reward to be at the bottom_k smallest of the 1 - similarity score.
-        rewards = torch.topk((1 - similarity), self.history_reward_bottom_k, largest = False)[0][:, -1]
+        rewards = torch.topk((1 - torch.abs(similarity)), self.history_reward_bottom_k, largest = False)[0][:, -1]
 
         return regularise(rewards)
 
     def get_batch_rewards( self, embeddings: torch.FloatTensor ) -> torch.FloatTensor:
+        def regularise( rewards ):
+            # sigmoid function that maps 0.07 -> 0.23; 0.1 -> 0.5; 0.2 -> 0.98
+            return 1/(1 + torch.exp(-40 * rewards + 4))
+
         # Calculate the pairwise cosine similarity.
         similarity = pairwise_cosine_similarity( embeddings, embeddings )
 
         # Reward to be at the 10% quantile of the 1 - similarity score.
-        rewards = (1 - similarity).quantile(self.reward_quantile, dim = 1 )
+        rewards = torch.topk((1 - torch.abs(similarity)), self.reward_bottom_k, largest = False)[0][:, -1]
 
-        return rewards
+        return regularise(rewards)
 
     def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch.FloatTensor:
         # Check if completions are empty, return 0 if so
@@ -149,4 +153,7 @@ def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch.FloatTensor:
         if historic_rewards != None:
             return batch_rewards * historic_rewards
         else:
-            return batch_rewards
+            return batch_rewards
+
+    def normalize_rewards( self, rewards: torch.FloatTensor ) -> torch.FloatTensor:
+        return rewards
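
Taken together, the diversity.py changes swap the 10% quantile reward for a bottom-k reward on 1 - |cosine similarity| and pass the batch rewards through a nested sigmoid regularise step. A minimal sketch of the new batch path, assuming the torchmetrics implementation of pairwise_cosine_similarity and random stand-in embeddings (the embedding model and class wiring are omitted):

import torch
from torchmetrics.functional import pairwise_cosine_similarity

def regularise(rewards: torch.Tensor) -> torch.Tensor:
    # Sigmoid that maps roughly 0.07 -> 0.23, 0.1 -> 0.5, 0.2 -> 0.98.
    return 1 / (1 + torch.exp(-40 * rewards + 4))

def batch_diversity_rewards(embeddings: torch.Tensor, bottom_k: int = 3) -> torch.Tensor:
    # Pairwise cosine similarity between all completions in the batch.
    similarity = pairwise_cosine_similarity(embeddings, embeddings)
    # k-th smallest value of 1 - |similarity| per row; the diagonal contributes a zero,
    # so the result is driven by how close each completion is to its nearest neighbours.
    rewards = torch.topk(1 - torch.abs(similarity), bottom_k, largest=False)[0][:, -1]
    return regularise(rewards)

embeddings = torch.nn.functional.normalize(torch.randn(8, 384), dim=1)
print(batch_diversity_rewards(embeddings))  # 8 values in (0, 1)

Note that with torch.abs a strongly negative similarity lowers the reward just as a strongly positive one does.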
2 changes: 1 addition & 1 deletion openvalidators/reward/relevance.py
@@ -176,4 +176,4 @@ def reward( self, prompt: str, completion: str ) -> torch.FloatTensor:
         # Calculate the pairwise cosine similarity.
         similarity = pairwise_cosine_similarity( prompt_embed, embeddings )
 
-        return similarity
+        return torch.abs(similarity)
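
The relevance.py change mirrors the diversity one: the reward is now the magnitude of the cosine similarity rather than its signed value, so a strongly anti-aligned prompt/completion pair no longer yields a low or negative score. A trivial illustration with made-up values:

import torch
similarity = torch.tensor([0.9, 0.1, -0.8])
print(torch.abs(similarity))  # tensor([0.9000, 0.1000, 0.8000]); the -0.8 now scores like 0.8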
