diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..86792ae --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,41 @@ +# Changelog + +## 1.1.8 / 2023-08-12 + +### What's Changed +- Make sure to serve axon first by @camfairchild in 14921d35c + + +**Full Changelog**: https://github.com/opentensor/validators/compare/v1.1.7...v1.1.8 + + +## 1.1.7 / 2023-08-11 +### What's Changed +- Hotfix cutoff limit by @Eugene-hu in #126 + +## 1.1.6 / 2023-08-10 +### What's Changed +- Diversity regularization by @isabella618033 in https://github.com/opentensor/validators/pull/124 + +## 1.1.5 / 2023-08-08 +### What's Changed +- Adds new keywords for the task validator by @p-ferreira in #119 +- Save historic embeddings on disk by @opentaco in #121 +- Updates relevance mechanism by @Eugene-hu in #122 + +## 1.1.4 / 2023-08-07 +- HOTFIX: create and serve the axon at startup by @robertalanm in #120 + + +## 1.1.3 / 2023-08-02 +- Adds subtensor to metagraph sync by @camfairchild in #79 +- Fix wandb weights format logging by @p-ferreira in #88 +- Adds netuid tag to wandb runs by @p-ferreira in #95 +- Implements GPU cleaning for optimization by @Eugene-hu in #96 +- Adds compatibility with bittensor 5.3.3 by @camfairchild in #107 +- Adds historic diversity component by @isabella618033 in #111 +- Improvements on diversity model by @isabella618033 and @Eugene-hu in #111 +- Prompt improvements by @mrseeker in #110 and @p-ferreira in #112 +- Adds Task Validator Filter to reward pipeline by @p-ferreira in #112 +- Fix for empty data retrieval from datasets by @p-ferreira in #113 +- Deprecates pip usage by @p-ferreira in #114 diff --git a/README.md b/README.md index 7863776..581c1a8 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,6 @@ # **Open Validators** [![Discord Chat](https://img.shields.io/discord/308323056592486420.svg)](https://discord.gg/bittensor) -[![PyPI version](https://badge.fury.io/py/openvalidators.svg)](https://badge.fury.io/py/openvalidators) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) --- @@ -19,7 +18,7 @@ It offers several functionalities, such as: The main goal of this repository is to facilitate the interaction with the Bittensor network by providing a set of open-source validators to the community. The current validator implementation queries the network for responses and -evaluations using carefully crafted prompts, that are later evaluated by a large foundation GPT-J reward model. +evaluations using carefully crafted chain-of-thought (CoT) prompts, which are later evaluated by a pipeline of reward functions, including diversity, relevance, and RLHF, among others. Additionally, the repository provides an analysis and data toolkit that allows users to analyze the data generated from the validator's interaction with the network. By default, the validator collects various data points, such as question @@ -69,14 +68,7 @@ There are currently four main avenues for engaging with this repository: - Serves individuals, researchers, and developers who seek to create datasets for the community's miners. # Install -There are two ways to use OpenTensor validators: - -1. With pip: -```bash -$ pip3 install openvalidators -``` - -2. From source: +From source: ```bash $ git clone https://github.com/opentensor/validators.git $ pip3 install -e openvalidators/ diff --git a/openvalidators/__init__.py b/openvalidators/__init__.py index 8ff2f02..87cf974 100644 --- a/openvalidators/__init__.py +++ b/openvalidators/__init__.py @@ -28,6 +28,6 @@ from .
import weights from . import event -__version__ = "1.1.1" +__version__ = "1.1.8" version_split = __version__.split(".") __spec_version__ = (1000 * int(version_split[0])) + (10 * int(version_split[1])) + (1 * int(version_split[2])) diff --git a/openvalidators/config.py b/openvalidators/config.py index 1ec0c35..2a5c69d 100644 --- a/openvalidators/config.py +++ b/openvalidators/config.py @@ -166,6 +166,16 @@ def add_args(cls, parser): default=4096, ) + parser.add_argument( + "--neuron.axon_off", + "--axon_off", + action="store_true", + # Note: the validator needs to serve an Axon with their IP or they may + # be blacklisted by the firewall of serving peers on the network. + help="Set this flag to not attempt to serve an Axon.", + default=False, + ) + parser.add_argument("--wandb.off", action="store_true", help="Turn off wandb.", default=False) parser.add_argument( "--wandb.project_name", @@ -241,6 +251,12 @@ def add_args(cls, parser): action="store_true", help="Dont apply the diversity reward model", default=False, + ) + parser.add_argument( + "--neuron.task_validator_off", + action="store_true", + help="Dont apply the task validator reward model", + default=False, ) parser.add_argument( diff --git a/openvalidators/dataset.py b/openvalidators/dataset.py index 215f10f..96683c4 100644 --- a/openvalidators/dataset.py +++ b/openvalidators/dataset.py @@ -27,11 +27,17 @@ def __init__(self): self.openwebtext = iter( load_dataset("openwebtext", split="train", streaming=True).shuffle(seed=seed, buffer_size=10000) ) self.red_pajama = iter( load_dataset("togethercomputer/RedPajama-Data-1T", 'default', split='train', streaming=True).shuffle(seed=seed, buffer_size=10000) ) - def __next__(self): - if random.random() < 0.5: - return {"text": next(self.openwebtext)["text"]} - else: - return {"text": next(self.red_pajama)["text"]} + def __next__(self): + while True: + bt.logging.debug('Retrieving data from dataset...') + if random.random() < 0.5: + text = next(self.openwebtext)["text"] + else: + text = next(self.red_pajama)["text"] + + # Check if the text is not empty or does not consist only of newline characters + if text.strip(): + return {"text": text} class MockDataset(Iterator): diff --git a/openvalidators/event.py b/openvalidators/event.py index bbf797d..48ebabd 100644 --- a/openvalidators/event.py +++ b/openvalidators/event.py @@ -45,6 +45,7 @@ class EventSchema: rlhf_reward_model: Optional[List[float]] # Output vector of the rlhf reward model prompt_reward_model: Optional[List[float]] # Output vector of the prompt reward model relevance_filter: Optional[List[float]] # Output vector of the relevance scoring reward model + task_validator_filter: Optional[List[float]] # Weights data set_weights: Optional[List[List[float]]] @@ -55,6 +56,7 @@ def from_dict(event_dict: dict, disable_log_rewards: bool) -> 'EventSchema': rewards = { 'dahoas_reward_model': event_dict.get(RewardModelType.dahoas.value), 'blacklist_filter': event_dict.get(RewardModelType.blacklist.value), + 'task_validator_filter': event_dict.get(RewardModelType.task_validator.value), 'nsfw_filter': event_dict.get(RewardModelType.nsfw.value), 'relevance_filter': event_dict.get(RewardModelType.relevance.value), 'reciprocate_reward_model': event_dict.get(RewardModelType.reciprocate.value), diff --git a/openvalidators/forward.py b/openvalidators/forward.py index 376fc8c..5b5e071 100644 --- a/openvalidators/forward.py +++ b/openvalidators/forward.py @@ -62,7 +62,11 @@ def get_random_uids(self, k: int, exclude: List[int] = None) -> torch.LongTensor 
return uids -async def run_step(self, prompt: str, k: int, timeout: float, name: str, exclude: list = []): +async def run_step(self, prompt: str, k: int, timeout: float, name: str, exclude: list = [], base_prompt = None): + + if base_prompt is None: + base_prompt = prompt + bt.logging.debug("run_step", name) # Record event start time. @@ -83,18 +87,20 @@ async def run_step(self, prompt: str, k: int, timeout: float, name: str, exclude # Compute the rewards for the responses given the prompt. rewards: torch.FloatTensor = torch.zeros(len(responses), dtype=torch.float32).to(self.device) for weight_i, reward_fn_i in zip(self.reward_weights, self.reward_functions): - reward_i = reward_fn_i.apply(prompt, responses, name).to(self.device) - rewards += weight_i * reward_i + reward_i, reward_i_normalized = reward_fn_i.apply(prompt, responses, name) + rewards += weight_i * reward_i_normalized.to(self.device) if not self.config.neuron.disable_log_rewards: event[reward_fn_i.name] = reward_i.tolist() - bt.logging.trace(str(reward_fn_i.name), reward_i.tolist()) + event[reward_fn_i.name + '_normalized'] = reward_i_normalized.tolist() + bt.logging.trace(str(reward_fn_i.name), reward_i_normalized.tolist()) for masking_fn_i in self.masking_functions: - mask_i = masking_fn_i.apply(prompt, responses, name).to(self.device) - rewards *= mask_i # includes diversity + mask_i, mask_i_normalized = masking_fn_i.apply(base_prompt, responses, name) + rewards *= mask_i_normalized.to(self.device) # includes diversity if not self.config.neuron.disable_log_rewards: event[masking_fn_i.name] = mask_i.tolist() - bt.logging.trace(str(masking_fn_i.name), mask_i.tolist()) + event[masking_fn_i.name + '_normalized'] = mask_i_normalized.tolist() + bt.logging.trace(str(masking_fn_i.name), mask_i_normalized.tolist()) # Train the gating model based on the predicted scores and the actual rewards.
gating_scores: torch.FloatTensor = self.gating_model(prompt).to(self.device) @@ -168,6 +174,7 @@ async def forward(self): ) base_text = augment_event["best"] + base_prompt = augment_event["best"] exclude = augment_event["uids"] for k in range(self.config.neuron.num_followup_steps): @@ -180,6 +187,7 @@ async def forward(self): k=self.config.neuron.followup_sample_size, timeout=self.config.neuron.followup_timeout, exclude=exclude, + base_prompt=base_prompt ) exclude += followup_event["uids"] @@ -192,6 +200,7 @@ async def forward(self): k=self.config.neuron.answer_sample_size, timeout=self.config.neuron.answer_timeout, exclude=exclude, + base_prompt=followup_event["best"] ) exclude += answer_event["uids"] @@ -205,3 +214,4 @@ async def forward(self): ) else: base_text = base_text + "\nQuestion:" + followup_event["best"] + "\nAnswer:" + answer_event["best"] + \ No newline at end of file diff --git a/openvalidators/neuron.py b/openvalidators/neuron.py index 070387e..f21e670 100644 --- a/openvalidators/neuron.py +++ b/openvalidators/neuron.py @@ -34,11 +34,12 @@ # Load gating models from openvalidators.reward import ( Blacklist, + TaskValidator, NSFWRewardModel, DirectPreferenceRewardModel, OpenAssistantRewardModel, ReciprocateRewardModel, - BertRelevanceRewardModel, + RelevanceRewardModel, MockRewardModel, DahoasRewardModel, DiversityRewardModel, @@ -89,7 +90,9 @@ def __init__(self): self.wallet = bt.wallet(config=self.config) self.wallet.create_if_non_existent() if not self.config.wallet._mock: - self.wallet.reregister(subtensor=self.subtensor, netuid=self.config.netuid) + if not self.subtensor.is_hotkey_registered_on_subnet(hotkey_ss58=self.wallet.hotkey.ss58_address, netuid=self.config.netuid): + raise Exception(f'Wallet not currently registered on netuid {self.config.netuid}, please first register wallet before running') + bt.logging.debug(str(self.wallet)) # Init metagraph. @@ -125,6 +128,32 @@ def __init__(self): self.gating_model = GatingModel(metagraph=self.metagraph, config=self.config).to(self.device) bt.logging.debug(str(self.gating_model)) + if not self.config.neuron.axon_off: + bt.logging.debug('serving ip to chain...') + try: + axon = bt.axon( + wallet=self.wallet, metagraph=self.metagraph, config=self.config + ) + + try: + self.subtensor.serve_axon( + netuid=self.config.netuid, + axon=axon, + use_upnpc=False, + wait_for_finalization=True, + ) + except Exception as e: + bt.logging.error(f'Failed to serve Axon with exception: {e}') + pass + + del axon + except Exception as e: + bt.logging.error(f'Failed to create Axon initialize with exception: {e}') + pass + + else: + bt.logging.debug('axon off, not serving ip to chain.') + # Dendrite pool for querying the network during training. 
bt.logging.debug("loading", "dendrite_pool") if self.config.neuron.mock_dendrite_pool: @@ -190,24 +219,31 @@ def __init__(self): bt.logging.error(message) raise Exception(message) - + + # Masking functions self.blacklist = ( Blacklist() if not self.config.neuron.blacklist_off else MockRewardModel(RewardModelType.blacklist.value) ) + task_validator = ( + TaskValidator() if not self.config.neuron.task_validator_off + else MockRewardModel(RewardModelType.task_validator.value) + ) + relevance_model = ( + RelevanceRewardModel(device=self.device) if not self.config.neuron.relevance_off + else MockRewardModel(RewardModelType.relevance.value) + ) + self.diversity_model = ( + DiversityRewardModel(device=self.device) if not self.config.neuron.diversity_off + else MockRewardModel(RewardModelType.diversity.value) + ) + nsfw_model = ( + NSFWRewardModel(device=self.device) if not self.config.neuron.nsfw_off + else MockRewardModel(RewardModelType.nsfw.value) + ) - self.masking_functions = [ - self.blacklist, - BertRelevanceRewardModel(device=self.device) - if not self.config.neuron.relevance_off - else MockRewardModel(RewardModelType.relevance.value), - DiversityRewardModel(device=self.device) - if not self.config.neuron.diversity_off - else MockRewardModel(RewardModelType.diversity.value), - NSFWRewardModel(device=self.device) - if not self.config.neuron.nsfw_off - else MockRewardModel(RewardModelType.nsfw.value), - ] + self.masking_functions = [self.blacklist, task_validator, relevance_model, self.diversity_model, nsfw_model] bt.logging.debug(str(self.reward_functions)) + bt.logging.debug(str(self.masking_functions)) # Init the event loop. self.loop = asyncio.get_event_loop() diff --git a/openvalidators/prompts.py b/openvalidators/prompts.py index 61e59bf..d92fb08 100644 --- a/openvalidators/prompts.py +++ b/openvalidators/prompts.py @@ -124,7 +124,7 @@ def find_unique_tags(input_text: str): # Request a follow-up question given a preceding context. -followup_request_template = "Ask one relevant and insightful question about the preceding context" +followup_request_template = "Ask a single relevant and insightful question about the preceding context" # Scores a summary on a scale from 0 to 10, given a context. augment_scoring_template = """Score the relevance, succinctness, and quality of a summary given a context. The context is within tags, and the question is within tags. Give a score between 0 and 10 in the tags, where 0 means the summary is irrelevant, and 10 means it's perfectly relevant and a good summary. Include a brief explanation for your score based solely on the context-summary relationship. @@ -348,16 +348,16 @@ def find_unique_tags(input_text: str): def followup_prompt( base_text:str, i:int = 0) -> str: if i == 0: - return f"{base_text}\n\n{followup_request_template}\n" + return f"{base_text}\n\n{followup_request_template}\n. Do not try to return an answer or a summary:" else: - return f"{base_text}\n\n{followup_request_template} and previous questions\n" + return f"{base_text}\n\n{followup_request_template} and previous questions. Do not try to return an answer or a summary:\n" def answer_prompt( base_text:str, followup:str ) -> str: - return f"{base_text}\n Question:{followup}\n Answer the question step by step and explain your thoughts" + return f"{base_text}\n\nQuestion:{followup}\nAnswer the question step by step and explain your thoughts. Do not include questions or summaries in your answer." 
augment_request_template = "Summarize the preceding context" def augment_prompt( base_text:str ) -> str: random_level = random.randint(4, 8) - return f"{base_text}\n\n{augment_request_template} in {random_level} sentences.\n\n" \ No newline at end of file + return f"{base_text}\n\n{augment_request_template} in {random_level} sentences. Do not try to create questions or answers for your summarization.\n\n" diff --git a/openvalidators/reward/__init__.py b/openvalidators/reward/__init__.py index 28a8a5a..51a20f9 100644 --- a/openvalidators/reward/__init__.py +++ b/openvalidators/reward/__init__.py @@ -1,12 +1,13 @@ from .blacklist import Blacklist +from .task_validator import TaskValidator from .nsfw import NSFWRewardModel from .dpo import DirectPreferenceRewardModel from .open_assistant import OpenAssistantRewardModel from .reciprocate import ReciprocateRewardModel -from .relevance import BertRelevanceRewardModel +from .relevance import RelevanceRewardModel from .reward import BaseRewardModel from .reward import MockRewardModel from .dahoas import DahoasRewardModel from .diversity import DiversityRewardModel from .prompt import PromptRewardModel -from .config import RewardModelType, DefaultRewardFrameworkConfig +from .config import RewardModelType, DefaultRewardFrameworkConfig \ No newline at end of file diff --git a/openvalidators/reward/config.py b/openvalidators/reward/config.py index 1f8a50f..53581b0 100644 --- a/openvalidators/reward/config.py +++ b/openvalidators/reward/config.py @@ -1,7 +1,5 @@ # The MIT License (MIT) # Copyright © 2021 Yuma Rao -from dataclasses import dataclass - # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated # documentation files (the “Software”), to deal in the Software without restriction, including without limitation # the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, @@ -15,7 +13,7 @@ # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. - +from dataclasses import dataclass from enum import Enum @@ -29,6 +27,7 @@ class RewardModelType(Enum): blacklist = 'blacklist_filter' nsfw = 'nsfw_filter' relevance = 'relevance_filter' + task_validator = 'task_validator_filter' @dataclass(frozen=True) diff --git a/openvalidators/reward/diversity.py b/openvalidators/reward/diversity.py index 185c53b..242f302 100644 --- a/openvalidators/reward/diversity.py +++ b/openvalidators/reward/diversity.py @@ -55,7 +55,10 @@ def __init__( self, device: str ): self.device = device self.tokenizer = AutoTokenizer.from_pretrained( DiversityRewardModel.diversity_model_path ) self.model = AutoModel.from_pretrained( DiversityRewardModel.diversity_model_path ).to(self.device) - self.reward_quantile = torch.tensor(0.1).to(self.device) + self.reward_bottom_k = 2 + self.history_reward_bottom_k = 2 + self.historic_embeddings = torch.tensor([]).to(self.device) + self.history_range = (500, 15500) def get_embeddings( self, sentences: List[str] ) -> "torch.FloatTensor": """Runs a forward pass through the model. 
@@ -86,8 +89,51 @@ def get_embeddings( self, sentences: List[str] ) -> "torch.FloatTensor": sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1) return sentence_embeddings - def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch.FloatTensor: + def update_historic_embeddings( self, embeddings: torch.FloatTensor ): + def unique(embeddings): + unique_embeddings = [embeddings[0]] + last_emb = embeddings[0] + for emb in embeddings: + if not torch.all(torch.eq(emb, last_emb)): + unique_embeddings.append(emb) + last_emb = emb + return torch.stack(unique_embeddings) + + embeddings_unique = unique(embeddings) + historic_embeddings = torch.cat([self.historic_embeddings, embeddings_unique]) + self.historic_embeddings = historic_embeddings[-self.history_range[1]:, :] + + def get_historic_rewards( self, embeddings: torch.FloatTensor ) -> torch.FloatTensor: + def regularise( rewards ): + # sigmoid function that cutoff at 0.05 approximately + return 1/(1 + torch.exp(-1000 * rewards + 50)) + + # Return None if history size is too small + if self.historic_embeddings.shape[0] < (self.history_range[0] + self.history_reward_bottom_k): + return None + + # Calculate the pairwise cosine similarity. + similarity = pairwise_cosine_similarity( embeddings, self.historic_embeddings[self.history_range[0]:] ) + + # Reward to be at the bottom_k smallest of the 1 - similarity score. + rewards = torch.topk((1 - torch.abs(similarity)), self.history_reward_bottom_k, largest = False)[0][:, -1] + + return regularise(rewards) + + def get_batch_rewards( self, embeddings: torch.FloatTensor ) -> torch.FloatTensor: + def regularise( rewards ): + # sigmoid function that maps 0.07 -> 0.23; 0.1 -> 0.5; 0.2 -> 0.98 + return 1/(1 + torch.exp(-40 * rewards + 4)) + + # Calculate the pairwise cosine similarity. + similarity = pairwise_cosine_similarity( embeddings, embeddings ) + + # Reward to be at the 10% quantile of the 1 - similarity score. + rewards = torch.topk((1 - torch.abs(similarity)), self.reward_bottom_k, largest = False)[0][:, -1] + return regularise(rewards) + + def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch.FloatTensor: # Check if completions are empty, return 0 if so if len(completions) == 0: return torch.tensor([]).to(self.device) @@ -95,11 +141,19 @@ def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch # Get embeddings for all completions. embeddings = self.get_embeddings( completions ) - # Calculate the pairwise cosine similarity. - similarity = pairwise_cosine_similarity( embeddings, embeddings ) + # Get batch rewards. + batch_rewards = self.get_batch_rewards(embeddings) - # Reward to be at the 10% quantile of the 1 - similarity score. - rewards = (1 - similarity).quantile(self.reward_quantile, dim = 1 ) + # get historic rewards. 
+ historic_rewards = self.get_historic_rewards(embeddings) + self.update_historic_embeddings(embeddings) + # Return all + if historic_rewards != None: + return batch_rewards * historic_rewards + else: + return batch_rewards + + def normalize_rewards( self, rewards: torch.FloatTensor ) -> torch.FloatTensor: return rewards \ No newline at end of file diff --git a/openvalidators/reward/relevance.py b/openvalidators/reward/relevance.py index 56e2063..c8789e1 100644 --- a/openvalidators/reward/relevance.py +++ b/openvalidators/reward/relevance.py @@ -21,13 +21,66 @@ from .config import RewardModelType from .reward import BaseRewardModel from transformers import AutoTokenizer, AutoModel +from torchmetrics.functional import pairwise_cosine_similarity +import torch.nn.functional as F + + +def mean_pooling(model_output, attention_mask): + """Applies mean pooling to the token embeddings generated by the model. + Args: + model_output (torch.Tensor): Embedding model output, where the first element contains token embeddings. + attention_mask (torch.Tensor): Attention mask to indicate valid tokens. + Returns: + torch.Tensor: Mean-pooled representation of the token embeddings. + Notes: + - The function calculates the mean-pooled representation using the attention mask for valid tokens. + - Input_mask_expanded is created by expanding the attention mask to match the size of token embeddings. + - The result is obtained by summing the element-wise multiplication of embeddings and input_mask_expanded, + and dividing it by the sum of input_mask_expanded after clamping its values to a minimum of 1e-9. + """ + token_embeddings = model_output[0] + input_mask_expanded = ( + attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float() + ) + return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp( + input_mask_expanded.sum(1), min=1e-9 + ) + +class RelevanceRewardModel( BaseRewardModel ): + + @property + def name(self) -> str: return RewardModelType.relevance.value + + def __init__( self, device: str ): + super().__init__() + self.device = device + self.models = [ + BertRelevanceRewardModel(self.device), + MpnetRelevenceModel(self.device) + ] + self.bounds = [-0.0246, 0.3] + + def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch.FloatTensor: + return torch.tensor( [self.reward( prompt, completion, name ) for completion in completions], dtype=torch.float32).to(self.device) + + def normalize_rewards( self, rewards: torch.FloatTensor ) -> torch.FloatTensor: + return rewards + + def reward(self, prompt: str, completion: str, name: str) -> float: + for i, model in enumerate(self.models): + + # rewards + diff = model.reward(prompt,completion) + + # If a model returns 0, stop iterating and return 0 + if diff < self.bounds[i]: + return 0.0 + # If none of the models returned 0, return 1 + return 1.0 class BertRelevanceRewardModel( BaseRewardModel ): relevance_model_path = "bert-base-uncased" - - @property - def name(self) -> str: return RewardModelType.relevance.value def __init__( self, device: str ): super().__init__() @@ -35,27 +88,6 @@ def __init__( self, device: str ): self.tokenizer = AutoTokenizer.from_pretrained(BertRelevanceRewardModel.relevance_model_path) self.model = AutoModel.from_pretrained(BertRelevanceRewardModel.relevance_model_path).to(self.device) - def mean_pooling(model_output, attention_mask): - """Applies mean pooling to the token embeddings generated by the model. 
- Args: - model_output (torch.Tensor): Embedding model output, where the first element contains token embeddings. - attention_mask (torch.Tensor): Attention mask to indicate valid tokens. - Returns: - torch.Tensor: Mean-pooled representation of the token embeddings. - Notes: - - The function calculates the mean-pooled representation using the attention mask for valid tokens. - - Input_mask_expanded is created by expanding the attention mask to match the size of token embeddings. - - The result is obtained by summing the element-wise multiplication of embeddings and input_mask_expanded, - and dividing it by the sum of input_mask_expanded after clamping its values to a minimum of 1e-9. - """ - token_embeddings = model_output[0] - input_mask_expanded = ( - attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float() - ) - return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp( - input_mask_expanded.sum(1), min=1e-9 - ) - def get_embedding(self, message: str) -> "torch.FloatTensor": """Runs a forward pass through the model. Args: @@ -79,12 +111,12 @@ def get_embedding(self, message: str) -> "torch.FloatTensor": with torch.no_grad(): embeddings = self.model(**encoded_input) - sentence_embeddings = BertRelevanceRewardModel.mean_pooling(embeddings, encoded_input["attention_mask"]) + sentence_embeddings = mean_pooling(embeddings, encoded_input["attention_mask"]) sentence_embeddings = torch.nn.functional.normalize(sentence_embeddings, p=2, dim=1) batch_representation = torch.mean(sentence_embeddings, dim=0) return batch_representation - def reward( self, prompt: str, completion:str , name: str, bound = -0.0246 ) -> float: + def reward( self, prompt: str, completion:str ) -> float: # Get the two bert embeddings. completion_embedding = self.get_embedding( completion) prompt_embedding = self.get_embedding( prompt) @@ -93,10 +125,55 @@ def reward( self, prompt: str, completion:str , name: str, bound = -0.0246 ) -> diff = (( completion_embedding - prompt_embedding )**2).mean()**0.5 # Return relevance scoring. - return 0.0 if float(-diff) < bound else 1.0 + return float(-diff) - def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch.FloatTensor: - return torch.tensor( [self.reward( prompt, completion, name ) for completion in completions], dtype=torch.float32).to(self.device) +class MpnetRelevenceModel( BaseRewardModel ): - def normalize_rewards( self, rewards: torch.FloatTensor ) -> torch.FloatTensor: - return rewards \ No newline at end of file + diversity_model_path = "sentence-transformers/all-mpnet-base-v2" + + def __init__( self, device: str ): + super().__init__() + self.device = device + self.tokenizer = AutoTokenizer.from_pretrained( MpnetRelevenceModel.diversity_model_path ) + self.model = AutoModel.from_pretrained( MpnetRelevenceModel.diversity_model_path ).to(self.device) + self.reward_quantile = torch.tensor(0.1).to(self.device) + + def get_embeddings( self, sentences: List[str] ) -> "torch.FloatTensor": + """Runs a forward pass through the model. + Args: + sentences (:obj:`List[str]`): + text message to be encoded. + Returns: + embedding (:obj:`torch.FloatTensor`): + Embedding for the message. 
+ """ + # Tokenizing sentences + + encoded_input = self.tokenizer( + sentences, + padding=True, + truncation=True, + return_tensors="pt", + ).to(self.device) + + # Compute token embedding + with torch.no_grad(): + embeddings = self.model(**encoded_input) + + # Pooling + sentence_embeddings = mean_pooling(embeddings, encoded_input["attention_mask"]) + + # Normalizing + sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1) + return sentence_embeddings + + def reward( self, prompt: str, completion: str ) -> torch.FloatTensor: + + # Get embeddings for all completions. + embeddings = self.get_embeddings( completion ) + prompt_embed = self.get_embeddings( prompt ) + + # Calculate the pairwise cosine similarity. + similarity = pairwise_cosine_similarity( prompt_embed, embeddings ) + + return torch.abs(similarity) \ No newline at end of file diff --git a/openvalidators/reward/reward.py b/openvalidators/reward/reward.py index 292d5d6..e95f931 100644 --- a/openvalidators/reward/reward.py +++ b/openvalidators/reward/reward.py @@ -98,17 +98,19 @@ def apply( self, prompt: str, responses: List[ bt.DendriteCall ], name: str) -> successful_rewards = self.get_rewards( prompt, successful_completions, name ) # Softmax rewards across samples. - successful_rewards = self.normalize_rewards( successful_rewards ) + successful_rewards_normalized = self.normalize_rewards( successful_rewards ) # Init zero rewards for all calls. filled_rewards = torch.zeros( len( responses ), dtype=torch.float32) + filled_rewards_normalized = torch.zeros( len( responses ), dtype=torch.float32) # Fill reward tensor. - for idx, reward in zip(successful_completions_indices, successful_rewards): + for idx, reward, reward_normalized in zip(successful_completions_indices, successful_rewards, successful_rewards_normalized): filled_rewards[idx] = reward + filled_rewards_normalized[idx] = reward_normalized # Return the filled rewards. - return filled_rewards + return filled_rewards, filled_rewards_normalized class MockRewardModel( BaseRewardModel ): @@ -121,7 +123,7 @@ def __init__(self, mock_name: str = 'MockReward'): self.mock_name = mock_name def apply( self, prompt: str, completion: List[str], name: str ) -> torch.FloatTensor: - return torch.tensor( [0 for _ in completion], dtype=torch.float32 ) - + mock_reward = torch.tensor( [0 for _ in completion], dtype=torch.float32 ) + return mock_reward, mock_reward \ No newline at end of file diff --git a/openvalidators/reward/task_validator.py b/openvalidators/reward/task_validator.py new file mode 100644 index 0000000..e9dfb77 --- /dev/null +++ b/openvalidators/reward/task_validator.py @@ -0,0 +1,63 @@ +# The MIT License (MIT) +# Copyright © 2021 Yuma Rao + +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the “Software”), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of +# the Software. + +# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. +import torch +from typing import List +from .config import RewardModelType +from .reward import BaseRewardModel + + +class TaskValidator( BaseRewardModel ): + + @property + def name(self) -> str: return RewardModelType.task_validator.value + + def __init__(self): + super().__init__() + + def reward( self, prompt: str, completion: str, name: str ) -> float: + summary_keywords = ['Summary:', 'Paraphrase:', 'Paraphrasing:', 'Paraphrased:'] + question_keywords = ['Question:', 'Query:', 'Q:'] + answer_keywords = ['Answer:', 'Response:', 'A:', 'Completion:'] + + completion_contains_answer = any(answer_keyword.lower() in completion.lower() for answer_keyword in answer_keywords) + completion_contains_question = any(question_keyword.lower() in completion.lower() for question_keyword in question_keywords) + completion_contains_summary = any(summary_keyword.lower() in completion.lower() for summary_keyword in summary_keywords) + + is_summarization_prompt = name == 'augment' + is_question_prompt = name.startswith('followup') + is_answer_prompt = name.startswith('answer') + + if (is_summarization_prompt or is_question_prompt) and completion_contains_answer: + return 0.0 + + if (is_summarization_prompt or is_answer_prompt) and completion_contains_question: + return 0.0 + + if not is_summarization_prompt and completion_contains_summary: + return 0.0 + + return 1 + + def get_rewards( self, prompt: str, completions: List[str], name: str ) -> torch.FloatTensor: + return torch.tensor( [self.reward( prompt, completion, name ) for completion in completions], dtype=torch.float32) + + def normalize_rewards( self, rewards: torch.FloatTensor ) -> torch.FloatTensor: + return rewards + + def reset(self): + pass + diff --git a/openvalidators/utils.py b/openvalidators/utils.py index 340a020..e9e6c92 100644 --- a/openvalidators/utils.py +++ b/openvalidators/utils.py @@ -54,7 +54,7 @@ def init_wandb(self, reinit=False): reinit=reinit, project=self.config.wandb.project_name, entity=self.config.wandb.entity, - config=self.config, + config={key: self.config.get(key, None) for key in ('neuron', 'reward')}, mode="offline" if self.config.wandb.offline else "online", dir=self.config.neuron.full_path, tags=tags, @@ -194,7 +194,10 @@ def save_state(self): prefix="Saved model", sufix=f"{ self.config.neuron.full_path }/model.torch", ) + except Exception as e: + bt.logging.warning(f"Failed to save model with error: {e}") + try: # Save the gating model. gating_model_linear_layer_dict = self.gating_model.linear.state_dict() gating_model_name = self.config.gating.model_name.replace("/", "_") @@ -205,7 +208,7 @@ def save_state(self): wandb.log({ "step": self.step, "block": ttl_get_block(self), - **neuron_state_dict + **neuron_state_dict }) if not self.config.wandb.off and self.config.wandb.track_gating_model: model_artifact = wandb.Artifact(f"{gating_model_name}_gating_linear_layer", type="model") @@ -213,12 +216,23 @@ def save_state(self): self.wandb.log_artifact(model_artifact) bt.logging.success(prefix="Saved gating model", sufix=f"{gating_model_file_path}") + except Exception as e: + bt.logging.warning(f"Failed to save gating model with error: {e}") - #empty cache - torch.cuda.empty_cache() - + try: + # Save diversity model. 
+ diversity_model_dict = {"historic_embeddings": self.diversity_model.historic_embeddings.to('cpu')} + diversity_model_file_path = f"{self.config.neuron.full_path}/diversity_model.pth" + torch.save(diversity_model_dict, diversity_model_file_path) + bt.logging.success( + prefix="Saved diversity model", + sufix=f"{diversity_model_file_path} {list(self.diversity_model.historic_embeddings.shape)}", + ) except Exception as e: - bt.logging.warning(f"Failed to save model with error: {e}") + bt.logging.warning(f"Failed to save diversity model with error: {e}") + + # empty cache + torch.cuda.empty_cache() def load_state(self): @@ -227,8 +241,9 @@ def load_state(self): try: state_dict = torch.load(f"{self.config.neuron.full_path}/model.torch") # Check for nans in saved state dict - if not torch.isnan(state_dict["neuron_weights"]).any(): - self.moving_averaged_scores = state_dict["neuron_weights"].clone().detach() + neuron_weights = torch.tensor(state_dict["neuron_weights"]) + if not torch.isnan(neuron_weights).any(): + self.moving_averaged_scores = neuron_weights.to(self.device) self.hotkeys = state_dict["neuron_hotkeys"] bt.logging.success( prefix="Reloaded model", @@ -236,3 +251,15 @@ def load_state(self): ) except Exception as e: bt.logging.warning(f"Failed to load model with error: {e}") + + try: + # Load diversity model. + diversity_model_file_path = f"{self.config.neuron.full_path}/diversity_model.pth" + diversity_model_dict = torch.load(diversity_model_file_path) + self.diversity_model.historic_embeddings = diversity_model_dict["historic_embeddings"].to(self.device) + bt.logging.success( + prefix="Reloaded diversity model", + sufix=f"{diversity_model_file_path} {list(self.diversity_model.historic_embeddings.shape)}", + ) + except Exception as e: + bt.logging.warning(f"Failed to load diversity model with error: {e}") diff --git a/requirements.txt b/requirements.txt index d8bf1a2..bc4a86e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ -bittensor==5.2.0 transformers<=4.31.0 +bittensor>=5.2.1,<6.0.0 wandb==0.15.3 -datasets==2.12.0 +datasets==2.14.0 plotly==5.14.1 networkx==3.1 scipy==1.10.1 diff --git a/scripts/release/README.md b/scripts/release/README.md new file mode 100644 index 0000000..162e22c --- /dev/null +++ b/scripts/release/README.md @@ -0,0 +1,59 @@ +# Release Script(s) Usage + +## Versioning + +This script needs: +- An existing `openvalidators/__init__.py` file +- An existing `__version__` variable in that file +- An existing version for that variable + +This process will generate: +- A modified version in `__version__` for the update type specified + +### Example Usage +`./scripts/release/versioning.sh -U patch -A` + +Where: +* `-U` (major|minor|patch) the type of update +* `-A` is to apply the script changes + + +## Add Notes Changelog + +This script needs: +- An existing `CHANGELOG.md` file with at least three lines +- An existing git tag for the previous version + +This process will generate: +- A new entry in `CHANGELOG.md` + +##### *Note: This will only list merge commits into the release branch since the last tag* + +### Example Usage +`./scripts/release/add_notes_changelog.sh -P 1.1.7 -V 1.1.8 -B hotfix/serve-val-axon -T $GIT -A` + +Where: +* `-P` is the old version +* `-V` is the new version +* `-B` is the release branch name (default: `release/vX.X.X`) +* `-T` is the GIT API token +* `-A` is to apply the script changes + +## Release + +This script needs: +- An existing `__version__` variable in the `openvalidators/__init__.py` file +- Version in the 
`__version__` variable is not a git tag already + +This process will generate: +- Tag in Github repo: https://github.com/opentensor/validators/tags +- Release in Github: https://github.com/opentensor/validators/releases + + +### Example Usage +`./scripts/release/release.sh -T $GIT -A` + +Where: +* `-T` is the GIT API token +* `-A` is to apply the script changes + diff --git a/scripts/release/add_notes_changelog.sh b/scripts/release/add_notes_changelog.sh new file mode 100755 index 0000000..d0f7594 --- /dev/null +++ b/scripts/release/add_notes_changelog.sh @@ -0,0 +1,106 @@ +#!/bin/bash + +#### +# Utils +#### +source ${BASH_SOURCE%/*}/utils.sh +source ${BASH_SOURCE%/*}/github_utils.sh +### + +# 1. Get options + +## Defaults +APPLY="false" + +while [[ $# -gt 0 ]]; do + case $1 in + -A|--apply) + APPLY="true" + shift # past argument + ;; + -P|--previous-version-tag) + PREV_TAG_VERSION="$2" + shift # past argument + shift # past value + ;; + -V|--version) + VERSION="$2" + shift # past argument + shift # past value + ;; + -T|--github-token) + GITHUB_TOKEN="$2" + shift # past argument + shift # past value + ;; + -B|--release-branch) + RELEASE_BRANCH="$2" + shift # past argument + shift # past value + ;; + -*|--*) + echo "Unknown option $1" + exit 1 + ;; + *) + POSITIONAL_ARGS+=("$1") # save positional arg + shift # past argument + ;; + esac +done + +if [[ -z $GITHUB_TOKEN && $APPLY == "true" ]]; then + echo_error "Github token required (-T, --github-token)" + exit 1 +fi + +if [[ -z $PREV_TAG_VERSION ]]; then + echo_error "Previous version tag required (-P, --previous-version-tag)" + exit 1 +fi + +if [[ -z $VERSION ]]; then + echo_error "Version to release required (-V, --version)" + exit 1 +fi + +if [[ -z $RELEASE_BRANCH ]]; then + echo_warning "Release branch not specified with (-B, --release-branch) assuming: release/$VERSION" + RELEASE_BRANCH=release/$VERSION +fi + +DATE=$(date +"%Y-%m-%d") +RELEASE_NAME="$VERSION / $DATE" +TAG_NAME=v$VERSION +PREV_TAG_NAME=v$PREV_TAG_VERSION + +# 2.2. Generate release notes +if [[ $APPLY == "true" ]]; then + echo_info "Generating Github release notes" + RESPONSE=$(generate_github_release_notes_for_changelog $GITHUB_TOKEN) + DESCRIPTION=$(echo $RESPONSE | jq '.body' | tail -1 | sed "s/\"//g") + + if [ $(echo $RESPONSE | jq '.body' | wc -l) -eq 1 ]; then + if [ $(echo $RESPONSE | jq '.' | grep 'documentation_url' | wc -l) -gt 0 ]; then + echo_error "Something went wrong generating Github release notes" + echo $RESPONSE | jq --slurp '.[0]' + exit 1 + fi + + if [ $(echo $RESPONSE | jq '.type' | grep 'error' | wc -l) -gt 0 ]; then + echo_error "Something went wrong generating Github release notes" + echo $RESPONSE | jq --slurp '.[1]' + exit 1 + fi + fi +else + echo_warning "Dry run execution. Not generating Github release notes" +fi + +if [[ $APPLY == "true" ]]; then + echo_info "Adding release notes to CHANGELOG.md" + sed -i "2 i\\\n## $RELEASE_NAME" CHANGELOG.md + sed -i "4 i\\\n$DESCRIPTION\n" CHANGELOG.md +else + echo_warning "Dry run execution. Not adding release notes to CHANGELOG.md" +fi \ No newline at end of file diff --git a/scripts/release/github_release.sh b/scripts/release/github_release.sh new file mode 100755 index 0000000..08a551e --- /dev/null +++ b/scripts/release/github_release.sh @@ -0,0 +1,105 @@ +#!/bin/bash + +#### +# Utils +#### +source ${BASH_SOURCE%/*}/utils.sh +source ${BASH_SOURCE%/*}/github_utils.sh +### + +# 1. 
Get options + +## Defaults +APPLY="false" + +while [[ $# -gt 0 ]]; do + case $1 in + -A|--apply) + APPLY="true" + shift # past argument + ;; + -P|--previous-version-tag) + PREV_TAG_VERSION="$2" + shift # past argument + shift # past value + ;; + -V|--version) + VERSION="$2" + shift # past argument + shift # past value + ;; + -T|--github-token) + GITHUB_TOKEN="$2" + shift # past argument + shift # past value + ;; + -*|--*) + echo "Unknown option $1" + exit 1 + ;; + *) + POSITIONAL_ARGS+=("$1") # save positional arg + shift # past argument + ;; + esac +done + +if [[ -z $GITHUB_TOKEN && apply == "true" ]]; then + echo_error "Github token required (-T, --github-token)" + exit 1 +fi + +if [[ -z $PREV_TAG_VERSION ]]; then + echo_error "Previous version tag required (-P, --previous-version-tag)" + exit 1 +fi + +if [[ -z $VERSION ]]; then + echo_error "Version to release required (-V, --version)" + exit 1 +fi + +# 2. Github +DATE=$(date +"%Y-%m-%d") +RELEASE_NAME="$VERSION / $DATE" +PREV_TAG_NAME=$PREV_TAG_VERSION +TAG_NAME=v$VERSION + +# 2.1 Create Git tag for the repository +if [[ $APPLY == "true" ]]; then + echo_info "Tagging repository" + tag_repository $TAG_NAME +else + echo_warning "Dry run execution. Not tagging Github repo" +fi + +# 2.2. Generate release notes +if [[ $APPLY == "true" ]]; then + echo_info "Generating Github release notes" + RESPONSE=$(generate_github_release_notes $GITHUB_TOKEN) + DESCRIPTION=$(echo $RESPONSE | jq '.body' | tail -1 | sed "s/\"//g") + + if [ $(echo $RESPONSE | jq '.body' | wc -l) -eq 1 ]; then + if [ $(echo $RESPONSE | jq '.' | grep 'documentation_url' | wc -l) -gt 0 ]; then + echo_error "Something went wrong generating Github release notes" + echo $RESPONSE | jq --slurp '.[0]' + exit 1 + fi + + if [ $(echo $RESPONSE | jq '.type' | grep 'error' | wc -l) -gt 0 ]; then + echo_error "Something went wrong generating Github release notes" + echo $RESPONSE | jq --slurp '.[1]' + exit 1 + fi + fi +else + echo_warning "Dry run execution. Not generating Github release notes" +fi + +# 2.3 Create Github release +if [[ $APPLY == "true" ]]; then + echo_info "Generating Github release" + create_github_release $GITHUB_TOKEN +else + echo_warning "Dry run execution. Not creating Github release" +fi \ No newline at end of file diff --git a/scripts/release/github_utils.sh b/scripts/release/github_utils.sh new file mode 100644 index 0000000..243449d --- /dev/null +++ b/scripts/release/github_utils.sh @@ -0,0 +1,169 @@ +#!/bin/bash + +#### +# Utils +#### +source ${BASH_SOURCE%/*}/utils.sh + +# +# Params: +# - First positional argument: version of the tag +# +function tag_repository() +{ + VERSION=$1 + + if [[ -z $VERSION ]]; then + echo_error "tag_repository needs VERSION" + exit 1 + fi + + git tag -a $VERSION -m "Release $VERSION" + git push origin --tags +} + +# +# Params: +# - First positional argument: version of the tag +# +function remove_tag() +{ + VERSION=$1 + + if [[ -z $VERSION ]]; then + echo_error "remove_tag needs VERSION" + exit 1 + fi + + git tag -d $VERSION + git push --delete origin $VERSION +} + +# +# Needs: +# - TAG_NAME +# - PREV_TAG_NAME +# - RELEASE_NAME +# +function generate_github_release_notes_post_data() +{ + cat < /dev/null +} \ No newline at end of file diff --git a/scripts/release/release.sh b/scripts/release/release.sh new file mode 100755 index 0000000..f631fd6 --- /dev/null +++ b/scripts/release/release.sh @@ -0,0 +1,122 @@ +#!/bin/bash + +# +# In this script you are going to find the process of releasing Openvalidators. 
+# +# This script needs: +# - An existing __version__ var in the __init__.py file +# - Version in __version__ var is not a git tag already +# +# This process will generate: +# - Tag in Github repo: https://github.com/opentensor/validators/tags +# - Release in Github: https://github.com/opentensor/validators/releases +# - New entry in CHANGELOG.md file +# + +### +# Utils +### + +source ${BASH_SOURCE%/*}/utils.sh + +function help(){ + echo Usage: + echo \ \ $0 + echo + echo This script releases an openvalidators version. + echo + echo This script needs: + echo \ \ - An existing __version__ var in the __init__.py file + echo \ \ - Version in __version__ var is not a git tag already + echo +} +### + +### +# Start of release process +### + +# 0. Check requirements +# Expected state for the execution environment +# - __version__ exists inside file 'openvalidators/__init__.py' +# - Version has the expected format + +CODE_WITH_VERSION='openvalidators/__init__.py' + +CODE_VERSION=`grep '__version__\ \=\ ' $CODE_WITH_VERSION | awk '{print $3}' | sed 's/"//g'` +VERSION=$CODE_VERSION + +if ! [[ "$CODE_VERSION" =~ ^[0-9]+.[0-9]+.[0-9]+$ ]];then + echo_error "Requirement failure: Version in code '$CODE_VERSION' with wrong format" + exit 1 +fi + +# 1. Get options + +## Defaults +APPLY="false" +APPLY_ACTION="" + +while [[ $# -gt 0 ]]; do + case $1 in + -h|--help) + help + exit 0 + ;; + -A|--apply) + APPLY="true" + APPLY_ACTION="--apply" + shift # past argument + ;; + -T|--github-token) + GITHUB_TOKEN="$2" + shift # past argument + shift # past value + ;; + -*|--*) + echo "Unknown option $1" + exit 1 + ;; + *) + POSITIONAL_ARGS+=("$1") # save positional arg + shift # past argument + ;; + esac +done + +if [[ $APPLY == "true" ]]; then + echo_warning "Not a dry run execution" +else + echo_warning "Dry run execution" +fi + +if [[ -z $GITHUB_TOKEN && $APPLY == "true" ]]; then + echo_error "Github token required (-T, --github-token)" + exit 1 +fi + +# 2. Checking version + +CURRENT_VERSION_EXISTS=$(git tag | grep $VERSION) +if [[ ! -z $CURRENT_VERSION_EXISTS ]]; then + echo_error "Current version '$VERSION' already exists" + help + exit 1 +fi + +PREV_VERSION_TAG=`get_git_tag_higher_version` + +TAG_NAME=v$VERSION + +## 2.1. Current VERSION is not already a tag + +echo_info "Detected new version tag: $VERSION" +echo_info "Previous version tag: $PREV_VERSION_TAG" +echo_info "Tag generated: $TAG_NAME" + +# 3.
Create Github resources +if [[ $APPLY == "true" ]]; then + ${BASH_SOURCE%/*}/github_release.sh $APPLY_ACTION --github-token $GITHUB_TOKEN -P $PREV_VERSION_TAG -V $VERSION +else + ${BASH_SOURCE%/*}/github_release.sh $APPLY_ACTION $GITHUB_TOKEN -P $PREV_VERSION_TAG -V $VERSION +fi diff --git a/scripts/release/utils.sh b/scripts/release/utils.sh new file mode 100755 index 0000000..91ba2f4 --- /dev/null +++ b/scripts/release/utils.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[0;33m' +NC='\033[0m' # No Color + +function echo_error { + echo -e "${RED}[ERROR]${NC} $1" +} + +function echo_warning { + echo -e "${YELLOW}[WARNING]${NC} $1" +} + +function echo_info { + echo -e "${GREEN}[INFO]${NC} $1" +} + +function echo_json { + echo "{\"type\":\"$1\",\"message\":\"$2\"}" +} + +function get_git_tag_higher_version { + echo `git tag -l --sort -version:refname | head -n 1` +} \ No newline at end of file diff --git a/scripts/release/versioning.sh b/scripts/release/versioning.sh new file mode 100755 index 0000000..e9affb4 --- /dev/null +++ b/scripts/release/versioning.sh @@ -0,0 +1,93 @@ +#!/bin/bash + +#### +# Utils +#### +source ${BASH_SOURCE%/*}/utils.sh +### + +# 1. Get options + +## Defaults +APPLY="false" + +while [[ $# -gt 0 ]]; do + case $1 in + -A|--apply) + APPLY="true" + shift # past argument + ;; + -U|--update) + VERSION_TYPE="$2" + shift # past argument + shift # past value + ;; + -*|--*) + echo "Unknown option $1" + exit 1 + ;; + *) + POSITIONAL_ARGS+=("$1") # save positional arg + shift # past argument + ;; + esac +done + +if [[ $VERSION_TYPE != "major" && $VERSION_TYPE != "minor" && $VERSION_TYPE != "patch" && $VERSION_TYPE != "rc" ]]; then + echo_error "Incorrect version type (-U|--update). Version types accepted: {major, minor, patch, rc}" + exit 1 +fi + +VERSION=$(cat VERSION) +CODE_WITH_VERSION='openvalidators/__init__.py' + +MAJOR=$(awk -F. '{print $1}' <<< $VERSION) +MINOR=$(awk -F. '{print $2}' <<< $VERSION) +PATCH=$(awk -F. '{print $3}' <<< $VERSION) + +# RC version +RC=$(awk -F- '{print $2}' <<< $VERSION) +if [ -z $RC ]; then + CURRENT_VERSION="$MAJOR.$MINOR.$PATCH" +else + CURRENT_VERSION="$MAJOR.$MINOR.$PATCH-$RC" +fi + +case $VERSION_TYPE in + "major") + echo_info "Applying a $VERSION_TYPE update" + NEW_VERSION="$((MAJOR + 1)).0.0" + ;; + "minor") + echo_info "Applying a $VERSION_TYPE update" + NEW_VERSION="$MAJOR.$((MINOR + 1)).0" + ;; + "patch") + echo_info "Applying a $VERSION_TYPE update" + NEW_VERSION="$MAJOR.$MINOR.$((PATCH + 1))" + ;; + "rc") + SUFFIX=$2 + if [ -z $SUFFIX ]; then + echo_error "Suffix is needed when updating version to a RC" + exit 1 + fi + NEW_VERSION="$MAJOR.$MINOR.$PATCH-$SUFFIX" + ;; + *) + echo_error "This operation is not allowed. Try one of the following: {major, minor, patch, rc}" + exit 1 + ;; +esac + + +echo_info "Current version: $CURRENT_VERSION" +echo_info "New version: $NEW_VERSION" + +if [[ $APPLY == "true" ]]; then + echo_info "Updating version in code: sed -i "18,30s/$VERSION/$NEW_VERSION/g" $CODE_WITH_VERSION" + sed -i "18,30s/$VERSION/$NEW_VERSION/g" $CODE_WITH_VERSION +else + echo_warning "Dry run execution.
Version update not applied" + echo_info "Use -A or --apply to apply changes" +fi \ No newline at end of file diff --git a/tests/helpers/__init__.py b/tests/helpers/__init__.py new file mode 100644 index 0000000..ea6e2eb --- /dev/null +++ b/tests/helpers/__init__.py @@ -0,0 +1,31 @@ +# The MIT License (MIT) +# Copyright © 2023 Opentensor Technologies + +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the “Software”), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of +# the Software. + +# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +from bittensor_wallet.mock import MockWallet as _MockWallet, utils as _mock_wallet_utils + +_get_mock_coldkey = _mock_wallet_utils.get_mock_coldkey +_get_mock_hotkey = _mock_wallet_utils.get_mock_hotkey +_get_mock_keypair = _mock_wallet_utils.get_mock_keypair +_get_mock_wallet = _mock_wallet_utils.get_mock_wallet + + +def __mock_wallet_factory__(*args, **kwargs) -> _MockWallet: + """Returns a mock wallet object.""" + + mock_wallet = _get_mock_wallet() + + return mock_wallet diff --git a/tests/reward/__init__.py b/tests/reward/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/reward/test_task_validator.py b/tests/reward/test_task_validator.py new file mode 100644 index 0000000..8ebefc7 --- /dev/null +++ b/tests/reward/test_task_validator.py @@ -0,0 +1,101 @@ +import unittest +from openvalidators.reward.task_validator import TaskValidator + +class TaskValidatorTestCase(unittest.TestCase): + """ + This class contains unit tests for the TaskValidator class. + + The tests cover different scenarios for the `reward` method of the TaskValidator class. + The `reward` method is expected to return a reward based on the task name and the completion text. + """ + + def setUp(self): + self.validator = TaskValidator() + + def test_augment_with_answer_keyword(self): + """ + Test if the reward method returns 0 when the task "name" starts with 'augment' (summarization) + and the completion contains the 'Answer:' keyword. + """ + name = f'augment' + completion = "Summary: test summary\nAnswer: Test answer" + self.assertEqual(self.validator.reward('', completion, name), 0.0) + + def test_followup_with_answer_keyword(self): + """ + Test if the reward method returns 0 when the task "name" starts with 'followup' (question generation) + and the completion contains the 'Answer:' keyword. + """ + for i in range(0, 4): + name = f'followup{i}' + completion = 'Question: This is a test question?\nAnswer: This is a test answer.' 
+ self.assertEqual(self.validator.reward('', completion, name), 0.0) + + def test_augment_with_question_keyword(self): + """ + Test if the reward method returns 0 when the task "name" starts with 'augment' (summarization) + and the completion contains the 'Question:' keyword. + """ + name = f'augment' + completion = "Summary: test summary\nQuestion: This is a test question?" + self.assertEqual(self.validator.reward('', completion, name), 0.0) + + def test_answer_with_question_keyword(self): + """ + Test if the reward method returns 0 when the task "name" is 'answer' (answer generation) + and the completion contains the 'Question:' keyword. + """ + for i in range(0, 4): + name = f'answer{i}' + completion = 'Question: This is a test question?\nAnswer: This is a test answer.' + self.assertEqual(self.validator.reward('', completion, name), 0.0) + + def test_followup_and_answer_with_summary_keyword(self): + """ + Test if the reward method returns 0 when the task "name" is different from "augment" (summarization) + and the completion contains the 'Summary:' keyword. + """ + for name in ['followup0', 'followup1', 'followup2', 'followup3', 'answer0', 'answer1', 'answer2', 'answer3']: + completion = 'Summary: This is a test summary.' + self.assertEqual(self.validator.reward('', completion, name), 0.0) + + def test_reward_valid_followup(self): + """ + Test if the reward method returns 1 when the task "name" starts with 'followup' (question generation) + and the completion contains a question + """ + for i in range(0, 4): + name = f'followup{i}' + completion = 'Question: This is a test question?' + self.assertEqual(self.validator.reward('', completion, name), 1.0) + + def test_reward_valid_answer(self): + """ + Test if the reward method returns 1 when the task "name" is 'answer' (answer generation) + and the completion contains an answer + """ + for i in range(0, 4): + name = f'answer{i}' + completion = 'Answer: This is a test answer.' + self.assertEqual(self.validator.reward('', completion, name), 1.0) + + def test_reward_valid_augment(self): + """ + Test if the reward method returns 1 when the task "name" is 'augment' (summarization) + and the completion contains the a summary. + """ + name = 'augment' + completion = 'Summary: This is a test summary.' + self.assertEqual(self.validator.reward('', completion, name), 1.0) + + def test_reward_valid_other(self): + """ + Test if the reward method returns 1 when the task "name" is different from "augment", "followup", and "answer" + and the completion does not contain the 'Summary:', 'Answer:', and 'Question:' keywords. + """ + for name in ['followup0', 'followup1', 'followup2', 'followup3', 'answer0', 'answer1', 'answer2', 'answer3']: + completion = 'This is a test completion.' 
+ self.assertEqual(self.validator.reward('', completion, name), 1.0) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/tests/test_dataset.py b/tests/test_dataset.py new file mode 100644 index 0000000..c220768 --- /dev/null +++ b/tests/test_dataset.py @@ -0,0 +1,26 @@ +import unittest +from openvalidators.dataset import Dataset + + +class DatasetTestCase(unittest.TestCase): + def test_next_skips_empty_and_newline_only_strings(self): + mock_data = iter([{"text": ""}, {"text": "\n\n"}, {"text": "Non-empty text"}]) + dataset = Dataset() + dataset.openwebtext = mock_data + dataset.red_pajama = mock_data + + # Test that __next__ skips empty texts and texts that consist only of newline characters + self.assertEqual(dataset.__next__(), {"text": "Non-empty text"}) + + def test_next_returns_regular_strings(self): + mock_data = iter([{"text": "Non-empty text"}]) + dataset = Dataset() + dataset.openwebtext = mock_data + dataset.red_pajama = mock_data + + # Test that __next__ returns a non-empty text + self.assertEqual(dataset.__next__(), {"text": "Non-empty text"}) + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/tests/test_event.py b/tests/test_event.py index 3ad566f..7fb9f2b 100644 --- a/tests/test_event.py +++ b/tests/test_event.py @@ -45,6 +45,7 @@ def test_event_from_dict_all_forward_columns_match(self): RewardModelType.rlhf.value: [1.0], RewardModelType.prompt.value: [1.0], RewardModelType.relevance.value: [1.0], + RewardModelType.task_validator.value: [1.0] } # Act @@ -102,6 +103,7 @@ def test_event_from_dict_forward_no_reward_logging(self): assert event.rlhf_reward_model is None assert event.prompt_reward_model is None assert event.relevance_filter is None + assert event.task_validator_filter is None def test_event_from_dict_forward_reward_logging_mismatch(self): """Test that all default columns logged on the forward pass are correctly converted and that @@ -142,4 +144,5 @@ def test_event_from_dict_forward_reward_logging_mismatch(self): assert event.rlhf_reward_model is None assert event.prompt_reward_model is None assert event.relevance_filter is None + assert event.task_validator_filter is None diff --git a/tests/test_weights.py b/tests/test_weights.py index 933e6a0..6fb5c6b 100644 --- a/tests/test_weights.py +++ b/tests/test_weights.py @@ -20,13 +20,30 @@ import sys from openvalidators.neuron import neuron as Neuron from openvalidators.forward import run_step -from unittest.mock import MagicMock +from unittest.mock import MagicMock, patch + +from .helpers import __mock_wallet_factory__ CLI_ARGS_STR = "validators/openvalidators/neuron.py --mock --wallet._mock --wandb.off --neuron.followup_sample_size 10 --neuron.answer_sample_size 10" SYS_ARGV = sys.argv.copy() +patcher = None + +def setUpModule(): + """Runs once for the tests in this module.""" + global patcher + patcher = patch("bittensor.wallet.__new__", __mock_wallet_factory__ ) + patcher.start() + +def tearDownModule(): + """Runs once for the tests in this module.""" + global patcher + if patcher: + patcher.stop() + + def test_uid_weights_unchanged_unless_queried(n_steps=10, n_concurrent=1): """Test that the weights of unqueried uids do not change over the course of a forward pass."""