Commit
Merge remote-tracking branch 'origin/add_original_reward_to_wandb' into reward_weighting_update
Eugene-hu committed Aug 24, 2023
2 parents e0eeb05 + 21dbce9 commit 92173c3
Showing 30 changed files with 1,285 additions and 101 deletions.
41 changes: 41 additions & 0 deletions CHANGELOG.md
@@ -0,0 +1,41 @@
# Changelog

## 1.1.8 / 2023-08-12

### What’s Changed
- Make sure to serve axon first by @camfairchild in 14921d35c


**Full Changelog**: https://github.com/opentensor/validators/compare/v1.1.7...v1.1.8


## 1.1.7 / 2023-08-11
### What’s Changed
- Hotfix cutoff limit by @Eugene-hu in #126

## 1.1.6 / 2023-08-10
### What’s Changed
- Diversity regularization by @isabella618033 in https://github.com/opentensor/validators/pull/124

## 1.1.5 / 2023-08-08
### What’s Changed
- Adds new keywords for the task validator by @p-ferreira in #119
- Save historic embeddings on disk by @opentaco in #121
- Updates relevance mechanism by @Eugene-hu in #122

## 1.1.4 / 2023-08-07
- HOTFIX: create and serve the axon at startup by @robertalanm in #120


## 1.1.3 / 2023-08-02
- Adds subtensor to metagraph sync by @camfairchild in #79
- Fix wandb weights format logging by @p-ferreira in #88
- Adds netuid tag to wandb runs by @p-ferreira in #95
- Implements GPU cleaning for optimization by @Eugene-hu in #96
- Adds compatibility with bittensor 5.3.3 by @camfairchild in #107
- Adds historic diversity component by @isabella618033 in #111
- Improvements on diversity model by @isabella618033 and @Eugene-hu in #111
- Prompt improvements by @mrseeker in #110 and @p-ferreira in #112
- Adds Task Validator Filter to reward pipeline by @p-ferreira in #112
- Fix for empty data retrieval from datasets by @p-ferreira in #113
- Deprecates pip usage by @p-ferreira in #114
12 changes: 2 additions & 10 deletions README.md
@@ -2,7 +2,6 @@

# **Open Validators** <!-- omit in toc -->
[![Discord Chat](https://img.shields.io/discord/308323056592486420.svg)](https://discord.gg/bittensor)
-[![PyPI version](https://badge.fury.io/py/openvalidators.svg)](https://badge.fury.io/py/openvalidators)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)

---
@@ -19,7 +18,7 @@ It offers several functionalities, such as:

The main goal of this repository is to facilitate the interaction with the Bittensor network by providing a set of
open-source validators to the community. The current validator implementation queries the network for responses and
-evaluations using carefully crafted prompts, that are later evaluated by a large foundation GPT-J reward model.
+evaluations using carefully crafted chain-of-thought (CoT) prompts, which are later evaluated by a pipeline of reward functions, including diversity, relevance, and RLHF, among others.

Additionally, the repository provides an analysis and data toolkit that allows users to analyze the data generated from
the validator's interaction with the network. By default, the validator collects various data points, such as question
@@ -69,14 +68,7 @@ There are currently four main avenues for engaging with this repository:
- Serves individuals, researchers, and developers who seek to create datasets for the community's miners.

# Install
-There are two ways to use OpenTensor validators:
-
-1. With pip:
-```bash
-$ pip3 install openvalidators
-```
-
-2. From source:
+From source:
```bash
$ git clone https://github.com/opentensor/validators.git
$ pip3 install -e openvalidators/
2 changes: 1 addition & 1 deletion openvalidators/__init__.py
@@ -28,6 +28,6 @@
from . import weights
from . import event

__version__ = "1.1.1"
__version__ = "1.1.8"
version_split = __version__.split(".")
__spec_version__ = (1000 * int(version_split[0])) + (10 * int(version_split[1])) + (1 * int(version_split[2]))
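For reference, `__spec_version__` packs major/minor/patch into one integer; a quick standalone check of the arithmetic:

```python
# Standalone check of the spec-version encoding above.
version = "1.1.8"
major, minor, patch = (int(part) for part in version.split("."))
spec_version = (1000 * major) + (10 * minor) + (1 * patch)
assert spec_version == 1018  # 1000*1 + 10*1 + 1*8
# Caveat: minor/patch values above 9 collide, e.g. "1.1.10" and "1.2.0" both map to 1020.
```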
16 changes: 16 additions & 0 deletions openvalidators/config.py
@@ -166,6 +166,16 @@ def add_args(cls, parser):
default=4096,
)

parser.add_argument(
"--neuron.axon_off",
"--axon_off",
action="store_true",
# Note: the validator needs to serve an Axon with their IP or they may
# be blacklisted by the firewall of serving peers on the network.
help="Set this flag to not attempt to serve an Axon.",
default=False,
)

parser.add_argument("--wandb.off", action="store_true", help="Turn off wandb.", default=False)
parser.add_argument(
"--wandb.project_name",
@@ -241,6 +251,12 @@ def add_args(cls, parser):
action="store_true",
help="Dont apply the diversity reward model",
default=False,
)
parser.add_argument(
"--neuron.task_validator_off",
action="store_true",
help="Dont apply the task validator reward model",
default=False,
)

parser.add_argument(
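A minimal sketch of how the two new flags behave, using plain `argparse` (bittensor's config layer wraps argparse, so details may differ):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--neuron.axon_off", "--axon_off", action="store_true", default=False)
parser.add_argument("--neuron.task_validator_off", action="store_true", default=False)

config = parser.parse_args(["--axon_off"])
# Dotted option names become dotted attribute names, hence getattr.
assert getattr(config, "neuron.axon_off") is True
assert getattr(config, "neuron.task_validator_off") is False
```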
16 changes: 11 additions & 5 deletions openvalidators/dataset.py
@@ -27,11 +27,17 @@ def __init__(self):
self.openwebtext = iter( load_dataset("openwebtext", split="train", streaming=True).shuffle(seed=seed, buffer_size=10000) )
self.red_pajama = iter( load_dataset("togethercomputer/RedPajama-Data-1T", 'default', split='train', streaming=True).shuffle(seed=seed, buffer_size=10000) )

-    def __next__(self):
-        if random.random() < 0.5:
-            return {"text": next(self.openwebtext)["text"]}
-        else:
-            return {"text": next(self.red_pajama)["text"]}
+    def __next__(self):
+        while True:
+            bt.logging.debug('Retrieving data from dataset...')
+            if random.random() < 0.5:
+                text = next(self.openwebtext)["text"]
+            else:
+                text = next(self.red_pajama)["text"]
+
+            # Skip text that is empty or consists only of whitespace.
+            if text.strip():
+                return {"text": text}


class MockDataset(Iterator):
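The new `__next__` can be exercised without the real datasets; a toy stream with the same skip-empty logic (`MockTextStream` is a hypothetical stand-in):

```python
# Toy iterator reproducing the skip-empty retry behavior above.
class MockTextStream:
    def __init__(self, samples):
        self._samples = iter(samples)

    def __next__(self):
        while True:
            text = next(self._samples)
            if text.strip():  # skip empty or whitespace-only samples
                return {"text": text}

stream = MockTextStream(["", "\n\n", "   ", "hello world"])
assert next(stream) == {"text": "hello world"}
```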
2 changes: 2 additions & 0 deletions openvalidators/event.py
@@ -45,6 +45,7 @@ class EventSchema:
rlhf_reward_model: Optional[List[float]] # Output vector of the rlhf reward model
prompt_reward_model: Optional[List[float]] # Output vector of the prompt reward model
relevance_filter: Optional[List[float]] # Output vector of the relevance scoring reward model
    task_validator_filter: Optional[List[float]]  # Output vector of the task validator reward model

# Weights data
set_weights: Optional[List[List[float]]]
@@ -55,6 +56,7 @@ def from_dict(event_dict: dict, disable_log_rewards: bool) -> 'EventSchema':
rewards = {
'dahoas_reward_model': event_dict.get(RewardModelType.dahoas.value),
'blacklist_filter': event_dict.get(RewardModelType.blacklist.value),
'task_validator_filter': event_dict.get(RewardModelType.task_validator.value),
'nsfw_filter': event_dict.get(RewardModelType.nsfw.value),
'relevance_filter': event_dict.get(RewardModelType.relevance.value),
'reciprocate_reward_model': event_dict.get(RewardModelType.reciprocate.value),
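The `from_dict` pattern keys logged vectors by `RewardModelType` values; a trimmed, self-contained sketch:

```python
from dataclasses import dataclass
from enum import Enum
from typing import List, Optional

class RewardModelType(Enum):
    task_validator = 'task_validator_filter'
    nsfw = 'nsfw_filter'

@dataclass
class EventSchema:
    task_validator_filter: Optional[List[float]]
    nsfw_filter: Optional[List[float]]

event_dict = {'task_validator_filter': [1.0, 0.0], 'nsfw_filter': [1.0, 1.0]}
rewards = {
    'task_validator_filter': event_dict.get(RewardModelType.task_validator.value),
    'nsfw_filter': event_dict.get(RewardModelType.nsfw.value),
}
print(EventSchema(**rewards))  # missing keys would surface as None via .get
```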
24 changes: 17 additions & 7 deletions openvalidators/forward.py
@@ -62,7 +62,11 @@ def get_random_uids(self, k: int, exclude: List[int] = None) -> torch.LongTensor
return uids


-async def run_step(self, prompt: str, k: int, timeout: float, name: str, exclude: list = []):
+async def run_step(self, prompt: str, k: int, timeout: float, name: str, exclude: list = [], base_prompt=None):
+
+    if base_prompt is None:
+        base_prompt = prompt

bt.logging.debug("run_step", name)

# Record event start time.
@@ -83,18 +87,20 @@ async def run_step(self, prompt: str, k: int, timeout: float, name: str, exclude
# Compute the rewards for the responses given the prompt.
rewards: torch.FloatTensor = torch.zeros(len(responses), dtype=torch.float32).to(self.device)
for weight_i, reward_fn_i in zip(self.reward_weights, self.reward_functions):
-        reward_i = reward_fn_i.apply(prompt, responses, name).to(self.device)
-        rewards += weight_i * reward_i
+        reward_i, reward_i_normalized = reward_fn_i.apply(prompt, responses, name)
+        rewards += weight_i * reward_i_normalized.to(self.device)
if not self.config.neuron.disable_log_rewards:
event[reward_fn_i.name] = reward_i.tolist()
bt.logging.trace(str(reward_fn_i.name), reward_i.tolist())
+            event[reward_fn_i.name + '_normalized'] = reward_i_normalized.tolist()
+            bt.logging.trace(str(reward_fn_i.name), reward_i_normalized.tolist())

for masking_fn_i in self.masking_functions:
-        mask_i = masking_fn_i.apply(prompt, responses, name).to(self.device)
-        rewards *= mask_i  # includes diversity
+        mask_i, mask_i_normalized = masking_fn_i.apply(base_prompt, responses, name)
+        rewards *= mask_i_normalized.to(self.device)  # includes diversity
if not self.config.neuron.disable_log_rewards:
event[masking_fn_i.name] = mask_i.tolist()
bt.logging.trace(str(masking_fn_i.name), mask_i.tolist())
+            event[masking_fn_i.name + '_normalized'] = mask_i_normalized.tolist()
+            bt.logging.trace(str(masking_fn_i.name), mask_i_normalized.tolist())

# Train the gating model based on the predicted scores and the actual rewards.
gating_scores: torch.FloatTensor = self.gating_model(prompt).to(self.device)
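The loop above is a weighted sum of normalized reward vectors followed by multiplicative masking; a toy version with made-up tensors:

```python
import torch

reward_weights = [0.7, 0.3]
normalized_rewards = [torch.tensor([0.8, 0.2]), torch.tensor([0.5, 0.9])]
masks = [torch.tensor([1.0, 0.0])]  # e.g. a blacklist zeroing the second response

rewards = torch.zeros(2)
for weight_i, reward_i in zip(reward_weights, normalized_rewards):
    rewards += weight_i * reward_i
for mask_i in masks:
    rewards *= mask_i

print(rewards)  # tensor([0.7100, 0.0000])
```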
@@ -168,6 +174,7 @@ async def forward(self):
)

base_text = augment_event["best"]
base_prompt = augment_event["best"]
exclude = augment_event["uids"]
for k in range(self.config.neuron.num_followup_steps):

@@ -180,6 +187,7 @@
k=self.config.neuron.followup_sample_size,
timeout=self.config.neuron.followup_timeout,
exclude=exclude,
base_prompt=base_prompt
)
exclude += followup_event["uids"]

@@ -192,6 +200,7 @@
k=self.config.neuron.answer_sample_size,
timeout=self.config.neuron.answer_timeout,
exclude=exclude,
base_prompt=followup_event["best"]
)
exclude += answer_event["uids"]

@@ -205,3 +214,4 @@
)
else:
base_text = base_text + "\nQuestion:" + followup_event["best"] + "\nAnswer:" + answer_event["best"]
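A toy walk-through of how this loop grows `base_text`, with placeholder strings standing in for the best-scoring network completions:

```python
base_text = "Octopuses have three hearts."
qa_pairs = [
    ("Why do octopuses need three hearts?",
     "Two pump blood through the gills, one through the body."),
    ("What happens to the systemic heart while swimming?",
     "It stops beating, which is one reason octopuses prefer crawling."),
]
for question, answer in qa_pairs:
    base_text = base_text + "\nQuestion:" + question + "\nAnswer:" + answer
print(base_text)
```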

66 changes: 51 additions & 15 deletions openvalidators/neuron.py
@@ -34,11 +34,12 @@
# Load gating models
from openvalidators.reward import (
Blacklist,
+    TaskValidator,
NSFWRewardModel,
DirectPreferenceRewardModel,
OpenAssistantRewardModel,
ReciprocateRewardModel,
-    BertRelevanceRewardModel,
+    RelevanceRewardModel,
MockRewardModel,
DahoasRewardModel,
DiversityRewardModel,
@@ -89,7 +90,9 @@ def __init__(self):
self.wallet = bt.wallet(config=self.config)
self.wallet.create_if_non_existent()
if not self.config.wallet._mock:
-            self.wallet.reregister(subtensor=self.subtensor, netuid=self.config.netuid)
+            if not self.subtensor.is_hotkey_registered_on_subnet(hotkey_ss58=self.wallet.hotkey.ss58_address, netuid=self.config.netuid):
+                raise Exception(f'Wallet not currently registered on netuid {self.config.netuid}, please register the wallet before running')

bt.logging.debug(str(self.wallet))

# Init metagraph.
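The reregister call gives way to a fail-fast check; the control flow with a stub subtensor (stub names are illustrative):

```python
class StubSubtensor:
    def is_hotkey_registered_on_subnet(self, hotkey_ss58: str, netuid: int) -> bool:
        return False  # pretend the hotkey is missing from the subnet

subtensor, netuid, hotkey_ss58 = StubSubtensor(), 1, "5F...stub"
try:
    if not subtensor.is_hotkey_registered_on_subnet(hotkey_ss58=hotkey_ss58, netuid=netuid):
        raise Exception(f"Wallet not registered on netuid {netuid}; register it before running")
except Exception as e:
    print(e)  # the validator would abort here instead of silently re-registering
```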
@@ -125,6 +128,32 @@ def __init__(self):
self.gating_model = GatingModel(metagraph=self.metagraph, config=self.config).to(self.device)
bt.logging.debug(str(self.gating_model))

if not self.config.neuron.axon_off:
bt.logging.debug('serving ip to chain...')
try:
axon = bt.axon(
wallet=self.wallet, metagraph=self.metagraph, config=self.config
)

try:
self.subtensor.serve_axon(
netuid=self.config.netuid,
axon=axon,
use_upnpc=False,
wait_for_finalization=True,
)
                except Exception as e:
                    bt.logging.error(f'Failed to serve Axon with exception: {e}')

                del axon
            except Exception as e:
                bt.logging.error(f'Failed to initialize Axon with exception: {e}')

else:
bt.logging.debug('axon off, not serving ip to chain.')
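The nested try/except keeps startup alive when axon creation or serving fails; the same control flow stripped of bittensor objects (stub callables assumed):

```python
def serve_axon_if_enabled(axon_off: bool, create_axon, serve_axon) -> None:
    if axon_off:
        print('axon off, not serving ip to chain.')
        return
    try:
        axon = create_axon()
        try:
            serve_axon(axon)
        except Exception as e:
            print(f'Failed to serve Axon with exception: {e}')
        del axon
    except Exception as e:
        print(f'Failed to initialize Axon with exception: {e}')

def failing_serve(axon) -> None:
    raise RuntimeError("chain unreachable")

# Creation succeeds, serving fails, and startup still continues.
serve_axon_if_enabled(False, object, failing_serve)
```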

# Dendrite pool for querying the network during training.
bt.logging.debug("loading", "dendrite_pool")
if self.config.neuron.mock_dendrite_pool:
@@ -190,24 +219,31 @@ def __init__(self):

bt.logging.error(message)
raise Exception(message)


# Masking functions
self.blacklist = (
Blacklist() if not self.config.neuron.blacklist_off else MockRewardModel(RewardModelType.blacklist.value)
)
+        task_validator = (
+            TaskValidator() if not self.config.neuron.task_validator_off
+            else MockRewardModel(RewardModelType.task_validator.value)
+        )
+        relevance_model = (
+            RelevanceRewardModel(device=self.device) if not self.config.neuron.relevance_off
+            else MockRewardModel(RewardModelType.relevance.value)
+        )
+        self.diversity_model = (
+            DiversityRewardModel(device=self.device) if not self.config.neuron.diversity_off
+            else MockRewardModel(RewardModelType.diversity.value)
+        )
+        nsfw_model = (
+            NSFWRewardModel(device=self.device) if not self.config.neuron.nsfw_off
+            else MockRewardModel(RewardModelType.nsfw.value)
+        )

-        self.masking_functions = [
-            self.blacklist,
-            BertRelevanceRewardModel(device=self.device)
-            if not self.config.neuron.relevance_off
-            else MockRewardModel(RewardModelType.relevance.value),
-            DiversityRewardModel(device=self.device)
-            if not self.config.neuron.diversity_off
-            else MockRewardModel(RewardModelType.diversity.value),
-            NSFWRewardModel(device=self.device)
-            if not self.config.neuron.nsfw_off
-            else MockRewardModel(RewardModelType.nsfw.value),
-        ]
+        self.masking_functions = [self.blacklist, task_validator, relevance_model, self.diversity_model, nsfw_model]
bt.logging.debug(str(self.reward_functions))
bt.logging.debug(str(self.masking_functions))

# Init the event loop.
self.loop = asyncio.get_event_loop()
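The refactor keeps each filter behind the same real-or-mock toggle; a self-contained sketch of the pattern (class bodies are illustrative stand-ins, not the repo's implementations):

```python
class MockRewardModel:
    def __init__(self, name: str):
        self.name = name

    def apply(self, prompt: str, responses: list, name: str) -> list:
        return [1.0] * len(responses)  # pass-through mask: filters nothing

class TaskValidator(MockRewardModel):  # stand-in for the real filter
    def __init__(self):
        super().__init__('task_validator_filter')

task_validator_off = False
task_validator = (
    TaskValidator() if not task_validator_off
    else MockRewardModel('task_validator_filter')
)
masking_functions = [task_validator]
print([fn.name for fn in masking_functions])  # ['task_validator_filter']
```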
10 changes: 5 additions & 5 deletions openvalidators/prompts.py
@@ -124,7 +124,7 @@ def find_unique_tags(input_text: str):


# Request a follow-up question given a preceding context.
followup_request_template = "Ask one relevant and insightful question about the preceding context"
followup_request_template = "Ask a single relevant and insightful question about the preceding context"

# Scores a summary on a scale from 0 to 10, given a context.
augment_scoring_template = """Score the relevance, succinctness, and quality of a summary given a context. The context is within <Context></Context> tags, and the question is within <Summary></Summary> tags. Give a score between 0 and 10 in the <Score></Score> tags, where 0 means the summary is irrelevant, and 10 means it's perfectly relevant and a good summary. Include a brief explanation for your score based solely on the context-summary relationship.
@@ -348,16 +348,16 @@ def find_unique_tags(input_text: str):

def followup_prompt( base_text:str, i:int = 0) -> str:
if i == 0:
return f"{base_text}\n\n{followup_request_template}\n"
return f"{base_text}\n\n{followup_request_template}\n. Do not try to return an answer or a summary:"
else:
return f"{base_text}\n\n{followup_request_template} and previous questions\n"
return f"{base_text}\n\n{followup_request_template} and previous questions. Do not try to return an answer or a summary:\n"


def answer_prompt( base_text:str, followup:str ) -> str:
return f"{base_text}\n Question:{followup}\n Answer the question step by step and explain your thoughts"
return f"{base_text}\n\nQuestion:{followup}\nAnswer the question step by step and explain your thoughts. Do not include questions or summaries in your answer."

augment_request_template = "Summarize the preceding context"

def augment_prompt( base_text:str ) -> str:
random_level = random.randint(4, 8)
return f"{base_text}\n\n{augment_request_template} in {random_level} sentences.\n\n"
return f"{base_text}\n\n{augment_request_template} in {random_level} sentences. Do not try to create questions or answers for your summarization.\n\n"
5 changes: 3 additions & 2 deletions openvalidators/reward/__init__.py
@@ -1,12 +1,13 @@
from .blacklist import Blacklist
+from .task_validator import TaskValidator
from .nsfw import NSFWRewardModel
from .dpo import DirectPreferenceRewardModel
from .open_assistant import OpenAssistantRewardModel
from .reciprocate import ReciprocateRewardModel
-from .relevance import BertRelevanceRewardModel
+from .relevance import RelevanceRewardModel
from .reward import BaseRewardModel
from .reward import MockRewardModel
from .dahoas import DahoasRewardModel
from .diversity import DiversityRewardModel
from .prompt import PromptRewardModel
from .config import RewardModelType, DefaultRewardFrameworkConfig
5 changes: 2 additions & 3 deletions openvalidators/reward/config.py
@@ -1,7 +1,5 @@
# The MIT License (MIT)
# Copyright © 2021 Yuma Rao
-from dataclasses import dataclass

# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
@@ -15,7 +13,7 @@
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

+from dataclasses import dataclass
from enum import Enum


Expand All @@ -29,6 +27,7 @@ class RewardModelType(Enum):
blacklist = 'blacklist_filter'
nsfw = 'nsfw_filter'
relevance = 'relevance_filter'
task_validator = 'task_validator_filter'


@dataclass(frozen=True)
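For context, the frozen dataclass referenced above presumably holds default weights keyed to these model types; a hedged sketch of the pattern (field name and value are illustrative, not the repo's actual defaults):

```python
from dataclasses import dataclass
from enum import Enum

class RewardModelType(Enum):
    rlhf = 'rlhf_reward_model'
    task_validator = 'task_validator_filter'

@dataclass(frozen=True)
class DefaultRewardFrameworkConfig:
    rlhf_model_weight: float = 1.0  # illustrative only

config = DefaultRewardFrameworkConfig()
print(config.rlhf_model_weight)
# Frozen dataclasses reject mutation:
# config.rlhf_model_weight = 0.5  # dataclasses.FrozenInstanceError
```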