From ea3f44d4dadf5e58afa0990dd5977286fa2f4713 Mon Sep 17 00:00:00 2001
From: isabella618033
Date: Fri, 25 Aug 2023 17:13:18 +0000
Subject: [PATCH] Bug fix: move .to(self.device) after unpacking reward/mask tuples

reward_fn_i.apply() and masking_fn_i.apply() return a tuple of
(raw, normalized) tensors, so chaining .to(self.device) onto the call
raises AttributeError: 'tuple' object has no attribute 'to'. Unpack the
tuple first and move only the normalized tensor, which is the one
combined with `rewards`, onto the device.
---
 openvalidators/forward.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/openvalidators/forward.py b/openvalidators/forward.py
index 5b5e071..8b71847 100644
--- a/openvalidators/forward.py
+++ b/openvalidators/forward.py
@@ -87,16 +87,16 @@ async def run_step(self, prompt: str, k: int, timeout: float, name: str, exclude
     # Compute the rewards for the responses given the prompt.
     rewards: torch.FloatTensor = torch.zeros(len(responses), dtype=torch.float32).to(self.device)
     for weight_i, reward_fn_i in zip(self.reward_weights, self.reward_functions):
-        reward_i, reward_i_normalized = reward_fn_i.apply(prompt, responses, name).to(self.device)
-        rewards += weight_i * reward_i_normalized
+        reward_i, reward_i_normalized = reward_fn_i.apply(prompt, responses, name)
+        rewards += weight_i * reward_i_normalized.to(self.device)
         if not self.config.neuron.disable_log_rewards:
             event[reward_fn_i.name] = reward_i.tolist()
             event[reward_fn_i.name + '_normalized'] = reward_i_normalized.tolist()
         bt.logging.trace(str(reward_fn_i.name), reward_i_normalized.tolist())
 
     for masking_fn_i in self.masking_functions:
-        mask_i, mask_i_normalized = masking_fn_i.apply(base_prompt, responses, name).to(self.device)
-        rewards *= mask_i_normalized  # includes diversity
+        mask_i, mask_i_normalized = masking_fn_i.apply(base_prompt, responses, name)
+        rewards *= mask_i_normalized.to(self.device)  # includes diversity
         if not self.config.neuron.disable_log_rewards:
             event[masking_fn_i.name] = mask_i.tolist()
             event[masking_fn_i.name + '_normalized'] = mask_i_normalized.tolist()
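
Note: below is a minimal, self-contained sketch of the failure this patch
fixes. The `apply` stub is hypothetical; only its two-tensor tuple return
shape is taken from the unpacking in the diff above.

    import torch

    device = "cuda" if torch.cuda.is_available() else "cpu"

    def apply(prompt, responses, name):
        # Hypothetical stand-in for reward_fn_i.apply / masking_fn_i.apply:
        # returns a (raw, normalized) tuple of tensors, per the diff above.
        raw = torch.tensor([0.2, 0.9])
        normalized = torch.tensor([0.0, 1.0])
        return raw, normalized

    # Pre-patch pattern: .to() is chained onto the call and hits the tuple.
    try:
        reward_i, reward_i_normalized = apply("prompt", ["a", "b"], "step").to(device)
    except AttributeError as err:
        print(err)  # 'tuple' object has no attribute 'to'

    # Post-patch pattern: unpack first, then move only the tensor that
    # participates in arithmetic onto the same device as `rewards`.
    rewards = torch.zeros(2, dtype=torch.float32).to(device)
    reward_i, reward_i_normalized = apply("prompt", ["a", "b"], "step")
    rewards += 1.0 * reward_i_normalized.to(device)

A side effect worth noting: the raw `reward_i`/`mask_i` tensors now stay on
whatever device `apply()` produced them on, which the diff suggests is fine
because they are only serialized with `.tolist()` for event logging.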