From c9dbf4dfb99c5128a4783a1120131a1e8443616e Mon Sep 17 00:00:00 2001 From: Eugene Date: Fri, 25 Aug 2023 13:56:04 -0700 Subject: [PATCH 1/2] reweighting --- openvalidators/reward/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openvalidators/reward/config.py b/openvalidators/reward/config.py index cdab7d0..53581b0 100644 --- a/openvalidators/reward/config.py +++ b/openvalidators/reward/config.py @@ -35,8 +35,8 @@ class DefaultRewardFrameworkConfig: """Reward framework default configuration. Note: All the weights should add up to 1.0. """ - dpo_model_weight: float = 0.2 + dpo_model_weight: float = 0.4 rlhf_model_weight: float = 0.4 - reciprocate_model_weight: float = 0.4 + reciprocate_model_weight: float = 0.2 dahoas_model_weight: float = 0 prompt_model_weight: float = 0 From e513a97e77936fdac83441113b114d3b15b74926 Mon Sep 17 00:00:00 2001 From: Eugene Date: Mon, 28 Aug 2023 08:27:35 -0700 Subject: [PATCH 2/2] small reweight --- openvalidators/reward/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openvalidators/reward/config.py b/openvalidators/reward/config.py index 53581b0..a8ae5dc 100644 --- a/openvalidators/reward/config.py +++ b/openvalidators/reward/config.py @@ -35,8 +35,8 @@ class DefaultRewardFrameworkConfig: """Reward framework default configuration. Note: All the weights should add up to 1.0. """ - dpo_model_weight: float = 0.4 + dpo_model_weight: float = 0.3 rlhf_model_weight: float = 0.4 - reciprocate_model_weight: float = 0.2 + reciprocate_model_weight: float = 0.3 dahoas_model_weight: float = 0 prompt_model_weight: float = 0