Skip to content

Commit

Permalink
Merge pull request #140 from opentensor/dpo-reweighting
Browse files Browse the repository at this point in the history
Reweight reward model defaults: dpo_model_weight 0.2 → 0.3, reciprocate_model_weight 0.4 → 0.3 (weights still sum to 1.0)
  • Loading branch information
Eugene-hu authored Aug 28, 2023
2 parents bd315ec + e513a97 commit dc65c72
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions openvalidators/reward/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ class DefaultRewardFrameworkConfig:
"""Reward framework default configuration.
Note: All the weights should add up to 1.0.
"""
dpo_model_weight: float = 0.2
dpo_model_weight: float = 0.3
rlhf_model_weight: float = 0.4
reciprocate_model_weight: float = 0.4
reciprocate_model_weight: float = 0.3
dahoas_model_weight: float = 0
prompt_model_weight: float = 0

0 comments on commit dc65c72

Please sign in to comment.