diff --git a/openvalidators/reward/config.py b/openvalidators/reward/config.py index cdab7d0..a8ae5dc 100644 --- a/openvalidators/reward/config.py +++ b/openvalidators/reward/config.py @@ -35,8 +35,8 @@ class DefaultRewardFrameworkConfig: """Reward framework default configuration. Note: All the weights should add up to 1.0. """ - dpo_model_weight: float = 0.2 + dpo_model_weight: float = 0.3 rlhf_model_weight: float = 0.4 - reciprocate_model_weight: float = 0.4 + reciprocate_model_weight: float = 0.3 dahoas_model_weight: float = 0 prompt_model_weight: float = 0