ModelTC · hiworldwzj · Oct 28, 2024 · Oct 28, 2024 · Oct 28, 2024
diff --git a/lightllm/server/api_server.py b/lightllm/server/api_server.py
@@ -524,7 +524,14 @@ def main():
         assert args.disable_cudagraph
 
     # 这些模式不能同时设置。
-    assert [args.splitfuse_mode, args.beam_mode, args.diverse_mode, args.token_healing_mode].count(True) <= 1
+    assert [
+        args.splitfuse_mode,
+        args.beam_mode,
+        args.diverse_mode,
+        args.token_healing_mode,
+        args.use_reward_model,
+        args.return_all_prompt_logprobs,
+    ].count(True) <= 1
     # 部分模式目前还无法与dynamic_prompt_cache一起跑，to do。
     if args.use_dynamic_prompt_cache:
         assert args.beam_mode is False

diff --git a/lightllm/server/router/model_infer/mode_backend/continues_batch/impl_for_reward_model.py b/lightllm/server/router/model_infer/mode_backend/continues_batch/impl_for_reward_model.py
@@ -23,7 +23,7 @@ def forward(self, batch_id, is_prefill):
         kwargs, run_reqs = prepare_prefill_inputs(batch, self.radix_cache, self.is_multimodal)
 
         scores: torch.Tensor = self.model.forward(**kwargs)
-        scores = scores.detach().cpu().numpy()
+        scores = scores[0].detach().cpu().numpy()
 
         next_token_id = 1
         next_token_logprob = 1.0
@@ -36,7 +36,7 @@ def forward(self, batch_id, is_prefill):
             req_obj.out_token_id_count[next_token_id] += 1
             req_obj.finish_status = FinishStatus.FINISHED_STOP
 
-            metadata = {"id": int(next_token_id), "logprob": float(next_token_logprob), "score": float(score[0])}
+            metadata = {"id": int(next_token_id), "logprob": float(next_token_logprob), "score": float(score)}
 
             output_dict[req_obj.r_id] = (
                 req_obj.req_status,