fix parse response #262

Merged 1 commit on Dec 25, 2024
2 changes: 1 addition & 1 deletion evalscope/perf/benchmark.py
@@ -157,7 +157,7 @@ async def statistic_benchmark_metric_worker(benchmark_data_queue: asyncio.Queue,
while not (data_process_completed_event.is_set() and benchmark_data_queue.empty()):
try:
# Attempt to get benchmark data from the queue with a timeout
benchmark_data = await asyncio.wait_for(benchmark_data_queue.get(), timeout=1)
benchmark_data = await asyncio.wait_for(benchmark_data_queue.get(), timeout=0.01)
benchmark_data_queue.task_done()
except asyncio.TimeoutError:
# If timeout, continue to the next iteration
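The shorter timeout keeps the statistics worker responsive: `asyncio.wait_for` now returns after roughly 10 ms when the queue is idle, so the loop re-checks the completion event frequently instead of potentially sitting a full second in `wait_for` after the last request finishes. A minimal sketch of the same polling pattern (standalone names, not the actual benchmark worker):

```python
import asyncio

async def drain(queue: asyncio.Queue, done: asyncio.Event):
    """Consume items until the producer signals completion and the queue is empty."""
    while not (done.is_set() and queue.empty()):
        try:
            # Short timeout: wake up often to re-check the exit condition
            item = await asyncio.wait_for(queue.get(), timeout=0.01)
            queue.task_done()
        except asyncio.TimeoutError:
            continue  # nothing arrived within 10 ms; loop and check again
        print('got', item)

async def main():
    queue: asyncio.Queue = asyncio.Queue()
    done = asyncio.Event()
    for i in range(3):
        queue.put_nowait(i)
    done.set()
    await drain(queue, done)

asyncio.run(main())
```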
20 changes: 11 additions & 9 deletions evalscope/perf/plugin/api/openai_api.py
@@ -96,19 +96,21 @@ def __compose_query_from_parameter(self, payload: Dict, param: Arguments):

def parse_responses(self, responses, request: Any = None, **kwargs) -> Dict:
"""Parser responses and return number of request and response tokens.
One response for non-stream, multiple responses for stream.
Only one response for non-stream, multiple responses for stream.
"""
delta_contents = {}
input_tokens = None
output_tokens = None

# when stream, the last response is the full usage
# when non-stream, the last response is the first response
last_response_js = json.loads(responses[-1])
if 'usage' in last_response_js and last_response_js['usage']:
input_tokens = last_response_js['usage']['prompt_tokens']
output_tokens = last_response_js['usage']['completion_tokens']
return input_tokens, output_tokens

# no usage information in the response, parse the response to get the tokens
delta_contents = {}
for response in responses:
js = json.loads(response)
if 'usage' in js and js['usage']:
input_tokens = js['usage']['prompt_tokens']
output_tokens = js['usage']['completion_tokens']
return input_tokens, output_tokens

if 'object' in js:
self.__process_response_object(js, delta_contents)
else:
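For reference, a minimal sketch of the usage-first parsing order the new code follows, assuming OpenAI-compatible payloads (the function name and chunks below are illustrative, not the plugin's actual helpers):

```python
import json
from typing import List, Optional, Tuple

def parse_token_usage(responses: List[str]) -> Tuple[Optional[int], Optional[int]]:
    """Return (prompt_tokens, completion_tokens) from a list of raw JSON responses.

    For streaming requests the server may append a final chunk whose 'usage'
    field summarizes the whole request; for non-streaming requests the single
    response carries 'usage' directly. Checking the last element first covers
    both cases without re-reading the intermediate delta chunks.
    """
    last = json.loads(responses[-1])
    if last.get('usage'):
        return last['usage']['prompt_tokens'], last['usage']['completion_tokens']

    # No usage reported: a caller would have to reconstruct the output from the
    # streamed deltas and count tokens itself (omitted here).
    return None, None

# Example: a stream whose final chunk reports usage
chunks = [
    '{"object": "chat.completion.chunk", "choices": [{"delta": {"content": "Hi"}}]}',
    '{"object": "chat.completion.chunk", "choices": [], "usage": {"prompt_tokens": 12, "completion_tokens": 1}}',
]
print(parse_token_usage(chunks))  # (12, 1)
```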
6 changes: 3 additions & 3 deletions tests/perf/test_perf.py
@@ -19,13 +19,13 @@ def tearDown(self) -> None:
@unittest.skipUnless(0 in test_level_list(), 'skip test in current test level')
def test_run_perf(self):
task_cfg = {
'url': 'http://127.0.0.1:8000/v1/chat/completions',
'url': 'http://127.0.0.1:8001/v1/chat/completions',
'parallel': 1,
'model': 'qwen2.5',
'number': 15,
'api': 'openai',
'dataset': 'openqa',
'stream': True,
# 'stream': True,
'debug': True,
}
run_perf_benchmark(task_cfg)
@@ -47,7 +47,7 @@ def test_run_perf_stream(self):
@unittest.skipUnless(0 in test_level_list(), 'skip test in current test level')
def test_run_perf_speed_benchmark(self):
task_cfg = {
'url': 'http://127.0.0.1:8801/v1/completions',
'url': 'http://127.0.0.1:8001/v1/completions',
'parallel': 1,
'model': 'qwen2.5',
'api': 'openai',