From 30a47b36fd4a396fd235bd1f30174e3eafdb9d3f Mon Sep 17 00:00:00 2001
From: kcz358
Date: Mon, 25 Nov 2024 07:13:06 +0000
Subject: [PATCH] Revise prompt

---
 .../tasks/common_voice_15/common_voice_15_en.yaml    |  9 ++++++++-
 .../tasks/common_voice_15/common_voice_15_fr.yaml    |  9 ++++++++-
 .../tasks/common_voice_15/common_voice_15_zh-CN.yaml |  9 ++++++++-
 lmms_eval/tasks/common_voice_15/utils.py             |  7 ++++---
 lmms_eval/tasks/gigaspeech/gigaspeech_dev.yaml       |  9 ++++++++-
 lmms_eval/tasks/gigaspeech/gigaspeech_l_dev.yaml     |  9 ++++++++-
 lmms_eval/tasks/gigaspeech/gigaspeech_l_test.yaml    |  9 ++++++++-
 lmms_eval/tasks/gigaspeech/gigaspeech_m_dev.yaml     |  9 ++++++++-
 lmms_eval/tasks/gigaspeech/gigaspeech_m_test.yaml    |  9 ++++++++-
 lmms_eval/tasks/gigaspeech/gigaspeech_s_dev.yaml     |  9 ++++++++-
 lmms_eval/tasks/gigaspeech/gigaspeech_s_test.yaml    |  9 ++++++++-
 lmms_eval/tasks/gigaspeech/gigaspeech_test.yaml      |  9 ++++++++-
 lmms_eval/tasks/gigaspeech/gigaspeech_xl_dev.yaml    |  9 ++++++++-
 lmms_eval/tasks/gigaspeech/gigaspeech_xl_test.yaml   |  9 ++++++++-
 lmms_eval/tasks/gigaspeech/gigaspeech_xs_dev.yaml    |  9 ++++++++-
 lmms_eval/tasks/gigaspeech/gigaspeech_xs_test.yaml   |  9 ++++++++-
 lmms_eval/tasks/gigaspeech/utils.py                  |  7 ++++---
 .../tasks/librispeech/librispeech_dev_clean.yaml     |  9 ++++++++-
 .../tasks/librispeech/librispeech_dev_other.yaml     |  9 ++++++++-
 .../tasks/librispeech/librispeech_test_clean.yaml    |  9 ++++++++-
 .../tasks/librispeech/librispeech_test_other.yaml    |  9 ++++++++-
 lmms_eval/tasks/librispeech/utils.py                 |  7 ++++---
 lmms_eval/tasks/people_speech/people_speech_val.yaml | 11 +++++++++--
 lmms_eval/tasks/people_speech/utils.py               |  7 ++++---
 lmms_eval/tasks/tedlium/tedlium_dev_test.yaml        |  9 ++++++++-
 lmms_eval/tasks/tedlium/tedlium_long_form.yaml       |  9 ++++++++-
 lmms_eval/tasks/tedlium/utils.py                     |  7 ++++---
 27 files changed, 197 insertions(+), 38 deletions(-)

diff --git a/lmms_eval/tasks/common_voice_15/common_voice_15_en.yaml b/lmms_eval/tasks/common_voice_15/common_voice_15_en.yaml
index edd213ab..b8e2fb36 100644
--- a/lmms_eval/tasks/common_voice_15/common_voice_15_en.yaml
+++ b/lmms_eval/tasks/common_voice_15/common_voice_15_en.yaml
@@ -20,4 +20,11 @@ metric_list:
     aggregation : !function utils.common_voice_15_wer
     higher_is_better : false
 metadata:
-  - version: 0.0
\ No newline at end of file
+  - version: 0.0
+lmms_eval_specific_kwargs:
+  default:
+    pre_prompt: ""
+    post_prompt: ""
+  qwen2_audio:
+    pre_prompt: ""
+    post_prompt: " <|en|>"
\ No newline at end of file
diff --git a/lmms_eval/tasks/common_voice_15/common_voice_15_fr.yaml b/lmms_eval/tasks/common_voice_15/common_voice_15_fr.yaml
index 5c22bdec..1254c2bc 100644
--- a/lmms_eval/tasks/common_voice_15/common_voice_15_fr.yaml
+++ b/lmms_eval/tasks/common_voice_15/common_voice_15_fr.yaml
@@ -20,4 +20,11 @@ metric_list:
     aggregation : !function utils.common_voice_15_wer
     higher_is_better : false
 metadata:
-  - version: 0.0
\ No newline at end of file
+  - version: 0.0
+lmms_eval_specific_kwargs:
+  default:
+    pre_prompt: ""
+    post_prompt: ""
+  qwen2_audio:
+    pre_prompt: ""
+    post_prompt: " <|fr|>"
\ No newline at end of file
diff --git a/lmms_eval/tasks/common_voice_15/common_voice_15_zh-CN.yaml b/lmms_eval/tasks/common_voice_15/common_voice_15_zh-CN.yaml
index 05c698b9..92dba1d5 100644
--- a/lmms_eval/tasks/common_voice_15/common_voice_15_zh-CN.yaml
+++ b/lmms_eval/tasks/common_voice_15/common_voice_15_zh-CN.yaml
@@ -20,4 +20,11 @@ metric_list:
     aggregation : !function utils.common_voice_15_wer
     higher_is_better : false
 metadata:
-  - version: 0.0
\ No newline at end of file
+  - version: 0.0
+lmms_eval_specific_kwargs:
+  default:
+    pre_prompt: ""
+    post_prompt: ""
+  qwen2_audio:
+    pre_prompt: ""
+    post_prompt: " <|zh|>"
\ No newline at end of file
diff --git a/lmms_eval/tasks/common_voice_15/utils.py b/lmms_eval/tasks/common_voice_15/utils.py
index 6a8bfdcf..312bfa17 100644
--- a/lmms_eval/tasks/common_voice_15/utils.py
+++ b/lmms_eval/tasks/common_voice_15/utils.py
@@ -30,9 +30,10 @@ def common_voice_15_doc_to_audio(doc):
     return [doc["audio"]]


-def common_voice_15_doc_to_text(doc):
-    lan = doc["task"][4:]
-    return f"Detect the language and recognize the speech: <|{lan}|>"
+def common_voice_15_doc_to_text(doc, lmms_eval_specific_kwargs):
+    pre_prompt = lmms_eval_specific_kwargs["pre_prompt"]
+    post_prompt = lmms_eval_specific_kwargs["post_prompt"]
+    return f"{pre_prompt}Please recognize the speech and only output the recognized content:{post_prompt}"


 def common_voice_15_process_result(doc, result):
diff --git a/lmms_eval/tasks/gigaspeech/gigaspeech_dev.yaml b/lmms_eval/tasks/gigaspeech/gigaspeech_dev.yaml
index 6749db0e..d4ed3346 100644
--- a/lmms_eval/tasks/gigaspeech/gigaspeech_dev.yaml
+++ b/lmms_eval/tasks/gigaspeech/gigaspeech_dev.yaml
@@ -20,4 +20,11 @@ metric_list:
     aggregation : !function utils.gigaspeech_wer
     higher_is_better : false
 metadata:
-  - version: 0.0
\ No newline at end of file
+  - version: 0.0
+lmms_eval_specific_kwargs:
+  default:
+    pre_prompt: ""
+    post_prompt: ""
+  qwen2_audio:
+    pre_prompt: ""
+    post_prompt: " <|en|>"
\ No newline at end of file
diff --git a/lmms_eval/tasks/gigaspeech/gigaspeech_l_dev.yaml b/lmms_eval/tasks/gigaspeech/gigaspeech_l_dev.yaml
index b9802c9a..97fa7f4e 100644
--- a/lmms_eval/tasks/gigaspeech/gigaspeech_l_dev.yaml
+++ b/lmms_eval/tasks/gigaspeech/gigaspeech_l_dev.yaml
@@ -20,4 +20,11 @@ metric_list:
     aggregation : !function utils.gigaspeech_wer
     higher_is_better : false
 metadata:
-  - version: 0.0
\ No newline at end of file
+  - version: 0.0
+lmms_eval_specific_kwargs:
+  default:
+    pre_prompt: ""
+    post_prompt: ""
+  qwen2_audio:
+    pre_prompt: ""
+    post_prompt: " <|en|>"
\ No newline at end of file
diff --git a/lmms_eval/tasks/gigaspeech/gigaspeech_l_test.yaml b/lmms_eval/tasks/gigaspeech/gigaspeech_l_test.yaml
index 61202e6c..75bb14f0 100644
--- a/lmms_eval/tasks/gigaspeech/gigaspeech_l_test.yaml
+++ b/lmms_eval/tasks/gigaspeech/gigaspeech_l_test.yaml
@@ -20,4 +20,11 @@ metric_list:
     aggregation : !function utils.gigaspeech_wer
     higher_is_better : false
 metadata:
-  - version: 0.0
\ No newline at end of file
+  - version: 0.0
+lmms_eval_specific_kwargs:
+  default:
+    pre_prompt: ""
+    post_prompt: ""
+  qwen2_audio:
+    pre_prompt: ""
+    post_prompt: " <|en|>"
\ No newline at end of file
diff --git a/lmms_eval/tasks/gigaspeech/gigaspeech_m_dev.yaml b/lmms_eval/tasks/gigaspeech/gigaspeech_m_dev.yaml
index 0810ae3e..29f0bbfd 100644
--- a/lmms_eval/tasks/gigaspeech/gigaspeech_m_dev.yaml
+++ b/lmms_eval/tasks/gigaspeech/gigaspeech_m_dev.yaml
@@ -20,4 +20,11 @@ metric_list:
     aggregation : !function utils.gigaspeech_wer
     higher_is_better : false
 metadata:
-  - version: 0.0
\ No newline at end of file
+  - version: 0.0
+lmms_eval_specific_kwargs:
+  default:
+    pre_prompt: ""
+    post_prompt: ""
+  qwen2_audio:
+    pre_prompt: ""
+    post_prompt: " <|en|>"
\ No newline at end of file
diff --git a/lmms_eval/tasks/gigaspeech/gigaspeech_m_test.yaml b/lmms_eval/tasks/gigaspeech/gigaspeech_m_test.yaml
index c7253747..9e9dfbcb 100644
--- a/lmms_eval/tasks/gigaspeech/gigaspeech_m_test.yaml
+++ b/lmms_eval/tasks/gigaspeech/gigaspeech_m_test.yaml
@@ -20,4 +20,11 @@ metric_list:
     aggregation : !function utils.gigaspeech_wer
     higher_is_better : false
 metadata:
-  - version: 0.0
\ No newline at end of file
+  - version: 0.0
+lmms_eval_specific_kwargs:
+  default:
+    pre_prompt: ""
+    post_prompt: ""
+  qwen2_audio:
+    pre_prompt: ""
+    post_prompt: " <|en|>"
\ No newline at end of file
diff --git a/lmms_eval/tasks/gigaspeech/gigaspeech_s_dev.yaml b/lmms_eval/tasks/gigaspeech/gigaspeech_s_dev.yaml
index cdf08bda..cbdf8fe9 100644
--- a/lmms_eval/tasks/gigaspeech/gigaspeech_s_dev.yaml
+++ b/lmms_eval/tasks/gigaspeech/gigaspeech_s_dev.yaml
@@ -20,4 +20,11 @@ metric_list:
     aggregation : !function utils.gigaspeech_wer
     higher_is_better : false
 metadata:
-  - version: 0.0
\ No newline at end of file
+  - version: 0.0
+lmms_eval_specific_kwargs:
+  default:
+    pre_prompt: ""
+    post_prompt: ""
+  qwen2_audio:
+    pre_prompt: ""
+    post_prompt: " <|en|>"
\ No newline at end of file
diff --git a/lmms_eval/tasks/gigaspeech/gigaspeech_s_test.yaml b/lmms_eval/tasks/gigaspeech/gigaspeech_s_test.yaml
index fd50ba76..d878a0cc 100644
--- a/lmms_eval/tasks/gigaspeech/gigaspeech_s_test.yaml
+++ b/lmms_eval/tasks/gigaspeech/gigaspeech_s_test.yaml
@@ -20,4 +20,11 @@ metric_list:
     aggregation : !function utils.gigaspeech_wer
     higher_is_better : false
 metadata:
-  - version: 0.0
\ No newline at end of file
+  - version: 0.0
+lmms_eval_specific_kwargs:
+  default:
+    pre_prompt: ""
+    post_prompt: ""
+  qwen2_audio:
+    pre_prompt: ""
+    post_prompt: " <|en|>"
\ No newline at end of file
diff --git a/lmms_eval/tasks/gigaspeech/gigaspeech_test.yaml b/lmms_eval/tasks/gigaspeech/gigaspeech_test.yaml
index b51a0bd2..d6f98db1 100644
--- a/lmms_eval/tasks/gigaspeech/gigaspeech_test.yaml
+++ b/lmms_eval/tasks/gigaspeech/gigaspeech_test.yaml
@@ -20,4 +20,11 @@ metric_list:
     aggregation : !function utils.gigaspeech_wer
     higher_is_better : false
 metadata:
-  - version: 0.0
\ No newline at end of file
+  - version: 0.0
+lmms_eval_specific_kwargs:
+  default:
+    pre_prompt: ""
+    post_prompt: ""
+  qwen2_audio:
+    pre_prompt: ""
+    post_prompt: " <|en|>"
\ No newline at end of file
diff --git a/lmms_eval/tasks/gigaspeech/gigaspeech_xl_dev.yaml b/lmms_eval/tasks/gigaspeech/gigaspeech_xl_dev.yaml
index c7bc06b0..86d39b69 100644
--- a/lmms_eval/tasks/gigaspeech/gigaspeech_xl_dev.yaml
+++ b/lmms_eval/tasks/gigaspeech/gigaspeech_xl_dev.yaml
@@ -20,4 +20,11 @@ metric_list:
     aggregation : !function utils.gigaspeech_wer
     higher_is_better : false
 metadata:
-  - version: 0.0
\ No newline at end of file
+  - version: 0.0
+lmms_eval_specific_kwargs:
+  default:
+    pre_prompt: ""
+    post_prompt: ""
+  qwen2_audio:
+    pre_prompt: ""
+    post_prompt: " <|en|>"
\ No newline at end of file
diff --git a/lmms_eval/tasks/gigaspeech/gigaspeech_xl_test.yaml b/lmms_eval/tasks/gigaspeech/gigaspeech_xl_test.yaml
index 70c66f95..67e1f463 100644
--- a/lmms_eval/tasks/gigaspeech/gigaspeech_xl_test.yaml
+++ b/lmms_eval/tasks/gigaspeech/gigaspeech_xl_test.yaml
@@ -20,4 +20,11 @@ metric_list:
     aggregation : !function utils.gigaspeech_wer
     higher_is_better : false
 metadata:
-  - version: 0.0
\ No newline at end of file
+  - version: 0.0
+lmms_eval_specific_kwargs:
+  default:
+    pre_prompt: ""
+    post_prompt: ""
+  qwen2_audio:
+    pre_prompt: ""
+    post_prompt: " <|en|>"
\ No newline at end of file
diff --git a/lmms_eval/tasks/gigaspeech/gigaspeech_xs_dev.yaml b/lmms_eval/tasks/gigaspeech/gigaspeech_xs_dev.yaml
index db6bc293..558a154e 100644
--- a/lmms_eval/tasks/gigaspeech/gigaspeech_xs_dev.yaml
+++ b/lmms_eval/tasks/gigaspeech/gigaspeech_xs_dev.yaml
@@ -20,4 +20,11 @@ metric_list:
     aggregation : !function utils.gigaspeech_wer
     higher_is_better : false
 metadata:
-  - version: 0.0
\ No newline at end of file
+  - version: 0.0
+lmms_eval_specific_kwargs:
+  default:
+    pre_prompt: ""
+    post_prompt: ""
+  qwen2_audio:
+    pre_prompt: ""
+    post_prompt: " <|en|>"
\ No newline at end of file
diff --git a/lmms_eval/tasks/gigaspeech/gigaspeech_xs_test.yaml b/lmms_eval/tasks/gigaspeech/gigaspeech_xs_test.yaml
index 6048ede2..744a0260 100644
--- a/lmms_eval/tasks/gigaspeech/gigaspeech_xs_test.yaml
+++ b/lmms_eval/tasks/gigaspeech/gigaspeech_xs_test.yaml
@@ -20,4 +20,11 @@ metric_list:
     aggregation : !function utils.gigaspeech_wer
     higher_is_better : false
 metadata:
-  - version: 0.0
\ No newline at end of file
+  - version: 0.0
+lmms_eval_specific_kwargs:
+  default:
+    pre_prompt: ""
+    post_prompt: ""
+  qwen2_audio:
+    pre_prompt: ""
+    post_prompt: " <|en|>"
\ No newline at end of file
diff --git a/lmms_eval/tasks/gigaspeech/utils.py b/lmms_eval/tasks/gigaspeech/utils.py
index 38c3b8b9..c45a9d61 100755
--- a/lmms_eval/tasks/gigaspeech/utils.py
+++ b/lmms_eval/tasks/gigaspeech/utils.py
@@ -19,9 +19,10 @@ def gigaspeech_doc_to_audio(doc):
     return [doc["audio"]]


-def gigaspeech_doc_to_text(doc):
-    lan = "en"
-    return f"Detect the language and recognize the speech: <|{lan}|>"
+def gigaspeech_doc_to_text(doc, lmms_eval_specific_kwargs):
+    pre_prompt = lmms_eval_specific_kwargs["pre_prompt"]
+    post_prompt = lmms_eval_specific_kwargs["post_prompt"]
+    return f"{pre_prompt}Please recognize the speech and only output the recognized content:{post_prompt}"


 def gigaspeech_process_result(doc, result):
diff --git a/lmms_eval/tasks/librispeech/librispeech_dev_clean.yaml b/lmms_eval/tasks/librispeech/librispeech_dev_clean.yaml
index 569a9120..237d955e 100644
--- a/lmms_eval/tasks/librispeech/librispeech_dev_clean.yaml
+++ b/lmms_eval/tasks/librispeech/librispeech_dev_clean.yaml
@@ -20,4 +20,11 @@ metric_list:
     aggregation : !function utils.librispeech_wer
     higher_is_better : false
 metadata:
-  - version: 0.0
\ No newline at end of file
+  - version: 0.0
+lmms_eval_specific_kwargs:
+  default:
+    pre_prompt: ""
+    post_prompt: ""
+  qwen2_audio:
+    pre_prompt: ""
+    post_prompt: " <|en|>"
\ No newline at end of file
diff --git a/lmms_eval/tasks/librispeech/librispeech_dev_other.yaml b/lmms_eval/tasks/librispeech/librispeech_dev_other.yaml
index 9b5a9305..c4426db5 100644
--- a/lmms_eval/tasks/librispeech/librispeech_dev_other.yaml
+++ b/lmms_eval/tasks/librispeech/librispeech_dev_other.yaml
@@ -20,4 +20,11 @@ metric_list:
     aggregation : !function utils.librispeech_wer
     higher_is_better : false
 metadata:
-  - version: 0.0
\ No newline at end of file
+  - version: 0.0
+lmms_eval_specific_kwargs:
+  default:
+    pre_prompt: ""
+    post_prompt: ""
+  qwen2_audio:
+    pre_prompt: ""
+    post_prompt: " <|en|>"
\ No newline at end of file
diff --git a/lmms_eval/tasks/librispeech/librispeech_test_clean.yaml b/lmms_eval/tasks/librispeech/librispeech_test_clean.yaml
index d1ad3f4a..bf1c680e 100644
--- a/lmms_eval/tasks/librispeech/librispeech_test_clean.yaml
+++ b/lmms_eval/tasks/librispeech/librispeech_test_clean.yaml
@@ -20,4 +20,11 @@ metric_list:
     aggregation : !function utils.librispeech_wer
     higher_is_better : false
 metadata:
-  - version: 0.0
\ No newline at end of file
+  - version: 0.0
+lmms_eval_specific_kwargs:
+  default:
+    pre_prompt: ""
+    post_prompt: ""
+  qwen2_audio:
+    pre_prompt: ""
+    post_prompt: " <|en|>"
\ No newline at end of file
diff --git a/lmms_eval/tasks/librispeech/librispeech_test_other.yaml b/lmms_eval/tasks/librispeech/librispeech_test_other.yaml
index 26034147..ccac2ec0 100644
--- a/lmms_eval/tasks/librispeech/librispeech_test_other.yaml
+++ b/lmms_eval/tasks/librispeech/librispeech_test_other.yaml
@@ -20,4 +20,11 @@ metric_list:
     aggregation : !function utils.librispeech_wer
     higher_is_better : false
 metadata:
-  - version: 0.0
\ No newline at end of file
+  - version: 0.0
+lmms_eval_specific_kwargs:
+  default:
+    pre_prompt: ""
+    post_prompt: ""
+  qwen2_audio:
+    pre_prompt: ""
+    post_prompt: " <|en|>"
\ No newline at end of file
diff --git a/lmms_eval/tasks/librispeech/utils.py b/lmms_eval/tasks/librispeech/utils.py
index 345effe5..dfe4109b 100755
--- a/lmms_eval/tasks/librispeech/utils.py
+++ b/lmms_eval/tasks/librispeech/utils.py
@@ -30,9 +30,10 @@ def librispeech_doc_to_audio(doc):
     return [doc["audio"]]


-def librispeech_doc_to_text(doc):
-    lan = doc["task"][4:]
-    return f"Detect the language and recognize the speech: <|{lan}|>"
+def librispeech_doc_to_text(doc, lmms_eval_specific_kwargs):
+    pre_prompt = lmms_eval_specific_kwargs["pre_prompt"]
+    post_prompt = lmms_eval_specific_kwargs["post_prompt"]
+    return f"{pre_prompt}Please recognize the speech and only output the recognized content:{post_prompt}"


 def librispeech_process_result(doc, result):
diff --git a/lmms_eval/tasks/people_speech/people_speech_val.yaml b/lmms_eval/tasks/people_speech/people_speech_val.yaml
index bc8358a2..b680549d 100644
--- a/lmms_eval/tasks/people_speech/people_speech_val.yaml
+++ b/lmms_eval/tasks/people_speech/people_speech_val.yaml
@@ -1,4 +1,4 @@
-dataset_path: lmms-lab/people_speech
+dataset_path: lmms-lab/peoples_speech
 dataset_kwargs:
   token: True
 task : "people_speech_val"
@@ -19,4 +19,11 @@ metric_list:
     aggregation : !function utils.people_speech_wer
     higher_is_better : false
 metadata:
-  - version: 0.0
\ No newline at end of file
+  - version: 0.0
+lmms_eval_specific_kwargs:
+  default:
+    pre_prompt: ""
+    post_prompt: ""
+  qwen2_audio:
+    pre_prompt: ""
+    post_prompt: " <|en|>"
diff --git a/lmms_eval/tasks/people_speech/utils.py b/lmms_eval/tasks/people_speech/utils.py
index 4113fdbe..af26f832 100644
--- a/lmms_eval/tasks/people_speech/utils.py
+++ b/lmms_eval/tasks/people_speech/utils.py
@@ -30,9 +30,10 @@ def people_speech_doc_to_audio(doc):
     return [doc["audio"]]


-def people_speech_doc_to_text(doc):
-    lan = doc["task"][4:]
-    return f"Detect the language and recognize the speech: <|{lan}|>"
+def people_speech_doc_to_text(doc, lmms_eval_specific_kwargs):
+    pre_prompt = lmms_eval_specific_kwargs["pre_prompt"]
+    post_prompt = lmms_eval_specific_kwargs["post_prompt"]
+    return f"{pre_prompt}Please recognize the speech and only output the recognized content:{post_prompt}"


 def people_speech_process_result(doc, result):
diff --git a/lmms_eval/tasks/tedlium/tedlium_dev_test.yaml b/lmms_eval/tasks/tedlium/tedlium_dev_test.yaml
index 81b38cab..18810f4c 100644
--- a/lmms_eval/tasks/tedlium/tedlium_dev_test.yaml
+++ b/lmms_eval/tasks/tedlium/tedlium_dev_test.yaml
@@ -20,4 +20,11 @@ metric_list:
     aggregation : !function utils.tedlium_wer
     higher_is_better : false
 metadata:
-  - version: 0.0
\ No newline at end of file
+  - version: 0.0
+lmms_eval_specific_kwargs:
+  default:
+    pre_prompt: ""
+    post_prompt: ""
+  qwen2_audio:
+    pre_prompt: ""
+    post_prompt: " <|en|>"
\ No newline at end of file
diff --git a/lmms_eval/tasks/tedlium/tedlium_long_form.yaml b/lmms_eval/tasks/tedlium/tedlium_long_form.yaml
index e09ad0ff..f72983b7 100644
--- a/lmms_eval/tasks/tedlium/tedlium_long_form.yaml
+++ b/lmms_eval/tasks/tedlium/tedlium_long_form.yaml
@@ -20,4 +20,11 @@ metric_list:
     aggregation : !function utils.tedlium_wer
     higher_is_better : false
 metadata:
-  - version: 0.0
\ No newline at end of file
+  - version: 0.0
+lmms_eval_specific_kwargs:
+  default:
+    pre_prompt: ""
+    post_prompt: ""
+  qwen2_audio:
+    pre_prompt: ""
+    post_prompt: " <|en|>"
\ No newline at end of file
diff --git a/lmms_eval/tasks/tedlium/utils.py b/lmms_eval/tasks/tedlium/utils.py
index 6823c29b..606b15e3 100644
--- a/lmms_eval/tasks/tedlium/utils.py
+++ b/lmms_eval/tasks/tedlium/utils.py
@@ -30,9 +30,10 @@ def tedlium_doc_to_audio(doc):
     return [doc["audio"]]


-def tedlium_doc_to_text(doc):
-    lan = doc["task"][4:]
-    return f"Detect the language and recognize the speech: <|{lan}|>"
+def tedlium_doc_to_text(doc, lmms_eval_specific_kwargs):
+    pre_prompt = lmms_eval_specific_kwargs["pre_prompt"]
+    post_prompt = lmms_eval_specific_kwargs["post_prompt"]
+    return f"{pre_prompt}Please recognize the speech and only output the recognized content:{post_prompt}"


 def tedlium_process_result(doc, result):
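
Note (not part of the patch): a minimal sketch of how the revised doc_to_text functions consume the new lmms_eval_specific_kwargs block. The inline dict mirrors the YAML added above, and resolve_kwargs is a hypothetical stand-in for the harness's own YAML loading and per-model selection; only the gigaspeech_doc_to_text body is taken verbatim from the patch.

# Illustrative sketch only; assumes the harness picks the model-specific block
# when one exists and otherwise falls back to "default".
lmms_eval_specific_kwargs = {
    "default": {"pre_prompt": "", "post_prompt": ""},
    "qwen2_audio": {"pre_prompt": "", "post_prompt": " <|en|>"},
}


def resolve_kwargs(config, model_name):
    # Hypothetical helper: return the per-model block, else the default block.
    return config.get(model_name, config["default"])


def gigaspeech_doc_to_text(doc, lmms_eval_specific_kwargs):
    # Same body as the patched function above.
    pre_prompt = lmms_eval_specific_kwargs["pre_prompt"]
    post_prompt = lmms_eval_specific_kwargs["post_prompt"]
    return f"{pre_prompt}Please recognize the speech and only output the recognized content:{post_prompt}"


doc = {"audio": None}  # placeholder document; the prompt no longer depends on it
print(gigaspeech_doc_to_text(doc, resolve_kwargs(lmms_eval_specific_kwargs, "qwen2_audio")))
# -> Please recognize the speech and only output the recognized content: <|en|>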