Skip to content

Commit

Permalink
Revise prompt
Browse files Browse the repository at this point in the history
  • Loading branch information
kcz358 committed Nov 27, 2024
1 parent aabb021 commit 30a47b3
Show file tree
Hide file tree
Showing 27 changed files with 197 additions and 38 deletions.
9 changes: 8 additions & 1 deletion lmms_eval/tasks/common_voice_15/common_voice_15_en.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,11 @@ metric_list:
aggregation : !function utils.common_voice_15_wer
higher_is_better : false
metadata:
- version: 0.0
- version: 0.0
lmms_eval_specific_kwargs:
default:
pre_prompt: ""
post_prompt: ""
qwen2_audio:
pre_prompt: ""
post_prompt: " <|en|>"
9 changes: 8 additions & 1 deletion lmms_eval/tasks/common_voice_15/common_voice_15_fr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,11 @@ metric_list:
aggregation : !function utils.common_voice_15_wer
higher_is_better : false
metadata:
- version: 0.0
- version: 0.0
lmms_eval_specific_kwargs:
default:
pre_prompt: ""
post_prompt: ""
qwen2_audio:
pre_prompt: ""
post_prompt: " <|fr|>"
9 changes: 8 additions & 1 deletion lmms_eval/tasks/common_voice_15/common_voice_15_zh-CN.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,11 @@ metric_list:
aggregation : !function utils.common_voice_15_wer
higher_is_better : false
metadata:
- version: 0.0
- version: 0.0
lmms_eval_specific_kwargs:
default:
pre_prompt: ""
post_prompt: ""
qwen2_audio:
pre_prompt: ""
post_prompt: " <|zh|>"
7 changes: 4 additions & 3 deletions lmms_eval/tasks/common_voice_15/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,10 @@ def common_voice_15_doc_to_audio(doc):
return [doc["audio"]]


def common_voice_15_doc_to_text(doc):
    """Return the recognition prompt tagged with a language code.

    The tag is whatever follows the first four characters of
    ``doc["task"]``, so ``doc`` must provide a ``"task"`` entry.
    """
    # NOTE(review): presumably the task name carries a 4-character prefix
    # ahead of the language code -- confirm against the dataset.
    task_name = doc["task"]
    return f"Detect the language and recognize the speech: <|{task_name[4:]}|>"
def common_voice_15_doc_to_text(doc, lmms_eval_specific_kwargs=None):
    """Build the speech-recognition prompt for a Common Voice 15 sample.

    Args:
        doc: The dataset record. It is not read by this function.
        lmms_eval_specific_kwargs: Optional mapping that may hold
            ``"pre_prompt"`` and ``"post_prompt"`` strings. When the mapping
            is ``None`` or a key is absent, an empty string is used instead.

    Returns:
        The fixed instruction sentence with ``pre_prompt`` placed directly
        before it and ``post_prompt`` placed directly after it.
    """
    # Defaulting to an empty mapping keeps one-argument calls working and
    # avoids a KeyError when a config defines only one of the two keys.
    prompt_kwargs = lmms_eval_specific_kwargs or {}
    pre_prompt = prompt_kwargs.get("pre_prompt", "")
    post_prompt = prompt_kwargs.get("post_prompt", "")
    return f"{pre_prompt}Please recognize the speech and only output the recognized content:{post_prompt}"


def common_voice_15_process_result(doc, result):
Expand Down
9 changes: 8 additions & 1 deletion lmms_eval/tasks/gigaspeech/gigaspeech_dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,11 @@ metric_list:
aggregation : !function utils.gigaspeech_wer
higher_is_better : false
metadata:
- version: 0.0
- version: 0.0
lmms_eval_specific_kwargs:
default:
pre_prompt: ""
post_prompt: ""
qwen2_audio:
pre_prompt: ""
post_prompt: " <|en|>"
9 changes: 8 additions & 1 deletion lmms_eval/tasks/gigaspeech/gigaspeech_l_dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,11 @@ metric_list:
aggregation : !function utils.gigaspeech_wer
higher_is_better : false
metadata:
- version: 0.0
- version: 0.0
lmms_eval_specific_kwargs:
default:
pre_prompt: ""
post_prompt: ""
qwen2_audio:
pre_prompt: ""
post_prompt: " <|en|>"
9 changes: 8 additions & 1 deletion lmms_eval/tasks/gigaspeech/gigaspeech_l_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,11 @@ metric_list:
aggregation : !function utils.gigaspeech_wer
higher_is_better : false
metadata:
- version: 0.0
- version: 0.0
lmms_eval_specific_kwargs:
default:
pre_prompt: ""
post_prompt: ""
qwen2_audio:
pre_prompt: ""
post_prompt: " <|en|>"
9 changes: 8 additions & 1 deletion lmms_eval/tasks/gigaspeech/gigaspeech_m_dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,11 @@ metric_list:
aggregation : !function utils.gigaspeech_wer
higher_is_better : false
metadata:
- version: 0.0
- version: 0.0
lmms_eval_specific_kwargs:
default:
pre_prompt: ""
post_prompt: ""
qwen2_audio:
pre_prompt: ""
post_prompt: " <|en|>"
9 changes: 8 additions & 1 deletion lmms_eval/tasks/gigaspeech/gigaspeech_m_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,11 @@ metric_list:
aggregation : !function utils.gigaspeech_wer
higher_is_better : false
metadata:
- version: 0.0
- version: 0.0
lmms_eval_specific_kwargs:
default:
pre_prompt: ""
post_prompt: ""
qwen2_audio:
pre_prompt: ""
post_prompt: " <|en|>"
9 changes: 8 additions & 1 deletion lmms_eval/tasks/gigaspeech/gigaspeech_s_dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,11 @@ metric_list:
aggregation : !function utils.gigaspeech_wer
higher_is_better : false
metadata:
- version: 0.0
- version: 0.0
lmms_eval_specific_kwargs:
default:
pre_prompt: ""
post_prompt: ""
qwen2_audio:
pre_prompt: ""
post_prompt: " <|en|>"
9 changes: 8 additions & 1 deletion lmms_eval/tasks/gigaspeech/gigaspeech_s_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,11 @@ metric_list:
aggregation : !function utils.gigaspeech_wer
higher_is_better : false
metadata:
- version: 0.0
- version: 0.0
lmms_eval_specific_kwargs:
default:
pre_prompt: ""
post_prompt: ""
qwen2_audio:
pre_prompt: ""
post_prompt: " <|en|>"
9 changes: 8 additions & 1 deletion lmms_eval/tasks/gigaspeech/gigaspeech_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,11 @@ metric_list:
aggregation : !function utils.gigaspeech_wer
higher_is_better : false
metadata:
- version: 0.0
- version: 0.0
lmms_eval_specific_kwargs:
default:
pre_prompt: ""
post_prompt: ""
qwen2_audio:
pre_prompt: ""
post_prompt: " <|en|>"
9 changes: 8 additions & 1 deletion lmms_eval/tasks/gigaspeech/gigaspeech_xl_dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,11 @@ metric_list:
aggregation : !function utils.gigaspeech_wer
higher_is_better : false
metadata:
- version: 0.0
- version: 0.0
lmms_eval_specific_kwargs:
default:
pre_prompt: ""
post_prompt: ""
qwen2_audio:
pre_prompt: ""
post_prompt: " <|en|>"
9 changes: 8 additions & 1 deletion lmms_eval/tasks/gigaspeech/gigaspeech_xl_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,11 @@ metric_list:
aggregation : !function utils.gigaspeech_wer
higher_is_better : false
metadata:
- version: 0.0
- version: 0.0
lmms_eval_specific_kwargs:
default:
pre_prompt: ""
post_prompt: ""
qwen2_audio:
pre_prompt: ""
post_prompt: " <|en|>"
9 changes: 8 additions & 1 deletion lmms_eval/tasks/gigaspeech/gigaspeech_xs_dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,11 @@ metric_list:
aggregation : !function utils.gigaspeech_wer
higher_is_better : false
metadata:
- version: 0.0
- version: 0.0
lmms_eval_specific_kwargs:
default:
pre_prompt: ""
post_prompt: ""
qwen2_audio:
pre_prompt: ""
post_prompt: " <|en|>"
9 changes: 8 additions & 1 deletion lmms_eval/tasks/gigaspeech/gigaspeech_xs_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,11 @@ metric_list:
aggregation : !function utils.gigaspeech_wer
higher_is_better : false
metadata:
- version: 0.0
- version: 0.0
lmms_eval_specific_kwargs:
default:
pre_prompt: ""
post_prompt: ""
qwen2_audio:
pre_prompt: ""
post_prompt: " <|en|>"
7 changes: 4 additions & 3 deletions lmms_eval/tasks/gigaspeech/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@ def gigaspeech_doc_to_audio(doc):
return [doc["audio"]]


def gigaspeech_doc_to_text(doc):
    """Return the recognition prompt with the fixed ``en`` language tag.

    ``doc`` is accepted but never read; the result is the same for every
    record.
    """
    language_tag = "en"
    return "Detect the language and recognize the speech: <|" + language_tag + "|>"
def gigaspeech_doc_to_text(doc, lmms_eval_specific_kwargs=None):
    """Build the speech-recognition prompt for a GigaSpeech sample.

    Args:
        doc: The dataset record. It is not read by this function.
        lmms_eval_specific_kwargs: Optional mapping that may hold
            ``"pre_prompt"`` and ``"post_prompt"`` strings. When the mapping
            is ``None`` or a key is absent, an empty string is used instead.

    Returns:
        The fixed instruction sentence with ``pre_prompt`` placed directly
        before it and ``post_prompt`` placed directly after it.
    """
    # Defaulting to an empty mapping keeps one-argument calls working and
    # avoids a KeyError when a config defines only one of the two keys.
    prompt_kwargs = lmms_eval_specific_kwargs or {}
    pre_prompt = prompt_kwargs.get("pre_prompt", "")
    post_prompt = prompt_kwargs.get("post_prompt", "")
    return f"{pre_prompt}Please recognize the speech and only output the recognized content:{post_prompt}"


def gigaspeech_process_result(doc, result):
Expand Down
9 changes: 8 additions & 1 deletion lmms_eval/tasks/librispeech/librispeech_dev_clean.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,11 @@ metric_list:
aggregation : !function utils.librispeech_wer
higher_is_better : false
metadata:
- version: 0.0
- version: 0.0
lmms_eval_specific_kwargs:
default:
pre_prompt: ""
post_prompt: ""
qwen2_audio:
pre_prompt: ""
post_prompt: " <|en|>"
9 changes: 8 additions & 1 deletion lmms_eval/tasks/librispeech/librispeech_dev_other.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,11 @@ metric_list:
aggregation : !function utils.librispeech_wer
higher_is_better : false
metadata:
- version: 0.0
- version: 0.0
lmms_eval_specific_kwargs:
default:
pre_prompt: ""
post_prompt: ""
qwen2_audio:
pre_prompt: ""
post_prompt: " <|en|>"
9 changes: 8 additions & 1 deletion lmms_eval/tasks/librispeech/librispeech_test_clean.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,11 @@ metric_list:
aggregation : !function utils.librispeech_wer
higher_is_better : false
metadata:
- version: 0.0
- version: 0.0
lmms_eval_specific_kwargs:
default:
pre_prompt: ""
post_prompt: ""
qwen2_audio:
pre_prompt: ""
post_prompt: " <|en|>"
9 changes: 8 additions & 1 deletion lmms_eval/tasks/librispeech/librispeech_test_other.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,11 @@ metric_list:
aggregation : !function utils.librispeech_wer
higher_is_better : false
metadata:
- version: 0.0
- version: 0.0
lmms_eval_specific_kwargs:
default:
pre_prompt: ""
post_prompt: ""
qwen2_audio:
pre_prompt: ""
post_prompt: " <|en|>"
7 changes: 4 additions & 3 deletions lmms_eval/tasks/librispeech/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,10 @@ def librispeech_doc_to_audio(doc):
return [doc["audio"]]


def librispeech_doc_to_text(doc):
    """Return the recognition prompt tagged with a language code.

    The tag is whatever follows the first four characters of
    ``doc["task"]``, so ``doc`` must provide a ``"task"`` entry.
    """
    # NOTE(review): presumably the task name carries a 4-character prefix
    # ahead of the language code -- confirm against the dataset.
    task_name = doc["task"]
    return f"Detect the language and recognize the speech: <|{task_name[4:]}|>"
def librispeech_doc_to_text(doc, lmms_eval_specific_kwargs=None):
    """Build the speech-recognition prompt for a LibriSpeech sample.

    Args:
        doc: The dataset record. It is not read by this function.
        lmms_eval_specific_kwargs: Optional mapping that may hold
            ``"pre_prompt"`` and ``"post_prompt"`` strings. When the mapping
            is ``None`` or a key is absent, an empty string is used instead.

    Returns:
        The fixed instruction sentence with ``pre_prompt`` placed directly
        before it and ``post_prompt`` placed directly after it.
    """
    # Defaulting to an empty mapping keeps one-argument calls working and
    # avoids a KeyError when a config defines only one of the two keys.
    prompt_kwargs = lmms_eval_specific_kwargs or {}
    pre_prompt = prompt_kwargs.get("pre_prompt", "")
    post_prompt = prompt_kwargs.get("post_prompt", "")
    return f"{pre_prompt}Please recognize the speech and only output the recognized content:{post_prompt}"


def librispeech_process_result(doc, result):
Expand Down
11 changes: 9 additions & 2 deletions lmms_eval/tasks/people_speech/people_speech_val.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
dataset_path: lmms-lab/people_speech
dataset_path: lmms-lab/peoples_speech
dataset_kwargs:
token: True
task : "people_speech_val"
Expand All @@ -19,4 +19,11 @@ metric_list:
aggregation : !function utils.people_speech_wer
higher_is_better : false
metadata:
- version: 0.0
- version: 0.0
lmms_eval_specific_kwargs:
default:
pre_prompt: ""
post_prompt: ""
qwen2_audio:
pre_prompt: ""
post_prompt: " <|en|>"
7 changes: 4 additions & 3 deletions lmms_eval/tasks/people_speech/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,10 @@ def people_speech_doc_to_audio(doc):
return [doc["audio"]]


def people_speech_doc_to_text(doc):
    """Return the recognition prompt tagged with a language code.

    The tag is whatever follows the first four characters of
    ``doc["task"]``, so ``doc`` must provide a ``"task"`` entry.
    """
    # NOTE(review): presumably the task name carries a 4-character prefix
    # ahead of the language code -- confirm against the dataset.
    task_name = doc["task"]
    return f"Detect the language and recognize the speech: <|{task_name[4:]}|>"
def people_speech_doc_to_text(doc, lmms_eval_specific_kwargs=None):
    """Build the speech-recognition prompt for a People's Speech sample.

    Args:
        doc: The dataset record. It is not read by this function.
        lmms_eval_specific_kwargs: Optional mapping that may hold
            ``"pre_prompt"`` and ``"post_prompt"`` strings. When the mapping
            is ``None`` or a key is absent, an empty string is used instead.

    Returns:
        The fixed instruction sentence with ``pre_prompt`` placed directly
        before it and ``post_prompt`` placed directly after it.
    """
    # Defaulting to an empty mapping keeps one-argument calls working and
    # avoids a KeyError when a config defines only one of the two keys.
    prompt_kwargs = lmms_eval_specific_kwargs or {}
    pre_prompt = prompt_kwargs.get("pre_prompt", "")
    post_prompt = prompt_kwargs.get("post_prompt", "")
    return f"{pre_prompt}Please recognize the speech and only output the recognized content:{post_prompt}"


def people_speech_process_result(doc, result):
Expand Down
9 changes: 8 additions & 1 deletion lmms_eval/tasks/tedlium/tedlium_dev_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,11 @@ metric_list:
aggregation : !function utils.tedlium_wer
higher_is_better : false
metadata:
- version: 0.0
- version: 0.0
lmms_eval_specific_kwargs:
default:
pre_prompt: ""
post_prompt: ""
qwen2_audio:
pre_prompt: ""
post_prompt: " <|en|>"
9 changes: 8 additions & 1 deletion lmms_eval/tasks/tedlium/tedlium_long_form.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,11 @@ metric_list:
aggregation : !function utils.tedlium_wer
higher_is_better : false
metadata:
- version: 0.0
- version: 0.0
lmms_eval_specific_kwargs:
default:
pre_prompt: ""
post_prompt: ""
qwen2_audio:
pre_prompt: ""
post_prompt: " <|en|>"
Loading

0 comments on commit 30a47b3

Please sign in to comment.