Skip to content

Commit

Permalink
Add image-to-text evaluation tasks and templates
Browse files Browse the repository at this point in the history
  • Loading branch information
pufanyi committed Dec 1, 2024
1 parent cc232f7 commit b89847a
Show file tree
Hide file tree
Showing 12 changed files with 553 additions and 23 deletions.
12 changes: 12 additions & 0 deletions lmms_eval/tasks/mix_evals/image2text/_default_template_yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Shared defaults for the image2text tasks; the task files in this directory
# pull these keys in via `include: _default_template_yaml`.
dataset_path: MixEval/MixEval-X
dataset_kwargs:
  cache_dir: mix_evals_image2text
# Per-profile prompt kwargs (`default`, `gpt4v`); both profiles use empty
# strings here. The freeform_hard task file redefines this mapping.
lmms_eval_specific_kwargs:
  default:
    post_prompt: ""
    pre_prompt: ""
  gpt4v:
    post_prompt: ""
    pre_prompt: ""
metadata:
  version: 0
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Group bundling the non-hard image2text tasks (multiple choice + free form).
group: mix_evals_image2text
task:
  - mix_evals_image2text_mc
  - mix_evals_image2text_freeform
  # Open-ended task is currently left out of the group.
  # - mix_evals_image2text_openended
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Free-form image2text task: generates answers for the `free_form` split and
# reports them under the `gpt_eval` metric.
task: "mix_evals_image2text_freeform"
dataset_name: "image2text"
test_split: free_form
output_type: generate_until
doc_to_visual: !function utils.mix_evals_image2text_doc_to_visual
doc_to_text: !function utils.mix_evals_image2text_doc_to_text
doc_to_target: "{{reference_answer}}"
process_results: !function utils.mix_evals_image2text_process_results_freeform
metric_list:
  - metric: gpt_eval
    aggregation: !function utils.mix_evals_image2text_gpt_eval
    higher_is_better: true

generation_kwargs:
  max_new_tokens: 16

# Shared dataset and prompt defaults.
include: _default_template_yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Free-form image2text task on the `free_form_hard` split, reported under the
# `gpt_eval` metric.
task: "mix_evals_image2text_freeform_hard"
dataset_name: "image2text"
test_split: free_form_hard
output_type: generate_until
doc_to_visual: !function utils.mix_evals_image2text_doc_to_visual
doc_to_text: !function utils.mix_evals_image2text_doc_to_text
doc_to_target: "{{reference_answer}}"
process_results: !function utils.mix_evals_image2text_process_results_freeform
metric_list:
  - metric: gpt_eval
    aggregation: !function utils.mix_evals_image2text_gpt_eval
    higher_is_better: true

generation_kwargs:
  max_new_tokens: 16

# Shared dataset and prompt defaults.
include: _default_template_yaml

# Redefines the prompt kwargs from the included template, supplying an
# explicit instruction as `pre_prompt` for both profiles.
lmms_eval_specific_kwargs:
  default:
    pre_prompt: "Please answer the following questions about the image."
    post_prompt: ""
  gpt4v:
    pre_prompt: "Please answer the following questions about the image."
    post_prompt: ""
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Group bundling the hard-split image2text tasks (multiple choice + free form).
group: mix_evals_image2text_hard
task:
  - mix_evals_image2text_mc_hard
  - mix_evals_image2text_freeform_hard
  # Open-ended task is currently left out of the group.
  # - mix_evals_image2text_openended
23 changes: 23 additions & 0 deletions lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text_mc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Multiple-choice image2text task on the `multiple_choice` split, scored by
# exact match after the `flexible-extract` filter is applied.
include: _default_template_yaml
task: "mix_evals_image2text_mc"
dataset_name: "image2text"
test_split: multiple_choice
output_type: generate_until
doc_to_visual: !function utils.mix_evals_image2text_doc_to_visual
doc_to_text: !function utils.mix_evals_image2text_doc_to_text
doc_to_target: "{{reference_answer}}"

generation_kwargs:
  max_new_tokens: 5

metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true

filter_list:
  - name: "flexible-extract"
    filter:
      - function: !function utils.GPTMultiChoiceFilter
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Multiple-choice image2text task on the `multiple_choice_hard` split, scored
# by exact match after the `flexible-extract` filter is applied.
include: _default_template_yaml
task: "mix_evals_image2text_mc_hard"
dataset_name: "image2text"
test_split: multiple_choice_hard
output_type: generate_until
doc_to_visual: !function utils.mix_evals_image2text_doc_to_visual
doc_to_text: !function utils.mix_evals_image2text_doc_to_text
doc_to_target: "{{reference_answer}}"

generation_kwargs:
  max_new_tokens: 5

metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true

filter_list:
  - name: "flexible-extract"
    filter:
      - function: !function utils.GPTMultiChoiceFilter
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Open-ended image2text task: reads the `image2text` split of the
# `open_ended` dataset config and reports under the `submission` metric.
# `doc_to_target` is empty for this task.
include: _default_template_yaml
dataset_name: "open_ended"
task: "mix_evals_image2text_openended"
test_split: image2text
output_type: generate_until
doc_to_visual: !function utils.mix_evals_image2text_doc_to_visual
doc_to_text: !function utils.mix_evals_image2text_doc_to_text_open_convs
doc_to_target: ""
# NOTE(review): this hook and the aggregation hook below carry a `video2text`
# prefix while every other hook in this suite is `image2text`-prefixed;
# confirm both names exist in this directory's utils module.
process_results: !function utils.mix_evals_video2text_process_results_open_convs

metric_list:
  - metric: submission
    aggregation: !function utils.mix_evals_video2text_aggregate_gen
    higher_is_better: true
Loading

0 comments on commit b89847a

Please sign in to comment.