diff --git a/lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text_freeform.yaml b/lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text_freeform.yaml index 1a21c46f..e1e7cded 100644 --- a/lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text_freeform.yaml +++ b/lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text_freeform.yaml @@ -12,6 +12,6 @@ metric_list: higher_is_better: true generation_kwargs: - max_new_tokens: 16 + max_new_tokens: 1024 include: _default_template_yaml diff --git a/lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text_freeform_hard.yaml b/lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text_freeform_hard.yaml index e0aeea52..24874364 100644 --- a/lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text_freeform_hard.yaml +++ b/lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text_freeform_hard.yaml @@ -12,7 +12,7 @@ metric_list: higher_is_better: true generation_kwargs: - max_new_tokens: 16 + max_new_tokens: 1024 include: _default_template_yaml diff --git a/lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text_mc.yaml b/lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text_mc.yaml index 4ac669b5..1100b539 100644 --- a/lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text_mc.yaml +++ b/lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text_mc.yaml @@ -8,7 +8,7 @@ doc_to_text: !function utils.mix_evals_image2text_doc_to_text doc_to_target: "{{reference_answer}}" generation_kwargs: - max_new_tokens: 5 + max_new_tokens: 1024 metric_list: - metric: exact_match diff --git a/lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text_mc_hard.yaml b/lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text_mc_hard.yaml index 58fae82a..8fd90184 100644 --- a/lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text_mc_hard.yaml +++ b/lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text_mc_hard.yaml @@ -8,7 +8,7 @@ doc_to_text: !function utils.mix_evals_image2text_doc_to_text doc_to_target: "{{reference_answer}}" generation_kwargs: - max_new_tokens: 5 + max_new_tokens: 1024 metric_list: - metric: exact_match diff --git a/lmms_eval/tasks/mix_evals/video2text/mix_evals_video2text_freeform.yaml b/lmms_eval/tasks/mix_evals/video2text/mix_evals_video2text_freeform.yaml index 30a50046..366a2bea 100644 --- a/lmms_eval/tasks/mix_evals/video2text/mix_evals_video2text_freeform.yaml +++ b/lmms_eval/tasks/mix_evals/video2text/mix_evals_video2text_freeform.yaml @@ -12,7 +12,7 @@ metric_list: higher_is_better: true generation_kwargs: - max_new_tokens: 16 + max_new_tokens: 1024 include: _default_template_yaml diff --git a/lmms_eval/tasks/mix_evals/video2text/mix_evals_video2text_freeform_hard.yaml b/lmms_eval/tasks/mix_evals/video2text/mix_evals_video2text_freeform_hard.yaml index 37690431..059d2b28 100644 --- a/lmms_eval/tasks/mix_evals/video2text/mix_evals_video2text_freeform_hard.yaml +++ b/lmms_eval/tasks/mix_evals/video2text/mix_evals_video2text_freeform_hard.yaml @@ -12,7 +12,7 @@ metric_list: higher_is_better: true generation_kwargs: - max_new_tokens: 16 + max_new_tokens: 1024 include: _default_template_yaml diff --git a/lmms_eval/tasks/mix_evals/video2text/mix_evals_video2text_mc.yaml b/lmms_eval/tasks/mix_evals/video2text/mix_evals_video2text_mc.yaml index 1e36fab9..c94a0c5a 100644 --- a/lmms_eval/tasks/mix_evals/video2text/mix_evals_video2text_mc.yaml +++ b/lmms_eval/tasks/mix_evals/video2text/mix_evals_video2text_mc.yaml @@ -8,7 +8,7 @@ doc_to_text: !function utils.mix_evals_video2text_doc_to_text doc_to_target: "{{reference_answer}}" generation_kwargs: - max_new_tokens: 5 + max_new_tokens: 1024 metric_list: - metric: exact_match diff --git a/lmms_eval/tasks/mix_evals/video2text/mix_evals_video2text_mc_hard.yaml b/lmms_eval/tasks/mix_evals/video2text/mix_evals_video2text_mc_hard.yaml index 97754a67..9cc3f2da 100644 --- a/lmms_eval/tasks/mix_evals/video2text/mix_evals_video2text_mc_hard.yaml +++ b/lmms_eval/tasks/mix_evals/video2text/mix_evals_video2text_mc_hard.yaml @@ -8,7 +8,7 @@ doc_to_text: !function utils.mix_evals_video2text_doc_to_text doc_to_target: "{{reference_answer}}" generation_kwargs: - max_new_tokens: 5 + max_new_tokens: 1024 metric_list: - metric: exact_match