diff --git a/lmms_eval/tasks/__init__.py b/lmms_eval/tasks/__init__.py
index 3749248e..85537d0c 100755
--- a/lmms_eval/tasks/__init__.py
+++ b/lmms_eval/tasks/__init__.py
@@ -417,6 +417,8 @@ def _get_task_and_group(self, task_dir: str):
                             "yaml_path": yaml_path,
                         }
                     elif self._config_is_group(config):
+                        if f.endswith("mix_evals_image2text.yaml"):
+                            print(config)
                         # This is a group config
                         tasks_and_groups[config["group"]] = {
                             "type": "group",
@@ -477,6 +479,7 @@ def _get_task_and_group(self, task_dir: str):
                     else:
                         self.logger.debug(f"File {f} in {root} could not be loaded as a task or group")

+        print(tasks_and_groups["mix_evals_image2text"])
         return tasks_and_groups

diff --git a/lmms_eval/tasks/mix_evals/image2text/_default_template_yaml b/lmms_eval/tasks/mix_evals/image2text/_default_template_yaml
index c75156f4..ee3858f9 100644
--- a/lmms_eval/tasks/mix_evals/image2text/_default_template_yaml
+++ b/lmms_eval/tasks/mix_evals/image2text/_default_template_yaml
@@ -1,5 +1,6 @@
 dataset_path: MixEval/MixEval-X
 dataset_kwargs:
+  video: true # a bit confusing, but the official release stores image data as paths, so we need to load it as a video dataset
   cache_dir: mix_evals_image2text
 lmms_eval_specific_kwargs:
   default:
diff --git a/lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text.yaml b/lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text.yaml
index 053e13df..141c8c56 100644
--- a/lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text.yaml
+++ b/lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text.yaml
@@ -2,4 +2,3 @@ group: mix_evals_image2text
 task:
 - mix_evals_image2text_mc
 - mix_evals_image2text_freeform
-# - mix_evals_video2text_openended
\ No newline at end of file
diff --git a/lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text_openended.yaml b/lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text_openended.yaml
deleted file mode 100644
index 5e05aea5..00000000
--- a/lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text_openended.yaml
+++ /dev/null
@@ -1,14 +0,0 @@
-include: _default_template_yaml
-dataset_name: "open_ended"
-task: "mix_evals_image2text_openended"
-test_split: image2text
-output_type: generate_until
-doc_to_visual: !function utils.mix_evals_image2text_doc_to_visual
-doc_to_text: !function utils.mix_evals_image2text_doc_to_text_open_convs
-doc_to_target: ""
-process_results: !function utils.mix_evals_video2text_process_results_open_convs
-
-metric_list:
-  - metric: submission
-    aggregation: !function utils.mix_evals_video2text_aggregate_gen
-    higher_is_better: true
diff --git a/lmms_eval/tasks/mix_evals/image2text/utils.py b/lmms_eval/tasks/mix_evals/image2text/utils.py
index 32333044..ea1b306d 100644
--- a/lmms_eval/tasks/mix_evals/image2text/utils.py
+++ b/lmms_eval/tasks/mix_evals/image2text/utils.py
@@ -304,7 +304,7 @@ def mix_evals_image2text_process_results_freeform(doc, result):

 def mix_evals_image2text_aggregate_submissions(results, args, task):
     now_date_time = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
-    submission_file_name = f"mix_evals_video2text_{task}-{now_date_time}.json"
+    submission_file_name = f"mix_evals_image2text_{task}-{now_date_time}.json"
     path = file_utils.generate_submission_file(submission_file_name, args)
     with open(path, "w") as f:
         json.dump(results, f)
@@ -365,8 +365,9 @@ def apply(self, resps, docs):
                     # response.raise_for_status()
                     # content = response.json()["choices"][0]["message"]["content"].strip()

-                    content = response.choices[0].message.content.strip()
-                    if content != "":
+                    content = response.choices[0].message.content
+                    if content:
+                        content = content.strip()
                         match = re.search(r"r'\b([A-Z])\.?\b'", content)
                         if match:
                             result = ord(match.group(1)) - ord("A")
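
Note on the guard added in apply(): message.content from the chat completions API can be None, and calling .strip() on it raises an AttributeError, so the response is now stripped and parsed only when it is non-empty. A minimal standalone sketch of that guarded letter extraction follows; the helper name and the plain letter regex are illustrative assumptions, not code taken from the repo:

import re

def parse_choice_index(content):
    """Return the 0-based index of an answer letter found in a judge reply, or None."""
    if not content:  # message.content may be None or "" when the API returns no text
        return None
    content = content.strip()
    match = re.search(r"\b([A-Z])\.?\b", content)  # assumed pattern; e.g. "Answer: B." -> "B"
    if match:
        return ord(match.group(1)) - ord("A")
    return None

# Example: parse_choice_index("The answer is B.") -> 1; parse_choice_index(None) -> None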