Added new task IllusionVQA (COLM2024) (#471)

* added_illusionvqa * illusionvqa_reformat
EvolvingLMMs-Lab · Dec 22, 2024 · 5b80d5f · 5b80d5f
1 parent dbdafb0
commit 5b80d5f
Show file tree

Hide file tree

Showing 5 changed files with 119 additions and 0 deletions.
diff --git a/docs/current_tasks.md b/docs/current_tasks.md
@@ -25,6 +25,7 @@
 - [GQA](https://cs.stanford.edu/people/dorarad/gqa/index.html) (gqa)
 - [GQA-ru](https://huggingface.co/datasets/deepvk/GQA-ru) (gqa_ru)
 - [II-Bench](https://github.com/II-Bench/II-Bench) (ii_bench)
+- [IllusionVQA](https://illusionvqa.github.io/) (illusionvqa)
 - [Infographic VQA](https://www.docvqa.org/datasets/infographicvqa) (infovqa)
   - Infographic VQA Validation (infovqa_val)
   - Infographic VQA Test (infovqa_test)

diff --git a/lmms_eval/tasks/illusionvqa/illusionvqa.yaml b/lmms_eval/tasks/illusionvqa/illusionvqa.yaml
@@ -0,0 +1,48 @@
+# Project page: https://illusionvqa.github.io/
+# Example run: python3 -m lmms_eval --model gpt4v --model_args model_version="gpt-4o",modality="image" --tasks illusionvqa
+
+tag: "illusionvqa"                  
+
+test_split: "test"                   
+fewshot_split: "train"                
+
+# Multiple-choice answers are produced as free-form generations and reduced to an option letter by filter_list below:
+output_type: "generate_until"
+
+
+doc_to_visual: "image"
+doc_to_text: !function utils.illusionvqa_doc_to_text
+doc_to_target: !function utils.illusionvqa_doc_to_target
+
+lmms_eval_specific_kwargs:
+  default:
+    prompt_format: mcq
+    pre_prompt: "You'll be given an image, an instruction and some options. You have to select the correct one. Do not explain your reasoning. Answer with only the letter which corresponds to the correct option. Do not repeat the entire answer."
+    post_prompt: ""
+
+
+generation_kwargs:
+  max_new_tokens: 16
+  temperature: 0
+  do_sample: False
+
+filter_list:
+  - name: "flexible-extract"
+    filter:
+      - function: !function utils.MultiChoiceRegexFilter
+        group_select: 0
+        ignore_case: true
+        ignore_punctuation: true
+        regex_pattern: "([A-Z])\\."
+
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+
+fewshot_config:
+  sampler: first_n
+
+num_fewshot: 0
diff --git a/lmms_eval/tasks/illusionvqa/illusionvqa_comprehension.yaml b/lmms_eval/tasks/illusionvqa/illusionvqa_comprehension.yaml
@@ -0,0 +1,3 @@
+include: illusionvqa.yaml
+task: illusionvqa_comprehension
+dataset_path: csebuetnlp/illusionVQA-Comprehension
diff --git a/lmms_eval/tasks/illusionvqa/illusionvqa_soft_localization.yaml b/lmms_eval/tasks/illusionvqa/illusionvqa_soft_localization.yaml
@@ -0,0 +1,3 @@
+include: illusionvqa.yaml
+task: illusionvqa_soft_localization
+dataset_path: csebuetnlp/illusionVQA-Soft-Localization
diff --git a/lmms_eval/tasks/illusionvqa/utils.py b/lmms_eval/tasks/illusionvqa/utils.py
@@ -0,0 +1,64 @@
+# adapted from ai2d/utils.py
+import re
+
+from lmms_eval.filters.extraction import ExtendedRegexFilter
+
+
+def illusionvqa_doc_to_text(doc, lmms_eval_specific_kwargs=None):
+    """Build the multiple-choice prompt for one IllusionVQA document.
+
+    The prompt is ``pre_prompt`` immediately followed by the question, then one
+    ``"<letter>. <choice>"`` line per option (lettered from ``A``), then
+    ``post_prompt``.
+
+    Args:
+        doc: Mapping with a ``"question"`` entry and an ``"options"`` sequence.
+        lmms_eval_specific_kwargs: Optional mapping that may carry
+            ``"pre_prompt"`` and ``"post_prompt"``. ``None`` or a missing key
+            is treated as an empty string.
+
+    Returns:
+        The formatted prompt string.
+    """
+    # The signature advertises None as the default, so it must not be subscripted.
+    prompt_kwargs = lmms_eval_specific_kwargs or {}
+    pre_prompt = prompt_kwargs.get("pre_prompt", "")
+    post_prompt = prompt_kwargs.get("post_prompt", "")
+
+    choices_str = "\n".join(
+        f"{chr(ord('A') + index)}. {choice}" for index, choice in enumerate(doc["options"])
+    )
+    return f"{pre_prompt}{doc['question']}\n{choices_str}{post_prompt}"
+
+
+def illusionvqa_doc_to_target(doc):
+    """Return the option letter (``A``, ``B``, ...) of the correct answer.
+
+    The letter is derived from the position of ``doc["answer"]`` inside
+    ``doc["options"]``. If the answer is not among the options, the ``.index``
+    lookup fails (``ValueError`` for a list).
+    """
+    candidates = doc["options"]
+    answer_position = candidates.index(doc["answer"])
+    return chr(ord("A") + answer_position)
+
+
+class MultiChoiceRegexFilter(ExtendedRegexFilter):
+    """Filter that reduces each model response to its leading option letter.
+
+    Constructor arguments are forwarded unchanged to ``ExtendedRegexFilter``.
+    ``apply`` in this class does not read them: it always uses the fixed
+    pattern below, which matches an uppercase letter followed by a period at
+    the start of the response (leading whitespace allowed).
+    """
+
+    # Compiled once at class creation instead of once per document.
+    _OPTION_LETTER_RE = re.compile(r"^\s*([A-Z])\.")
+
+    def __init__(self, *args, **kwargs):
+        """Forward all arguments to ``ExtendedRegexFilter``."""
+        super().__init__(*args, **kwargs)
+
+    def apply(self, resps, docs):
+        """Extract one answer per document.
+
+        Args:
+            resps: Iterable where each element is the list of model responses
+                for one input/target pair.
+            docs: Iterable of the corresponding documents; only used to pair up
+                with ``resps`` (iteration stops at the shorter of the two).
+
+        Returns:
+            A list with one entry per document: the matched option letter when
+            the first response starts with ``"<LETTER>."``, otherwise the first
+            response unchanged, or ``""`` when the document has no responses.
+        """
+        filtered_resps = []
+        for doc_resps, _doc in zip(resps, docs):
+            if not doc_resps:
+                # Keep the output aligned with docs instead of raising IndexError.
+                filtered_resps.append("")
+                continue
+
+            # Only the first response is ever returned, so later ones are not parsed.
+            first_resp = doc_resps[0]
+            match = self._OPTION_LETTER_RE.match(first_resp)
+            filtered_resps.append(match.group(1) if match else first_resp)
+
+        return filtered_resps