diff --git a/server/text_generation_server/models/custom_modeling/idefics_image_processing.py b/server/text_generation_server/models/custom_modeling/idefics_image_processing.py index 21aa3ff3c58..4760ae6fc62 100644 --- a/server/text_generation_server/models/custom_modeling/idefics_image_processing.py +++ b/server/text_generation_server/models/custom_modeling/idefics_image_processing.py @@ -201,8 +201,13 @@ def fetch_images(self, image_url_or_urls: Union[str, List[str]]): response = requests.get(image_url_or_urls, stream=True, headers=headers, timeout=(1, 5)) response.raise_for_status() content = response.content - else: + elif image.startswith("data:"): + # https://stackoverflow.com/questions/17090571/is-there-a-way-to-set-background-image-as-a-base64-encoded-image + #  + image = image.split(",")[-1] content = base64.b64decode(image) + else: + raise ValueError(f"Unrecognized image {image}") try: image = Image.open(BytesIO(content)) diff --git a/server/text_generation_server/models/custom_modeling/idefics_processing.py b/server/text_generation_server/models/custom_modeling/idefics_processing.py index 0fbcbeeba65..98e43a27b69 100644 --- a/server/text_generation_server/models/custom_modeling/idefics_processing.py +++ b/server/text_generation_server/models/custom_modeling/idefics_processing.py @@ -112,6 +112,11 @@ def is_url(string): result = urlparse(string) return all([result.scheme, result.netloc]) +def is_image(string): + """Checks if the passed string refers to an image: either a valid url (as accepted by is_url) 
or a string starting with the data: prefix + (the data URI payload itself is not validated here)""" + return is_url(string) or string.startswith("data:") + class IdeficsProcessor(ProcessorMixin): r""" @@ -314,7 +319,7 @@ def image_tokens(last_was_image): if isinstance(item, str): item = item.strip(" ") - if is_url(item): + if is_image(item): image = self.image_processor.fetch_images(item) full_text += image_tokens(last_was_image) image_objects.append(image) @@ -339,6 +344,7 @@ def image_tokens(last_was_image): image_objects = self.image_processor(image_objects, transform=transform) + text_encoding = self.tokenizer( text=full_text, add_special_tokens=False,