Configurable image output type

VikParuchuri · Dec 3, 2024 · ee96b54
1 parent 619f5b0
commit ee96b54
Show file tree

Hide file tree

Showing 7 changed files with 11 additions and 7 deletions.
diff --git a/marker/output.py b/marker/output.py
@@ -38,4 +38,4 @@ def save_output(rendered: BaseModel, output_dir: str, fname_base: str):
         f.write(json.dumps(rendered.metadata, indent=2))
 
     for img_name, img in images.items():
-        img.save(os.path.join(output_dir, img_name), "PNG", optimize=False, compress_level=3)
+        img.save(os.path.join(output_dir, img_name), settings.OUTPUT_IMAGE_FORMAT)
diff --git a/marker/renderers/__init__.py b/marker/renderers/__init__.py
@@ -35,7 +35,7 @@ def extract_image(document: Document, image_id, to_base64=False):
         cropped = page_img.crop(image_box.bbox)
         if to_base64:
             image_buffer = io.BytesIO()
-            cropped.save(image_buffer, format='PNG')
+            cropped.save(image_buffer, format=settings.OUTPUT_IMAGE_FORMAT)
             cropped = base64.b64encode(image_buffer.getvalue()).decode(settings.OUTPUT_ENCODING)
         return cropped
 

diff --git a/marker/renderers/html.py b/marker/renderers/html.py
@@ -10,6 +10,9 @@
 
 # Ignore beautifulsoup warnings
 import warnings
+
+from marker.settings import settings
+
 warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning)
 
 
@@ -53,7 +56,7 @@ def extract_html(self, document, document_output, level=0):
             elif ref_block_id.block_type in self.image_blocks:
                 if self.extract_images:
                     image = self.extract_image(document, ref_block_id)
-                    image_name = f"{ref_block_id.to_path()}.png"
+                    image_name = f"{ref_block_id.to_path()}.{settings.OUTPUT_IMAGE_FORMAT.lower()}"
                     images[image_name] = image
                     ref.replace_with(BeautifulSoup(f"<p><img src='{image_name}'></p>", 'html.parser'))
                 else:

diff --git a/marker/settings.py b/marker/settings.py
@@ -16,6 +16,7 @@ class Settings(BaseSettings):
 
     # General
     OUTPUT_ENCODING: str = "utf-8"
+    OUTPUT_IMAGE_FORMAT: str = "JPEG"
 
     # General models
     TORCH_DEVICE: Optional[str] = None # Note: MPS device does not work for text detection, and will default to CPU

diff --git a/marker_app.py b/marker_app.py
@@ -44,10 +44,10 @@ def open_pdf(pdf_file):
 
 def img_to_html(img, img_alt):
     img_bytes = io.BytesIO()
-    img.save(img_bytes, format="PNG")
+    img.save(img_bytes, format=settings.OUTPUT_IMAGE_FORMAT)
     img_bytes = img_bytes.getvalue()
     encoded = base64.b64encode(img_bytes).decode()
-    img_html = f'<img src="data:image/png;base64,{encoded}" alt="{img_alt}" style="max-width: 100%;">'
+    img_html = f'<img src="data:image/{settings.OUTPUT_IMAGE_FORMAT.lower()};base64,{encoded}" alt="{img_alt}" style="max-width: 100%;">'
     return img_html
 
 

diff --git a/marker_server.py b/marker_server.py
@@ -110,7 +110,7 @@ async def _convert_pdf(params: CommonParams):
     encoded = {}
     for k, v in images.items():
         byte_stream = io.BytesIO()
-        v.save(byte_stream, format="PNG")
+        v.save(byte_stream, format=settings.OUTPUT_IMAGE_FORMAT)
         encoded[k] = base64.b64encode(byte_stream.getvalue()).decode(settings.OUTPUT_ENCODING)
 
     return {

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "marker-pdf"
-version = "1.0.1"
+version = "1.0.2"
 description = "Convert PDF to markdown with high speed and accuracy."
 authors = ["Vik Paruchuri <[email protected]>"]
 readme = "README.md"