From f9bb758c2739a15b6e764632c0cf0a55a5923067 Mon Sep 17 00:00:00 2001 From: 4gac Date: Wed, 6 Nov 2024 11:30:20 +0100 Subject: [PATCH] feat: support multi-language description --- config.json | 24 +++++++++++++++++++++++- src/ai.py | 4 ++-- src/main.py | 8 ++++++++ src/process_pdf.py | 10 +++++++--- 4 files changed, 40 insertions(+), 6 deletions(-) diff --git a/config.json b/config.json index 9bffa44..84b18b0 100644 --- a/config.json +++ b/config.json @@ -10,11 +10,26 @@ "icon": "add_photo_alternate", "category": "Tags", "configurations": [ + { + "program": "docker run --platform linux/amd64 -v \"${working_directory}:/data\" -w /data --rm pdfix/alt-text-openai:latest --name \"${license_name}\" --key \"${license_key}\" detect -i \"${input_pdf}\" -o \"${output_pdf}\" --openai \"${openai_key}\" --lang \"${lang}\"", + "name": "Alt Text with OpenAI", + "desc": "Generate and applies alternative image descriptions to PDF files using PDFix SDK and OpenAI" + }, + { + "program": "docker run --platform linux/amd64 -v \"${working_directory}:/data\" -w /data --rm pdfix/alt-text-openai:latest --name \"${license_name}\" --key \"${license_key}\" detect -i \"${input_pdf}\" -o \"${output_pdf}\" --openai \"${openai_key}\" --lang \"${lang}\" --overwrite", + "name": "Alt Text with OpenAI", + "desc": "Generate and applies alternative image descriptions to PDF files using PDFix SDK and OpenAI" + }, { "program": "docker run --platform linux/amd64 -v \"${working_directory}:/data\" -w /data --rm pdfix/alt-text-openai:latest --name \"${license_name}\" --key \"${license_key}\" detect -i \"${input_pdf}\" -o \"${output_pdf}\" --openai \"${openai_key}\"", "name": "Alt Text with OpenAI", "desc": "Generate and applies alternative image descriptions to PDF files using PDFix SDK and OpenAI" - } + }, + { + "program": "docker run --platform linux/amd64 -v \"${working_directory}:/data\" -w /data --rm pdfix/alt-text-openai:latest --name \"${license_name}\" --key \"${license_key}\" detect -i \"${input_pdf}\" -o \"${output_pdf}\" --openai \"${openai_key}\" --overwrite", + "name": "Alt Text with OpenAI", + "desc": "Generate and applies alternative image descriptions to PDF files using PDFix SDK and OpenAI" + }, ], "args": [ { @@ -39,6 +54,13 @@ "desc": "OpenAI API Key", "type": "string", "value": "" + }, + { + "title": "Alternate description language", + "name": "lang", + "desc": "Alternate description language", + "type": "string", + "value": "" } ] } diff --git a/src/ai.py b/src/ai.py index 5171d1f..a9771f8 100644 --- a/src/ai.py +++ b/src/ai.py @@ -9,7 +9,7 @@ def encode_image(image_path: str): return base64.b64encode(image_file.read()).decode("utf-8") -def alt_description(img_path: str, api_key: str): +def alt_description(img_path: str, api_key: str, lang: str): # Getting the base64 string base64_image = encode_image(img_path) # print("Using api key: {}".format(api_key)) @@ -23,7 +23,7 @@ def alt_description(img_path: str, api_key: str): "content": [ { "type": "text", - "text": "Generate alternate description for the image", + "text": f"Generate alternate description for the image in {lang} language", }, { "type": "image_url", diff --git a/src/main.py b/src/main.py index 6c85564..4b211e9 100644 --- a/src/main.py +++ b/src/main.py @@ -64,6 +64,13 @@ def main(): default=False, help="Overwrite alternate text if already present in the tag", ) + pars_detect.add_argument( + "--lang", + type=str, + required=False, + default="en", + help="Alternate description laguage", + ) try: args = parser.parse_args() @@ -100,6 +107,7 @@ def main(): args.key, args.openai, args.overwrite, + args.lang, ) # print(desc) except Exception as e: diff --git a/src/process_pdf.py b/src/process_pdf.py index 22c7796..35a52df 100644 --- a/src/process_pdf.py +++ b/src/process_pdf.py @@ -111,6 +111,7 @@ def browse_figure_tags( doc: PdfDoc, api_key: str, overwrite: bool, + lang: str, ) -> None: count = parent.GetNumChildren() struct_tree = doc.GetStructTree() @@ -120,9 +121,9 @@ def browse_figure_tags( child_elem = struct_tree.GetStructElementFromObject(parent.GetChildObject(i)) if child_elem.GetType(True) == "Figure": # process figure element - update_image_alt(child_elem, doc, api_key, overwrite) + update_image_alt(child_elem, doc, api_key, overwrite, lang) else: - browse_figure_tags(child_elem, doc, api_key, overwrite) + browse_figure_tags(child_elem, doc, api_key, overwrite, lang) def alt_text( @@ -132,6 +133,7 @@ def alt_text( license_key: str, api_key: str, overwrite: bool, + lang: str, ) -> None: """Run OpenAI for alternate text description. @@ -149,6 +151,8 @@ def alt_text( OpenAI API key. overwrite : bool Ovewrite alternate text if already present. + lang : str + Alternate description language. """ pdfix = GetPdfix() @@ -172,7 +176,7 @@ def alt_text( child_elem = struct_tree.GetStructElementFromObject(struct_tree.GetChildObject(0)) try: - browse_figure_tags(child_elem, doc, api_key, overwrite) + browse_figure_tags(child_elem, doc, api_key, overwrite, lang) except Exception as e: raise e