Skip to content

Commit

Permalink
feat: support multi-language description
Browse files Browse the repository at this point in the history
  • Loading branch information
4gac committed Nov 6, 2024
1 parent fadea41 commit f9bb758
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 6 deletions.
24 changes: 23 additions & 1 deletion config.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,26 @@
"icon": "add_photo_alternate",
"category": "Tags",
"configurations": [
{
"program": "docker run --platform linux/amd64 -v \"${working_directory}:/data\" -w /data --rm pdfix/alt-text-openai:latest --name \"${license_name}\" --key \"${license_key}\" detect -i \"${input_pdf}\" -o \"${output_pdf}\" --openai \"${openai_key}\" --lang \"${lang}\"",
"name": "Alt Text with OpenAI",
"desc": "Generates and applies alternative image descriptions to PDF files using PDFix SDK and OpenAI"
},
{
"program": "docker run --platform linux/amd64 -v \"${working_directory}:/data\" -w /data --rm pdfix/alt-text-openai:latest --name \"${license_name}\" --key \"${license_key}\" detect -i \"${input_pdf}\" -o \"${output_pdf}\" --openai \"${openai_key}\" --lang \"${lang}\" --overwrite",
"name": "Alt Text with OpenAI",
"desc": "Generates and applies alternative image descriptions to PDF files using PDFix SDK and OpenAI"
},
{
"program": "docker run --platform linux/amd64 -v \"${working_directory}:/data\" -w /data --rm pdfix/alt-text-openai:latest --name \"${license_name}\" --key \"${license_key}\" detect -i \"${input_pdf}\" -o \"${output_pdf}\" --openai \"${openai_key}\"",
"name": "Alt Text with OpenAI",
"desc": "Generates and applies alternative image descriptions to PDF files using PDFix SDK and OpenAI"
}
},
{
"program": "docker run --platform linux/amd64 -v \"${working_directory}:/data\" -w /data --rm pdfix/alt-text-openai:latest --name \"${license_name}\" --key \"${license_key}\" detect -i \"${input_pdf}\" -o \"${output_pdf}\" --openai \"${openai_key}\" --overwrite",
"name": "Alt Text with OpenAI",
"desc": "Generates and applies alternative image descriptions to PDF files using PDFix SDK and OpenAI"
}
],
"args": [
{
Expand All @@ -39,6 +54,13 @@
"desc": "OpenAI API Key",
"type": "string",
"value": ""
},
{
"title": "Alternate description language",
"name": "lang",
"desc": "Alternate description language",
"type": "string",
"value": ""
}
]
}
Expand Down
4 changes: 2 additions & 2 deletions src/ai.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def encode_image(image_path: str):
return base64.b64encode(image_file.read()).decode("utf-8")


def alt_description(img_path: str, api_key: str):
def alt_description(img_path: str, api_key: str, lang: str):
# Getting the base64 string
base64_image = encode_image(img_path)
# print("Using api key: {}".format(api_key))
Expand All @@ -23,7 +23,7 @@ def alt_description(img_path: str, api_key: str):
"content": [
{
"type": "text",
"text": "Generate alternate description for the image",
"text": f"Generate alternate description for the image in {lang} language",
},
{
"type": "image_url",
Expand Down
8 changes: 8 additions & 0 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,13 @@ def main():
default=False,
help="Overwrite alternate text if already present in the tag",
)
pars_detect.add_argument(
"--lang",
type=str,
required=False,
default="en",
help="Alternate description language",
)

try:
args = parser.parse_args()
Expand Down Expand Up @@ -100,6 +107,7 @@ def main():
args.key,
args.openai,
args.overwrite,
args.lang,
)
# print(desc)
except Exception as e:
Expand Down
10 changes: 7 additions & 3 deletions src/process_pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ def browse_figure_tags(
doc: PdfDoc,
api_key: str,
overwrite: bool,
lang: str,
) -> None:
count = parent.GetNumChildren()
struct_tree = doc.GetStructTree()
Expand All @@ -120,9 +121,9 @@ def browse_figure_tags(
child_elem = struct_tree.GetStructElementFromObject(parent.GetChildObject(i))
if child_elem.GetType(True) == "Figure":
# process figure element
update_image_alt(child_elem, doc, api_key, overwrite)
update_image_alt(child_elem, doc, api_key, overwrite, lang)
else:
browse_figure_tags(child_elem, doc, api_key, overwrite)
browse_figure_tags(child_elem, doc, api_key, overwrite, lang)


def alt_text(
Expand All @@ -132,6 +133,7 @@ def alt_text(
license_key: str,
api_key: str,
overwrite: bool,
lang: str,
) -> None:
"""Run OpenAI for alternate text description.
Expand All @@ -149,6 +151,8 @@ def alt_text(
OpenAI API key.
overwrite : bool
Overwrite alternate text if already present.
lang : str
Alternate description language.
"""
pdfix = GetPdfix()
Expand All @@ -172,7 +176,7 @@ def alt_text(

child_elem = struct_tree.GetStructElementFromObject(struct_tree.GetChildObject(0))
try:
browse_figure_tags(child_elem, doc, api_key, overwrite)
browse_figure_tags(child_elem, doc, api_key, overwrite, lang)
except Exception as e:
raise e

Expand Down

0 comments on commit f9bb758

Please sign in to comment.