Skip to content

Commit

Permalink
fix: overwrite file when input and output are the same #3
Browse files Browse the repository at this point in the history
  • Loading branch information
4gac committed May 12, 2024
1 parent cc2cd71 commit 7ce6fe4
Showing 1 changed file with 14 additions and 3 deletions.
17 changes: 14 additions & 3 deletions src/lang_detect.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import argparse
from collections import Counter
import shutil
import os, sys, json
import tempfile
from langdetect import detect
from pdfixsdk.Pdfix import *
from pdfixsdk.Pdfix import GetPdfix
Expand Down Expand Up @@ -75,16 +77,25 @@ def detect_pdf_lang(in_path: str, out_path: str):

if out_path.endswith(".pdf"):
doc.SetLang(most_common_lang[0][0])
doc.Save(out_path, kSaveFull)

# save pdf to temporary file
temp_file = tempfile.NamedTemporaryFile()
doc.Save(temp_file.name, kSaveFull)

# close pdf
doc.Close()

# copy temp file to output path
shutil.copyfile(temp_file.name, out_path)

temp_file.close()

else:
if not os.path.exists(os.path.dirname(out_path)):
os.makedirs(os.path.dirname(out_path))
with open(out_path, "w") as f:
f.write(most_common_lang[0][0])

doc.Close()


def main():
parser = argparse.ArgumentParser()
Expand Down

0 comments on commit 7ce6fe4

Please sign in to comment.