Skip to content

Commit

Permalink
chore (translator): fmt
Browse files Browse the repository at this point in the history
  • Loading branch information
reycn committed Nov 26, 2024
1 parent ea7280c commit 0e5101a
Showing 1 changed file with 61 additions and 35 deletions.
96 changes: 61 additions & 35 deletions pdf2zh/translator.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import hashlib
import hmac
import html
import logging
import os
import re
import time
from datetime import UTC, datetime
from json import dumps, loads

import deepl
Expand All @@ -11,10 +15,6 @@
from azure.ai.translation.text import TextTranslationClient
from azure.core.credentials import AzureKeyCredential

import hmac
import hashlib
import time
from datetime import datetime,UTC

class BaseTranslator:
def __init__(self, service, lang_out, lang_in, model):
Expand Down Expand Up @@ -58,21 +58,20 @@ def translate(self, text):
result = html.unescape(re_result[0])
return result


class TencentTranslator(BaseTranslator):
def sign(self,key, msg):
def sign(self, key, msg):
return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest()

def __init__(self, service, lang_out, lang_in, model):
lang_out = "zh" if lang_out == "auto" else lang_out
lang_in = "en" if lang_in == "auto" else lang_in
super().__init__(service, lang_out, lang_in, model)
try:
server_url = (
"tmt.tencentcloudapi.com"
)
server_url = "tmt.tencentcloudapi.com"
self.secret_id = os.getenv("TENCENT_SECRET_ID")
self.secret_key = os.getenv("TENCENT_SECRET_KEY")

except KeyError as e:
missing_var = e.args[0]
raise ValueError(
Expand All @@ -84,51 +83,77 @@ def __init__(self, service, lang_out, lang_in, model):

def translate(self, text):
text = text[:5000]
data={
"SourceText":text,
"Source":self.lang_in,
"Target":self.lang_out,
"ProjectId":0
data = {
"SourceText": text,
"Source": self.lang_in,
"Target": self.lang_out,
"ProjectId": 0,
}
payloadx = dumps(data)
hashed_request_payload = hashlib.sha256(payloadx.encode("utf-8")).hexdigest()
canonical_request = ("POST" + "\n" +
"/" + "\n" +
"" + "\n" +
"content-type:application/json; charset=utf-8\nhost:tmt.tencentcloudapi.com\nx-tc-action:texttranslate\n" + "\n" +
"content-type;host;x-tc-action" + "\n" +
hashed_request_payload)
canonical_request = (
"POST"
+ "\n"
+ "/"
+ "\n"
+ ""
+ "\n"
+ "content-type:application/json; charset=utf-8\nhost:tmt.tencentcloudapi.com\nx-tc-action:texttranslate\n"
+ "\n"
+ "content-type;host;x-tc-action"
+ "\n"
+ hashed_request_payload
)

timestamp = int(time.time())
date = datetime.fromtimestamp(timestamp, UTC).strftime("%Y-%m-%d")
credential_scope = date + "/tmt/tc3_request"
hashed_canonical_request = hashlib.sha256(canonical_request.encode("utf-8")).hexdigest()
hashed_canonical_request = hashlib.sha256(
canonical_request.encode("utf-8")
).hexdigest()
algorithm = "TC3-HMAC-SHA256"
string_to_sign = (algorithm + "\n" +
str(timestamp) + "\n" +
credential_scope + "\n" +
hashed_canonical_request)
string_to_sign = (
algorithm
+ "\n"
+ str(timestamp)
+ "\n"
+ credential_scope
+ "\n"
+ hashed_canonical_request
)
secret_date = self.sign(("TC3" + self.secret_key).encode("utf-8"), date)
secret_service = self.sign(secret_date, "tmt")
secret_signing = self.sign(secret_service, "tc3_request")
signed_headers = "content-type;host;x-tc-action"
signature = hmac.new(secret_signing, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()
authorization = (algorithm + " " +
"Credential=" + self.secret_id + "/" + credential_scope + ", " +
"SignedHeaders=" + signed_headers + ", " +
"Signature=" + signature)
signature = hmac.new(
secret_signing, string_to_sign.encode("utf-8"), hashlib.sha256
).hexdigest()
authorization = (
algorithm
+ " "
+ "Credential="
+ self.secret_id
+ "/"
+ credential_scope
+ ", "
+ "SignedHeaders="
+ signed_headers
+ ", "
+ "Signature="
+ signature
)
self.headers = {
"Authorization": authorization,
"Content-Type": "application/json; charset=utf-8",
"Host": "tmt.tencentcloudapi.com",
"X-TC-Action": "TextTranslate",
"X-TC-Region":"ap-beijing",
"X-TC-Region": "ap-beijing",
"X-TC-Timestamp": str(timestamp),
"X-TC-Version": "2018-03-21"
"X-TC-Version": "2018-03-21",
}

response = self.session.post(
"https://"+self.base_link,
"https://" + self.base_link,
json=data,
headers=self.headers,
)
Expand All @@ -139,7 +164,7 @@ def translate(self, text):
raise ValueError("HTTP error: " + str(response.status_code))
# 2. Result test
try:
result = result['Response']['TargetText']
result = result["Response"]["TargetText"]
return result
except KeyError:
result = ""
Expand All @@ -149,6 +174,7 @@ def translate(self, text):
raise ValueError("Empty translation result")
return result


class DeepLXTranslator(BaseTranslator):
def __init__(self, service, lang_out, lang_in, model):
lang_out = "zh" if lang_out == "auto" else lang_out
Expand All @@ -168,7 +194,7 @@ def __init__(self, service, lang_out, lang_in, model):
) from e

self.session = requests.Session()
server_url=server_url.rstrip('/')
server_url = server_url.rstrip("/")
if auth_key:
self.base_link = f"{server_url}/{auth_key}/translate"
else:
Expand Down

0 comments on commit 0e5101a

Please sign in to comment.