Skip to content

Commit

Permalink
Merge pull request strictdoc-project#1599 from strictdoc-project/stan…
Browse files Browse the repository at this point in the history
…islaw/html

html2pdf: add more debugging lines by using a custom HTTPClient for wdm
  • Loading branch information
stanislaw authored Jan 16, 2024
2 parents bb8f623 + 9fbc61c commit 18a81e3
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 1 deletion.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,6 @@ strictdoc-project.github.io/

### StrictDoc's developer test ###
__*.sdoc

### webdriver_manager: cache with downloaded Chrome Driver binaries ###
strictdoc/export/html2pdf/.wdm
43 changes: 42 additions & 1 deletion strictdoc/export/html2pdf/html2pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,42 @@
import json
import os.path
import pathlib
from typing import Optional

import requests
from requests import Response
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.core.download_manager import WDMDownloadManager
from webdriver_manager.core.http import HttpClient


class HTML2PDF_HTTPClient(HttpClient):
    """
    HTTP client that logs every GET request and retries it on timeouts.

    An instance is meant to be passed to a download manager in place of the
    default client so that download attempts show up in the output.
    """

    def get(self, url, params=None, **kwargs) -> Response:
        """
        Send a GET request to ``url`` and return the response.

        Every attempt is printed before it is made. A connect or read
        timeout (5 seconds each) triggers one more attempt; at most two
        attempts are made in total.

        Raises:
            requests.exceptions.ConnectTimeout, requests.exceptions.ReadTimeout:
                the error of the last attempt, if every attempt timed out.
            AssertionError: if the request failed with any other exception.
        """
        last_error: Optional[Exception] = None
        for attempt in range(1, 3):
            print(  # noqa: T201
                "HTML2PDF_HTTPClient: "
                f"sending GET request attempt {attempt}: {url}"
            )
            try:
                return requests.get(url, params, timeout=(5, 5), **kwargs)
            except (
                requests.exceptions.ConnectTimeout,
                requests.exceptions.ReadTimeout,
            ) as timeout_error_:
                # Timeouts are considered transient: remember the error and
                # let the loop try again.
                last_error = timeout_error_
            except Exception as exception_:
                raise AssertionError(
                    "HTML2PDF_HTTPClient: unknown exception", exception_
                ) from None
        print(  # noqa: T201
            f"HTML2PDF_HTTPClient: "
            f"failed to get response for URL: {url} with error: {last_error}"
        )
        # Every attempt timed out. Propagate the last timeout instead of
        # silently returning None, which would violate the declared return
        # type and only fail later, away from the real cause.
        assert last_error is not None, "the retry loop always runs"
        raise last_error


def get_inches_from_millimeters(mm: float) -> float:
Expand Down Expand Up @@ -68,7 +99,13 @@ def get_pdf_from_html(driver, url) -> bytes:

def create_webdriver():
print("HTML2PDF: creating Chrome Driver service.", flush=True) # noqa: T201
path_to_chrome = ChromeDriverManager().install()

http_client = HTML2PDF_HTTPClient()
download_manager = WDMDownloadManager(http_client)
path_to_chrome = ChromeDriverManager(
download_manager=download_manager
).install()
print(f"HTML2PDF: Chrome Driver available at path: {path_to_chrome}") # noqa: T201

service = Service(path_to_chrome)

Expand Down Expand Up @@ -96,6 +133,10 @@ def create_webdriver():


def main():
# By default, all driver binaries are saved to user.home/.wdm folder.
# You can override this setting and save binaries to project.root/.wdm.
os.environ["WDM_LOCAL"] = "1"

parser = argparse.ArgumentParser(description="HTML2PDF printer script.")
parser.add_argument("paths", help="Paths to input HTML file.")
args = parser.parse_args()
Expand Down

0 comments on commit 18a81e3

Please sign in to comment.