class HTML2PDF_HTTPClient(HttpClient):
    """
    HTTP client handed to webdriver_manager's download manager.

    It exists to make driver downloads debuggable: every GET request is
    printed before it is sent, requests are bounded by explicit timeouts, and
    a timed-out request is retried once before the failure is reported.
    """

    def get(self, url, params=None, **kwargs) -> Response:
        """
        Send a GET request to ``url`` and return the response.

        The request is attempted at most twice. Only connect and read timeouts
        trigger a retry; any other exception aborts immediately.

        :param url: URL to request.
        :param params: Optional query parameters forwarded to ``requests.get``.
        :param kwargs: Extra keyword arguments forwarded to ``requests.get``.
            A caller-supplied ``timeout`` takes precedence over the default
            ``(5, 5)`` (connect, read) timeout.
        :return: The response of the first attempt that did not time out.
        :raises requests.exceptions.ConnectTimeout: If the last attempt timed
            out while connecting.
        :raises requests.exceptions.ReadTimeout: If the last attempt timed out
            while waiting for data.
        :raises AssertionError: If ``requests.get`` failed with anything other
            than a timeout. The original exception is attached as the cause.
        """
        # setdefault instead of a hard-coded keyword: passing timeout= both
        # explicitly and through **kwargs would raise a TypeError.
        kwargs.setdefault("timeout", (5, 5))

        last_error: Optional[Exception] = None
        for attempt in range(1, 3):
            print(  # noqa: T201
                f"HTML2PDF_HTTPClient: sending GET request attempt {attempt}: {url}"
            )
            try:
                return requests.get(url, params, **kwargs)
            except (
                requests.exceptions.ConnectTimeout,
                requests.exceptions.ReadTimeout,
            ) as timeout_error_:
                # Timeouts are considered transient: remember and retry.
                last_error = timeout_error_
            except Exception as exception_:
                # Keep the original exception as the cause so that its
                # traceback stays visible when debugging download problems.
                raise AssertionError(
                    "HTML2PDF_HTTPClient: unknown exception", exception_
                ) from exception_

        print(  # noqa: T201
            f"HTML2PDF_HTTPClient: "
            f"failed to get response for URL: {url} with error: {last_error}"
        )
        # Every attempt timed out. Re-raise the last timeout instead of
        # implicitly returning None, which would violate the declared return
        # type and fail later in the caller with an unrelated error.
        assert last_error is not None, "The retry loop runs at least once."
        raise last_error