From 449db151401294da67b026641eddebeef1e984c9 Mon Sep 17 00:00:00 2001 From: Paul Hebble Date: Sat, 19 Oct 2024 09:49:13 -0500 Subject: [PATCH 1/3] Stop attempting archive.org download counts after timeout --- netkan/netkan/download_counter.py | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/netkan/netkan/download_counter.py b/netkan/netkan/download_counter.py index d226bcc..40b6c90 100644 --- a/netkan/netkan/download_counter.py +++ b/netkan/netkan/download_counter.py @@ -9,6 +9,7 @@ from datetime import date import requests +from requests.exceptions import ConnectTimeout from .utils import repo_file_add_or_changed, legacy_read_text from .repos import CkanMetaRepo @@ -183,6 +184,7 @@ class InternetArchiveBatchedQuery: def __init__(self) -> None: self.ids: Dict[str, str] = {} + self.connect_timed_out = False def empty(self) -> bool: return len(self.ids) == 0 @@ -196,15 +198,24 @@ def add(self, ckan: Ckan) -> None: def get_result(self, counts: Optional[Dict[str, int]] = None) -> Dict[str, int]: if counts is None: counts = {} - result = requests.get(self.IARCHIVE_API + ','.join(self.ids.values()), - timeout=60).json() - for ckan_ident, ia_ident in self.ids.items(): - try: - counts[ckan_ident] = counts.get(ckan_ident, 0) + result[ia_ident]['all_time'] - except KeyError as exc: - logging.error('InternetArchive id not found in downloads result: %s', - ia_ident, exc_info=exc) - return counts + if self.connect_timed_out: + return counts + try: + result = requests.get(self.IARCHIVE_API + ','.join(self.ids.values()), + timeout=60).json() + for ckan_ident, ia_ident in self.ids.items(): + try: + counts[ckan_ident] = counts.get(ckan_ident, 0) + result[ia_ident]['all_time'] + except KeyError as exc: + logging.error('InternetArchive id not found in downloads result: %s', + ia_ident, exc_info=exc) + return counts + except ConnectTimeout as exc: + # Cleanly turn off archive.org counting while the downtime continues + logging.error('Failed to get counts from archive.org', + exc_info=exc) + self.connect_timed_out = True + return counts class SourceForgeQuerier: From 0d72ef72acb6fe217f0615d88d8cb33d95ccf6b2 Mon Sep 17 00:00:00 2001 From: Paul Hebble Date: Sat, 19 Oct 2024 09:56:51 -0500 Subject: [PATCH 2/3] Try another type check --- netkan/netkan/metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/netkan/netkan/metadata.py b/netkan/netkan/metadata.py index f9b1e5b..45e660b 100644 --- a/netkan/netkan/metadata.py +++ b/netkan/netkan/metadata.py @@ -366,7 +366,7 @@ def version(self) -> Version: def download(self) -> str: download = self._raw.get('download') if isinstance(download, list): - return download[0] if len(download) > 0 else None + return download[0] if isinstance(download[0], str) and len(download) > 0 else None return download # Provide all downloads with alternate property in case we need them, From 73f661a2ecb55b7d3e20be476a31cf5cfb213b0c Mon Sep 17 00:00:00 2001 From: Paul Hebble Date: Sat, 19 Oct 2024 10:01:31 -0500 Subject: [PATCH 3/3] Don't return None for download --- netkan/netkan/metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/netkan/netkan/metadata.py b/netkan/netkan/metadata.py index 45e660b..daac1a5 100644 --- a/netkan/netkan/metadata.py +++ b/netkan/netkan/metadata.py @@ -366,7 +366,7 @@ def version(self) -> Version: def download(self) -> str: download = self._raw.get('download') if isinstance(download, list): - return download[0] if isinstance(download[0], str) and len(download) > 0 else None + return download[0] if isinstance(download[0], str) and len(download) > 0 else '' return download # Provide all downloads with alternate property in case we need them,