From 48eec7c6b27284a503dc1ff08f22b73a643f3cbb Mon Sep 17 00:00:00 2001 From: jannisborn Date: Wed, 22 May 2024 18:18:43 +0200 Subject: [PATCH] chore: populate max_retries to top level --- paperscraper/get_dumps/biorxiv.py | 14 +++++++++----- paperscraper/get_dumps/chemrxiv.py | 11 ++++++----- paperscraper/get_dumps/medrxiv.py | 14 +++++++++----- 3 files changed, 24 insertions(+), 15 deletions(-) diff --git a/paperscraper/get_dumps/biorxiv.py b/paperscraper/get_dumps/biorxiv.py index 2dc658f..aaa004a 100755 --- a/paperscraper/get_dumps/biorxiv.py +++ b/paperscraper/get_dumps/biorxiv.py @@ -1,4 +1,5 @@ """Dump bioRxiv data in JSONL format.""" + import json import os from datetime import datetime @@ -20,6 +21,7 @@ def biorxiv( begin_date: Optional[str] = None, end_date: Optional[str] = None, save_path: str = save_path, + max_retries: int = 10, ): """Fetches papers from biorxiv based on time range, i.e., begin_date and end_date. If the begin_date and end_date are not provided, papers will be fetched from biorxiv @@ -27,15 +29,17 @@ def biorxiv( stored in jsonl format in save_path. Args: + begin_date (str, optional): begin date expressed as YYYY-MM-DD. + Defaults to None, i.e., earliest possible. + end_date (str, optional): end date expressed as YYYY-MM-DD. + Defaults to None, i.e., today. save_path (str, optional): Path where the dump is stored. Defaults to save_path. - begin_date (Optional[str], optional): begin date expressed as YYYY-MM-DD. - Defaults to None. - end_date (Optional[str], optional): end date expressed as YYYY-MM-DD. - Defaults to None. + max_retries (int, optional): Number of retries when API shows connection issues. + Defaults to 10. """ # create API client - api = BioRxivApi() + api = BioRxivApi(max_retries=max_retries) # dump all papers with open(save_path, "w") as fp: diff --git a/paperscraper/get_dumps/chemrxiv.py b/paperscraper/get_dumps/chemrxiv.py index 00feac2..d3c8080 100644 --- a/paperscraper/get_dumps/chemrxiv.py +++ b/paperscraper/get_dumps/chemrxiv.py @@ -1,4 +1,5 @@ """Dump chemRxiv data in JSONL format.""" + import logging import os import sys @@ -28,11 +29,11 @@ def chemrxiv( stored in jsonl format in save_path. Args: - begin_date (Optional[str]): begin date expressed as YYYY-MM-DD. - Defaults to None. - end_date (Optional[str]): end date expressed as YYYY-MM-DD. - Defaults to None. - save_path (str): Path where the dump is stored. + begin_date (str, optional): begin date expressed as YYYY-MM-DD. + Defaults to None, i.e., earliest possible. + end_date (str, optional): end date expressed as YYYY-MM-DD. + Defaults to None, i.e., today. + save_path (str, optional): Path where the dump is stored. Defaults to save_path. """ diff --git a/paperscraper/get_dumps/medrxiv.py b/paperscraper/get_dumps/medrxiv.py index fcbf1b5..1cfa6e7 100755 --- a/paperscraper/get_dumps/medrxiv.py +++ b/paperscraper/get_dumps/medrxiv.py @@ -1,4 +1,5 @@ """Dump medrxiv data in JSONL format.""" + import json import os from datetime import datetime @@ -18,6 +19,7 @@ def medrxiv( begin_date: Optional[str] = None, end_date: Optional[str] = None, save_path: str = save_path, + max_retries: int = 10, ): """Fetches papers from medrxiv based on time range, i.e., begin_date and end_date. If the begin_date and end_date are not provided, then papers will be fetched from @@ -25,15 +27,17 @@ def medrxiv( papers will be stored in jsonl format in save_path. Args: + begin_date (str, optional): begin date expressed as YYYY-MM-DD. + Defaults to None, i.e., earliest possible. + end_date (str, optional): end date expressed as YYYY-MM-DD. + Defaults to None, i.e., today. save_path (str, optional): Path where the dump is stored. Defaults to save_path. - begin_date (Optional[str], optional): begin date expressed as YYYY-MM-DD. - Defaults to None. - end_date (Optional[str], optional): end date expressed as YYYY-MM-DD. - Defaults to None. + max_retries (int, optional): Number of retries when API shows connection issues. + Defaults to 10. """ # create API client - api = MedRxivApi() + api = MedRxivApi(max_retries=max_retries) # dump all papers with open(save_path, "w") as fp: for index, paper in enumerate(