chore: propagate max_retries to top level

jannisborn · May 22, 2024 · 48eec7c · 48eec7c
1 parent 377bc2c
commit 48eec7c
Show file tree

Hide file tree

Showing 3 changed files with 24 additions and 15 deletions.
diff --git a/paperscraper/get_dumps/biorxiv.py b/paperscraper/get_dumps/biorxiv.py
@@ -1,4 +1,5 @@
 """Dump bioRxiv data in JSONL format."""
+
 import json
 import os
 from datetime import datetime
@@ -20,22 +21,25 @@ def biorxiv(
     begin_date: Optional[str] = None,
     end_date: Optional[str] = None,
     save_path: str = save_path,
+    max_retries: int = 10,
 ):
     """Fetches papers from biorxiv based on time range, i.e., begin_date and end_date.
     If the begin_date and end_date are not provided, papers will be fetched from biorxiv
     from the launch date of biorxiv until the current date. The fetched papers will be
     stored in jsonl format in save_path.
 
     Args:
+        begin_date (str, optional): begin date expressed as YYYY-MM-DD.
+            Defaults to None, i.e., earliest possible.
+        end_date (str, optional): end date expressed as YYYY-MM-DD.
+            Defaults to None, i.e., today.
         save_path (str, optional): Path where the dump is stored.
             Defaults to save_path.
-        begin_date (Optional[str], optional): begin date expressed as YYYY-MM-DD.
-            Defaults to None.
-        end_date (Optional[str], optional): end date expressed as YYYY-MM-DD.
-            Defaults to None.
+        max_retries (int, optional): Number of retries when API shows connection issues.
+            Defaults to 10.
     """
     # create API client
-    api = BioRxivApi()
+    api = BioRxivApi(max_retries=max_retries)
 
     # dump all papers
     with open(save_path, "w") as fp:

diff --git a/paperscraper/get_dumps/chemrxiv.py b/paperscraper/get_dumps/chemrxiv.py
@@ -1,4 +1,5 @@
 """Dump chemRxiv data in JSONL format."""
+
 import logging
 import os
 import sys
@@ -28,11 +29,11 @@ def chemrxiv(
     stored in jsonl format in save_path.
 
     Args:
-        begin_date (Optional[str]): begin date expressed as YYYY-MM-DD.
-            Defaults to None.
-        end_date (Optional[str]): end date expressed as YYYY-MM-DD.
-            Defaults to None.
-        save_path (str): Path where the dump is stored.
+        begin_date (str, optional): begin date expressed as YYYY-MM-DD.
+            Defaults to None, i.e., earliest possible.
+        end_date (str, optional): end date expressed as YYYY-MM-DD.
+            Defaults to None, i.e., today.
+        save_path (str, optional): Path where the dump is stored.
             Defaults to save_path.
     """
 

diff --git a/paperscraper/get_dumps/medrxiv.py b/paperscraper/get_dumps/medrxiv.py
@@ -1,4 +1,5 @@
 """Dump medrxiv data in JSONL format."""
+
 import json
 import os
 from datetime import datetime
@@ -18,22 +19,25 @@ def medrxiv(
     begin_date: Optional[str] = None,
     end_date: Optional[str] = None,
     save_path: str = save_path,
+    max_retries: int = 10,
 ):
     """Fetches papers from medrxiv based on time range, i.e., begin_date and end_date.
     If the begin_date and end_date are not provided, then papers will be fetched from
     medrxiv starting from the launch date of medrxiv until current date. The fetched
     papers will be stored in jsonl format in save_path.
 
     Args:
+        begin_date (str, optional): begin date expressed as YYYY-MM-DD.
+            Defaults to None, i.e., earliest possible.
+        end_date (str, optional): end date expressed as YYYY-MM-DD.
+            Defaults to None, i.e., today.
         save_path (str, optional): Path where the dump is stored.
             Defaults to save_path.
-        begin_date (Optional[str], optional): begin date expressed as YYYY-MM-DD.
-            Defaults to None.
-        end_date (Optional[str], optional): end date expressed as YYYY-MM-DD.
-            Defaults to None.
+        max_retries (int, optional): Number of retries when API shows connection issues.
+            Defaults to 10.
     """
     # create API client
-    api = MedRxivApi()
+    api = MedRxivApi(max_retries=max_retries)
     # dump all papers
     with open(save_path, "w") as fp:
         for index, paper in enumerate(