Skip to content

Commit

Permalink
chore:formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
jannisborn committed Jul 2, 2024
1 parent e07d330 commit 0acb8e0
Show file tree
Hide file tree
Showing 10 changed files with 56 additions and 46 deletions.
3 changes: 1 addition & 2 deletions paperscraper/arxiv/arxiv.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
from typing import Dict, List, Union

import arxiv
import pandas as pd
from tqdm import tqdm

import arxiv

from ..utils import dump_papers
from .utils import get_query_from_keywords

Expand Down
1 change: 1 addition & 0 deletions paperscraper/get_dumps/utils/chemrxiv/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Misc utils to download chemRxiv dump"""

import json
import logging
import os
Expand Down
16 changes: 9 additions & 7 deletions paperscraper/impact.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,13 +97,15 @@ def calculate_fuzziness_score(row):

# Prepare the final result
results = [
row.to_dict()
if return_all
else {
"journal": row["journal"],
"factor": row["factor"],
"score": row["score"],
}
(
row.to_dict()
if return_all
else {
"journal": row["journal"],
"factor": row["factor"],
"score": row["score"],
}
)
for _, row in matched_df.iterrows()
]

Expand Down
6 changes: 4 additions & 2 deletions paperscraper/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,8 +304,10 @@ def plot_single(
plt.bar(ind, np.zeros((len(ind),)), color="k", bottom=bottom)
)

plt.ylabel("Counts", size=17) if not logscale else plt.ylabel(
"Counts (log scale)", size=17
(
plt.ylabel("Counts", size=17)
if not logscale
else plt.ylabel("Counts (log scale)", size=17)
)
plt.xlabel("Years", size=17)
plt.title(title_text, size=17)
Expand Down
37 changes: 20 additions & 17 deletions paperscraper/pubmed/tests/test_pubmed.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,34 @@
from paperscraper.pubmed import get_and_dump_pubmed_papers, get_pubmed_papers
from paperscraper.pubmed.utils import get_query_from_keywords_and_date
import os
import pytest
from unittest.mock import patch
import tempfile
from unittest.mock import patch

import pytest

from paperscraper.pubmed import get_and_dump_pubmed_papers, get_pubmed_papers
from paperscraper.pubmed.utils import get_query_from_keywords_and_date

KEYWORDS = [['machine learning', 'deep learning'], ['zoology']]
KEYWORDS = [["machine learning", "deep learning"], ["zoology"]]


class TestPubMed:

def test_get_and_dump_pubmed(self):
with tempfile.TemporaryDirectory() as temp_dir:
output_filepath = os.path.join(temp_dir, 'tmp.jsonl')
output_filepath = os.path.join(temp_dir, "tmp.jsonl")
get_and_dump_pubmed_papers(KEYWORDS, output_filepath=output_filepath)
assert os.path.exists(output_filepath), "File was not created"

def test_email(self):
query = get_query_from_keywords_and_date(KEYWORDS, start_date='2020/07/20')
df = get_pubmed_papers(query, fields=['emails', 'title', 'authors'])
assert 'emails' in df.columns

query = get_query_from_keywords_and_date(KEYWORDS, end_date='2020/07/20')
df = get_pubmed_papers(query, fields=['emails', 'title', 'authors'])
assert 'emails' in df.columns
def test_email(self):
query = get_query_from_keywords_and_date(KEYWORDS, start_date="2020/07/20")
df = get_pubmed_papers(query, fields=["emails", "title", "authors"])
assert "emails" in df.columns

query = get_query_from_keywords_and_date(KEYWORDS, start_date='2020/07/10', end_date='2020/07/20')
df = get_pubmed_papers(query, fields=['emails', 'title', 'authors'])
assert 'emails' in df.columns
query = get_query_from_keywords_and_date(KEYWORDS, end_date="2020/07/20")
df = get_pubmed_papers(query, fields=["emails", "title", "authors"])
assert "emails" in df.columns

query = get_query_from_keywords_and_date(
KEYWORDS, start_date="2020/07/10", end_date="2020/07/20"
)
df = get_pubmed_papers(query, fields=["emails", "title", "authors"])
assert "emails" in df.columns
7 changes: 4 additions & 3 deletions paperscraper/scholar/tests/test_scholar.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import functools
import logging

import pandas as pd
import pytest
from scholarly._proxy_generator import MaxTriesExceededException
import functools

from paperscraper.scholar import (
get_and_dump_scholar_papers,
Expand Down Expand Up @@ -58,8 +59,8 @@ def test_basic_search(self):
]
]
)

@handle_scholar_exception
def test_bad_search(self):
results = get_scholar_papers("GT4SDfsdhfiobfpsdfbsdp")
assert len(results) == 0
assert len(results) == 0
3 changes: 2 additions & 1 deletion paperscraper/tests/test_dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def setup_medrxiv(self):
@pytest.fixture
def setup_biorxiv(self):
return lambda: biorxiv(max_retries=2)

@pytest.fixture
def setup_chemrxiv(self):
return chemrxiv
Expand Down Expand Up @@ -78,4 +78,5 @@ def test_arxiv_dumping(self):

def test_dump_existence(self):
from paperscraper.load_dumps import QUERY_FN_DICT

assert len(QUERY_FN_DICT) > 2
2 changes: 1 addition & 1 deletion paperscraper/tests/test_pdf.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import logging
import os
import shutil
from pathlib import Path
from unittest.mock import MagicMock, mock_open, patch
import shutil

import pytest

Expand Down
26 changes: 13 additions & 13 deletions paperscraper/xrxiv/tests/test_xrxiv.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
from paperscraper.xrxiv.xrxiv_query import XRXivQuery
from paperscraper.get_dumps import medrxiv
import os

from paperscraper.get_dumps import medrxiv
from paperscraper.xrxiv.xrxiv_query import XRXivQuery



covid19 = ['COVID-19', 'SARS-CoV-2']
ai = ['Artificial intelligence', 'Deep learning', 'Machine learning']
mi = ['Medical imaging']
covid19 = ["COVID-19", "SARS-CoV-2"]
ai = ["Artificial intelligence", "Deep learning", "Machine learning"]
mi = ["Medical imaging"]


class TestXRXiv:

def test_get_medrxiv(self):
medrxiv(begin_date="2020-05-01", end_date="2020-05-10", save_path='medrix_tmp_dump.jsonl')

medrxiv(
begin_date="2020-05-01",
end_date="2020-05-10",
save_path="medrix_tmp_dump.jsonl",
)

def test_xriv_querier(self):
querier = XRXivQuery('medrix_tmp_dump.jsonl')
querier = XRXivQuery("medrix_tmp_dump.jsonl")
query = [covid19, ai, mi]
querier.search_keywords(query, output_filepath='covid19_ai_imaging.jsonl')
assert os.path.exists('covid19_ai_imaging.jsonl')

querier.search_keywords(query, output_filepath="covid19_ai_imaging.jsonl")
assert os.path.exists("covid19_ai_imaging.jsonl")
1 change: 1 addition & 0 deletions paperscraper/xrxiv/xrxiv_query.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Query dumps from bioRxiv and medRXiv."""

import logging
import sys
from typing import List, Union
Expand Down

0 comments on commit 0acb8e0

Please sign in to comment.