From de81defa641df64ad787d8c6c90bbdd3049e02df Mon Sep 17 00:00:00 2001 From: jannisborn Date: Sun, 23 Jun 2024 10:51:51 +0200 Subject: [PATCH] refactor: Num_citations also supports recent scholarly --- paperscraper/scholar/scholar.py | 2 +- paperscraper/scholar/tests/test_scholar.py | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/paperscraper/scholar/scholar.py b/paperscraper/scholar/scholar.py index a05a738..475a428 100644 --- a/paperscraper/scholar/scholar.py +++ b/paperscraper/scholar/scholar.py @@ -105,7 +105,7 @@ def get_citations_from_title(title: str) -> int: title = '"' + title.strip() + '"' matches = scholarly.search_pubs(title) - counts = list(map(lambda p: int(p.bib["cites"]), matches)) + counts = list(map(lambda p: int(p["num_citations"]), matches)) if len(counts) == 0: logger.warning(f"Found no match for {title}.") return 0 diff --git a/paperscraper/scholar/tests/test_scholar.py b/paperscraper/scholar/tests/test_scholar.py index 36e0cfb..e4cc4ea 100644 --- a/paperscraper/scholar/tests/test_scholar.py +++ b/paperscraper/scholar/tests/test_scholar.py @@ -1,15 +1,29 @@ import logging import pandas as pd -import pytest -from paperscraper.scholar import get_scholar_papers +from paperscraper.scholar import ( + get_and_dump_scholar_papers, + get_citations_from_title, + get_scholar_papers, +) logging.disable(logging.INFO) class TestScholar: + def test_citations(self): + num = get_citations_from_title("GT4SD") + assert isinstance(num, int) + assert num > 0 + + def test_dump_search(self, tmpdir): + temp_dir = tmpdir.mkdir("scholar_papers") + output_filepath = temp_dir.join("results.jsonl") + get_and_dump_scholar_papers("GT4SD", str(output_filepath)) + assert output_filepath.check(file=1) + def test_basic_search(self): results = get_scholar_papers("GT4SD") assert len(results) > 0 # Ensure we get some results