Skip to content

Commit

Permalink
Merge tag 'test_fixes_article_ingest' into develop
Browse files Browse the repository at this point in the history
Updates to article ingest test and error flow
  • Loading branch information
Steven-Eardley committed Oct 2, 2023
2 parents 7074f82 + 9cd2b6f commit 6c9730a
Show file tree
Hide file tree
Showing 11 changed files with 77 additions and 48 deletions.
8 changes: 4 additions & 4 deletions doajtest/unit/resources/harvester_resp.json
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@
"journal": {
"title": "My Journal",
"medlineAbbreviation": "My Jour",
"essn": "1234-5678",
"issn": "9876-5432",
"issn": "1234-5678",
"essn": "9876-5432",
"isoabbreviation": "My Jour",
"nlmid": "123456789"
}
Expand Down Expand Up @@ -143,8 +143,8 @@
"journal": {
"title": "My Journal",
"medlineAbbreviation": "My Jour",
"essn": "1234-5678",
"issn": "9876-5432",
"issn": "1234-5678",
"essn": "9876-5432",
"isoabbreviation": "My Jour",
"nlmid": "123456789"
}
Expand Down
35 changes: 22 additions & 13 deletions doajtest/unit/test_bll_article_batch_create_article.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from doajtest.helpers import DoajTestCase
from portality.bll import DOAJ
from portality.bll import exceptions
from portality.models import Article, Account,Journal
from portality.models import Article, Account, Journal
from portality.lib.paths import rel2abs
from doajtest.mocks.bll_article import BLLArticleMockFactory
from doajtest.mocks.model_Article import ModelArticleMockFactory
Expand Down Expand Up @@ -37,12 +37,14 @@ def setUp(self):
self._get_duplicate = self.svc.get_duplicate
self._issn_ownership_status = self.svc.issn_ownership_status
self._get_journal = Article.get_journal
self._find_by_issn_exact = Journal.find_by_issn_exact

def tearDown(self):
self.svc.is_legitimate_owner = self._is_legitimate_owner
self.svc.get_duplicate = self._get_duplicate
self.svc.issn_ownership_status = self._issn_ownership_status
Article.get_journal = self._get_journal
Journal.find_by_issn_exact = self._find_by_issn_exact
super(TestBLLArticleBatchCreateArticle, self).tearDown()

@parameterized.expand(load_cases)
Expand Down Expand Up @@ -118,8 +120,8 @@ def test_01_batch_create_article(self, name, kwargs):
article = Article(**source)
article.set_id()
articles.append(article)
if add_journal_info:
journal_specs.append({"title" : "0", "pissn" : "0000-0000", "eissn" : "0000-0001"})
# We always need a journal to exist for an article to be created
journal_specs.append({"title" : "0", "pissn" : "0000-0000", "eissn" : "0000-0001"})

# another with a DOI and no fulltext
source = ArticleFixtureFactory.make_article_source(
Expand All @@ -132,8 +134,7 @@ def test_01_batch_create_article(self, name, kwargs):
article = Article(**source)
article.set_id()
articles.append(article)
if add_journal_info:
journal_specs.append({"title" : "1", "pissn" : "1111-1112", "eissn" : "1111-1111"})
journal_specs.append({"title" : "1", "pissn" : "1111-1112", "eissn" : "1111-1111"})

# one with a fulltext and no DOI
source = ArticleFixtureFactory.make_article_source(
Expand All @@ -146,8 +147,7 @@ def test_01_batch_create_article(self, name, kwargs):
article = Article(**source)
article.set_id()
articles.append(article)
if add_journal_info:
journal_specs.append({"title" : "2", "pissn" : "2222-2222", "eissn" : "2222-2223"})
journal_specs.append({"title" : "2", "pissn" : "2222-2222", "eissn" : "2222-2223"})

# another one with a fulltext and no DOI
source = ArticleFixtureFactory.make_article_source(
Expand All @@ -160,8 +160,7 @@ def test_01_batch_create_article(self, name, kwargs):
article = Article(**source)
article.set_id()
articles.append(article)
if add_journal_info:
journal_specs.append({"title" : "3", "pissn" : "3333-3333", "eissn" : "3333-3334"})
journal_specs.append({"title" : "3", "pissn" : "3333-3333", "eissn" : "3333-3334"})

last_issn = "3333-3333"
last_doi = "10.123/abc/1"
Expand All @@ -180,8 +179,7 @@ def test_01_batch_create_article(self, name, kwargs):
article = Article(**source)
article.set_id()
articles.append(article)
if add_journal_info:
journal_specs.append({"title" : "4", "pissn" : "4444-4444", "eissn" : "4444-4445"})
journal_specs.append({"title" : "4", "pissn" : "4444-4444", "eissn" : "4444-4445"})

# one with a duplicated Fulltext
source = ArticleFixtureFactory.make_article_source(
Expand All @@ -194,8 +192,7 @@ def test_01_batch_create_article(self, name, kwargs):
article = Article(**source)
article.set_id()
articles.append(article)
if add_journal_info:
journal_specs.append({"title" : "5", "pissn" : "5555-5555", "eissn" : "5555-5556"})
journal_specs.append({"title" : "5", "pissn" : "5555-5555", "eissn" : "5555-5556"})

ilo_mock = None
if account_arg == "owner":
Expand Down Expand Up @@ -224,6 +221,18 @@ def test_01_batch_create_article(self, name, kwargs):
gj_mock = ModelArticleMockFactory.get_journal(journal_specs, in_doaj=journal_in_doaj)
Article.get_journal = gj_mock

# We need the journal to be in the index for the ArticleAcceptable checks FIXME: too slow, mock this
#[Journal(**js['instance']).save(blocking=True) for js in journal_specs]

# We need to retrieve the correct Journal by its ISSNs
def mock_find(issns: list, in_doaj=None, max=2):
for j in journal_specs:
if sorted([j['eissn'], j['pissn']]) == sorted(issns):
return [j['instance']]
return []

Journal.find_by_issn_exact = mock_find

###########################################################
# Execution

Expand Down
1 change: 0 additions & 1 deletion doajtest/unit/test_bll_article_create_article.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ def setUp(self):
self.prepare_update_admin = self.svc._prepare_update_admin
self.prepare_update_publisher = self.svc._prepare_update_publisher


def tearDown(self):

super(TestBLLArticleCreateArticle, self).tearDown()
Expand Down
16 changes: 8 additions & 8 deletions doajtest/unit/test_tasks_ingestCrossref442Articles.py
Original file line number Diff line number Diff line change
Expand Up @@ -1315,11 +1315,11 @@ def test_40_crossref_2_journals_different_owners_issn_each_fail(self):
found = [a for a in models.Article.find_by_issns(["1234-5678", "9876-5432"])]
assert len(found) == 0

def test_41_crossref_2_journals_same_owner_issn_each_success(self):
def test_41_crossref_2_journals_same_owner_issn_each_fail(self):
etree.XMLSchema = self.mock_load_schema
# Create 2 journals with the same owner, each with one different issn. The article's 2 issns
# match each of these issns
# We expect a successful article ingest
# We expect a failed ingest - an article must match with only ONE journal

j1 = models.Journal()
j1.set_owner("testowner")
Expand Down Expand Up @@ -1365,19 +1365,19 @@ def test_41_crossref_2_journals_same_owner_issn_each_success(self):

fu = models.FileUpload.pull(id)
assert fu is not None
assert fu.status == "processed"
assert fu.imported == 1
assert fu.status == "failed"
assert fu.imported == 0
assert fu.updates == 0
assert fu.new == 1
assert fu.new == 0

fr = fu.failure_reasons
assert len(fr) > 0
assert len(fr.get("shared", [])) == 0
assert len(fr.get("unowned", [])) == 0
assert len(fr.get("unmatched", [])) == 0
assert len(fr.get("unmatched", [])) == 2

found = [a for a in models.Article.find_by_issns(["1234-5678", "9876-5432"])]
assert len(found) == 1

assert len(found) == 0

def test_42_crossref_2_journals_different_owners_different_issns_mixed_article_fail(self):
etree.XMLSchema = self.mock_load_schema
Expand Down
3 changes: 2 additions & 1 deletion doajtest/unit/test_tasks_ingestCrossref531Articles.py
Original file line number Diff line number Diff line change
Expand Up @@ -624,7 +624,7 @@ def test_23_crossref_process_success(self):
j.set_owner("testowner")
bj = j.bibjson()
bj.add_identifier(bj.P_ISSN, "1234-5678")
j.save()
j.save(blocking=True)

asource = AccountFixtureFactory.make_publisher_source()
account = models.Account(**asource)
Expand All @@ -634,6 +634,7 @@ def test_23_crossref_process_success(self):
# push an article to initialise the mappings
source = ArticleFixtureFactory.make_article_source()
article = models.Article(**source)
article.bibjson().add_identifier(bj.P_ISSN, "1234-5678")
article.save(blocking=True)
article.delete()
models.Article.blockdeleted(article.id)
Expand Down
14 changes: 7 additions & 7 deletions doajtest/unit/test_tasks_ingestDOAJarticles.py
Original file line number Diff line number Diff line change
Expand Up @@ -1260,10 +1260,10 @@ def test_40_doaj_2_journals_different_owners_issn_each_fail(self):
found = [a for a in models.Article.find_by_issns(["1234-5678", "9876-5432"])]
assert len(found) == 0

def test_41_doaj_2_journals_same_owner_issn_each_success(self):
def test_41_doaj_2_journals_same_owner_issn_each_fail(self):
# Create 2 journals with the same owner, each with one different issn. The article's 2 issns
# match each of these issns
# We expect a successful article ingest
# We expect a failed article ingest - articles must match only ONE journal
j1 = models.Journal()
j1.set_owner("testowner")
bj1 = j1.bibjson()
Expand Down Expand Up @@ -1301,18 +1301,18 @@ def test_41_doaj_2_journals_same_owner_issn_each_success(self):

fu = models.FileUpload.pull(id)
assert fu is not None
assert fu.status == "processed"
assert fu.imported == 1
assert fu.status == "failed"
assert fu.imported == 0
assert fu.updates == 0
assert fu.new == 1
assert fu.new == 0

fr = fu.failure_reasons
assert len(fr.get("shared", [])) == 0
assert len(fr.get("unowned", [])) == 0
assert len(fr.get("unmatched", [])) == 0
assert len(fr.get("unmatched", [])) == 2 # error message for each article

found = [a for a in models.Article.find_by_issns(["1234-5678", "9876-5432"])]
assert len(found) == 1
assert len(found) == 0

def test_42_doaj_2_journals_different_owners_different_issns_mixed_article_fail(self):
# Create 2 different journals with different owners and different issns (2 each).
Expand Down
1 change: 1 addition & 0 deletions portality/bll/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class ArticleNotAcceptable(Exception):
"""
def __init__(self, *args, **kwargs):
self.message = kwargs.get("message", "")
self.result = kwargs.get("result", {})
super(ArticleNotAcceptable, self).__init__(*args)

def __str__(self):
Expand Down
28 changes: 21 additions & 7 deletions portality/bll/services/article.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ def batch_create_articles(self, articles, account, duplicate_check=True, merge_d
all_unowned = set()
all_unmatched = set()

# Hold on to the exception so we can raise it later
e_not_acceptable = None

for article in articles:
try:
# ~~!ArticleBatchCreate:Feature->ArticleCreate:Feature~~
Expand All @@ -67,6 +70,10 @@ def batch_create_articles(self, articles, account, duplicate_check=True, merge_d
dry_run=True)
except (exceptions.ArticleMergeConflict, exceptions.ConfigurationException):
raise exceptions.IngestException(message=Messages.EXCEPTION_ARTICLE_BATCH_CONFLICT)
except exceptions.ArticleNotAcceptable as e:
# The ArticleNotAcceptable exception is a superset of reasons we can't match a journal to this article
e_not_acceptable = e
result = {'fail': 1, 'unmatched': set(article.bibjson().issns())}

success += result.get("success", 0)
fail += result.get("fail", 0)
Expand All @@ -90,6 +97,8 @@ def batch_create_articles(self, articles, account, duplicate_check=True, merge_d
# return some stats on the import
return report
else:
if e_not_acceptable is not None:
raise exceptions.ArticleNotAcceptable(message=e_not_acceptable.message, result=report)
raise exceptions.IngestException(message=Messages.EXCEPTION_ARTICLE_BATCH_FAIL, result=report)

@staticmethod
Expand Down Expand Up @@ -201,18 +210,18 @@ def create_article(self, article, account, duplicate_check=True, merge_duplicate
{"arg": update_article_id, "instance": str, "allow_none": True, "arg_name": "update_article_id"}
], exceptions.ArgumentException)

# quickly validate that the article is acceptable - it must have a DOI and/or a fulltext
# this raises an exception if the article is not acceptable, containing all the relevant validation details
has_permissions_result = self.has_permissions(account, article, limit_to_account)
if isinstance(has_permissions_result, dict):
return has_permissions_result

# Validate that the article is acceptable: it must have a DOI and/or a fulltext & match only one in_doaj journal
# this raises an exception if the article is not acceptable, containing all the relevant validation details
# We do this after the permissions check because that gives a detailed result whereas this throws an exception
try:
self.is_acceptable(article)
except Exception as e:
raise e

has_permissions_result = self.has_permissions(account, article, limit_to_account)
if isinstance(has_permissions_result,dict):
return has_permissions_result

is_update = 0
if duplicate_check:
# ~~!ArticleCreate:Feature->ArticleDeduplication:Feature~~
Expand Down Expand Up @@ -282,7 +291,8 @@ def match_journal_with_validation(article_bibjson: models.ArticleBibJSON):
if eissn is not None:
issns.append(eissn)

journal = models.Journal.find_by_issn_exact(issns, True)
# Find an exact match, whether in_doaj or not
journal = models.Journal.find_by_issn_exact(issns)

# check if only one journal matches pissn and eissn and if they are in the correct fields
# no need to check eissn, if pissn matches, pissn and eissn are different and only 1 journal has been found - then eissn matches too
Expand Down Expand Up @@ -394,6 +404,10 @@ def issn_ownership_status(article, owner):
issns = b.get_identifiers(b.P_ISSN)
issns += b.get_identifiers(b.E_ISSN)

# FIXME: Duplicate check due to inconsistent control flow (result vs exception)
if len(issns) == 0:
raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_NO_ISSNS)

owned = []
shared = []
unowned = []
Expand Down
2 changes: 1 addition & 1 deletion portality/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# Application Version information
# ~~->API:Feature~~

DOAJ_VERSION = "6.4.0"
DOAJ_VERSION = "6.4.1"
API_VERSION = "3.0.1"

######################################
Expand Down
15 changes: 10 additions & 5 deletions portality/tasks/ingestarticles.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,19 +312,24 @@ def _process(self, file_upload: models.FileUpload):
for article in articles:
article.set_upload_id(file_upload.id)
result = articleService.batch_create_articles(articles, account, add_journal_info=True)
except (IngestException, CrosswalkException) as e:
job.add_audit_message("IngestException: {msg}. Inner message: {inner}. Stack: {x}"
.format(msg=e.message, inner=e.inner_message, x=e.trace()))
except (IngestException, CrosswalkException, ArticleNotAcceptable) as e:
if hasattr(e, 'inner_message'):
job.add_audit_message("{exception}: {msg}. Inner message: {inner}. Stack: {x}"
.format(exception=e.__class__.__name__, msg=e.message, inner=e.inner_message, x=e.trace()))
file_upload.failed(e.message, e.inner_message)
else:
job.add_audit_message("{exception}: {msg}.".format(exception=e.__class__.__name__, msg=e.message))
file_upload.failed(e.message)

job.outcome_fail()
file_upload.failed(e.message, e.inner_message)
result = e.result
try:
file_failed(path)
ingest_exception = True
except:
job.add_audit_message("Error cleaning up file which caused IngestException: {x}"
.format(x=traceback.format_exc()))
except (DuplicateArticleException, ArticleNotAcceptable) as e:
except DuplicateArticleException as e:
job.add_audit_message(str(e))
job.outcome_fail()
file_upload.failed(str(e))
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setup(
name='doaj',
version='6.4.0',
version='6.4.1',
packages=find_packages(),
install_requires=[
"awscli==1.20.50",
Expand Down

0 comments on commit 6c9730a

Please sign in to comment.