From 3c644ae361aeaf69de851f19e8ba132bb3c1d5b4 Mon Sep 17 00:00:00 2001 From: Steve Eardley Date: Fri, 29 Sep 2023 00:37:25 +0100 Subject: [PATCH 1/7] Some test fixes for ArticleAcceptible rules --- .../test_bll_article_batch_create_article.py | 35 ++++++++++++------- portality/bll/services/article.py | 3 +- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/doajtest/unit/test_bll_article_batch_create_article.py b/doajtest/unit/test_bll_article_batch_create_article.py index 6cda9ee82c..e397725089 100644 --- a/doajtest/unit/test_bll_article_batch_create_article.py +++ b/doajtest/unit/test_bll_article_batch_create_article.py @@ -5,7 +5,7 @@ from doajtest.helpers import DoajTestCase from portality.bll import DOAJ from portality.bll import exceptions -from portality.models import Article, Account,Journal +from portality.models import Article, Account, Journal from portality.lib.paths import rel2abs from doajtest.mocks.bll_article import BLLArticleMockFactory from doajtest.mocks.model_Article import ModelArticleMockFactory @@ -37,12 +37,14 @@ def setUp(self): self._get_duplicate = self.svc.get_duplicate self._issn_ownership_status = self.svc.issn_ownership_status self._get_journal = Article.get_journal + self._find_by_issn_exact = Journal.find_by_issn_exact def tearDown(self): self.svc.is_legitimate_owner = self._is_legitimate_owner self.svc.get_duplicate = self._get_duplicate self.svc.issn_ownership_status = self._issn_ownership_status Article.get_journal = self._get_journal + Journal.find_by_issn = self._find_by_issn_exact super(TestBLLArticleBatchCreateArticle, self).tearDown() @parameterized.expand(load_cases) @@ -118,8 +120,8 @@ def test_01_batch_create_article(self, name, kwargs): article = Article(**source) article.set_id() articles.append(article) - if add_journal_info: - journal_specs.append({"title" : "0", "pissn" : "0000-0000", "eissn" : "0000-0001"}) + # We always need a journal to exist for an article to be created + journal_specs.append({"title" : "0", "pissn" : "0000-0000", "eissn" : "0000-0001"}) # another with a DOI and no fulltext source = ArticleFixtureFactory.make_article_source( @@ -132,8 +134,7 @@ def test_01_batch_create_article(self, name, kwargs): article = Article(**source) article.set_id() articles.append(article) - if add_journal_info: - journal_specs.append({"title" : "1", "pissn" : "1111-1112", "eissn" : "1111-1111"}) + journal_specs.append({"title" : "1", "pissn" : "1111-1112", "eissn" : "1111-1111"}) # one with a fulltext and no DOI source = ArticleFixtureFactory.make_article_source( @@ -146,8 +147,7 @@ def test_01_batch_create_article(self, name, kwargs): article = Article(**source) article.set_id() articles.append(article) - if add_journal_info: - journal_specs.append({"title" : "2", "pissn" : "2222-2222", "eissn" : "2222-2223"}) + journal_specs.append({"title" : "2", "pissn" : "2222-2222", "eissn" : "2222-2223"}) # another one with a fulltext and no DOI source = ArticleFixtureFactory.make_article_source( @@ -160,8 +160,7 @@ def test_01_batch_create_article(self, name, kwargs): article = Article(**source) article.set_id() articles.append(article) - if add_journal_info: - journal_specs.append({"title" : "3", "pissn" : "3333-3333", "eissn" : "3333-3334"}) + journal_specs.append({"title" : "3", "pissn" : "3333-3333", "eissn" : "3333-3334"}) last_issn = "3333-3333" last_doi = "10.123/abc/1" @@ -180,8 +179,7 @@ def test_01_batch_create_article(self, name, kwargs): article = Article(**source) article.set_id() articles.append(article) - if add_journal_info: - journal_specs.append({"title" : "4", "pissn" : "4444-4444", "eissn" : "4444-4445"}) + journal_specs.append({"title" : "4", "pissn" : "4444-4444", "eissn" : "4444-4445"}) # one with a duplicated Fulltext source = ArticleFixtureFactory.make_article_source( @@ -194,8 +192,7 @@ def test_01_batch_create_article(self, name, kwargs): article = Article(**source) article.set_id() articles.append(article) - if add_journal_info: - journal_specs.append({"title" : "5", "pissn" : "5555-5555", "eissn" : "5555-5556"}) + journal_specs.append({"title" : "5", "pissn" : "5555-5555", "eissn" : "5555-5556"}) ilo_mock = None if account_arg == "owner": @@ -224,6 +221,18 @@ def test_01_batch_create_article(self, name, kwargs): gj_mock = ModelArticleMockFactory.get_journal(journal_specs, in_doaj=journal_in_doaj) Article.get_journal = gj_mock + # We need the journal to be in the index for the ArticleAcceptable checks FIXME: too slow, mock this + #[Journal(**js['instance']).save(blocking=True) for js in journal_specs] + + # We need to retrieve the correct Journal by its ISSNs + def mock_find(issns: list, in_doaj=None, max=2): + for j in journal_specs: + if sorted([j['eissn'], j['pissn']]) == sorted(issns): + return [j['instance']] + return [] + + Journal.find_by_issn_exact = mock_find + ########################################################### # Execution diff --git a/portality/bll/services/article.py b/portality/bll/services/article.py index 777d499636..63d73055f3 100644 --- a/portality/bll/services/article.py +++ b/portality/bll/services/article.py @@ -282,7 +282,8 @@ def match_journal_with_validation(article_bibjson: models.ArticleBibJSON): if eissn is not None: issns.append(eissn) - journal = models.Journal.find_by_issn_exact(issns, True) + # Find an exact match, whether in_doaj or not + journal = models.Journal.find_by_issn_exact(issns) # check if only one journal matches pissn and eissn and if they are in the correct fields # no need to check eissn, if pissn matches, pissn and eissn are different and only 1 journal has been found - then eissn matches too From aadea74c57a3cbcbe041ad88a1b285487c4b72c9 Mon Sep 17 00:00:00 2001 From: Steve Eardley Date: Fri, 29 Sep 2023 02:45:02 +0100 Subject: [PATCH 2/7] Some more test fixes for ArticleAcceptible rules --- .../unit/test_tasks_ingestDOAJarticles.py | 14 +++++------ portality/bll/services/article.py | 25 +++++++++++++------ 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/doajtest/unit/test_tasks_ingestDOAJarticles.py b/doajtest/unit/test_tasks_ingestDOAJarticles.py index 2872124a47..a2eb5f2be9 100644 --- a/doajtest/unit/test_tasks_ingestDOAJarticles.py +++ b/doajtest/unit/test_tasks_ingestDOAJarticles.py @@ -1260,10 +1260,10 @@ def test_40_doaj_2_journals_different_owners_issn_each_fail(self): found = [a for a in models.Article.find_by_issns(["1234-5678", "9876-5432"])] assert len(found) == 0 - def test_41_doaj_2_journals_same_owner_issn_each_success(self): + def test_41_doaj_2_journals_same_owner_issn_each_fail(self): # Create 2 journals with the same owner, each with one different issn. The article's 2 issns # match each of these issns - # We expect a successful article ingest + # We expect a failed article ingest - articles must match only ONE journal j1 = models.Journal() j1.set_owner("testowner") bj1 = j1.bibjson() @@ -1301,18 +1301,18 @@ def test_41_doaj_2_journals_same_owner_issn_each_success(self): fu = models.FileUpload.pull(id) assert fu is not None - assert fu.status == "processed" - assert fu.imported == 1 + assert fu.status == "failed" + assert fu.imported == 0 assert fu.updates == 0 - assert fu.new == 1 + assert fu.new == 0 fr = fu.failure_reasons assert len(fr.get("shared", [])) == 0 assert len(fr.get("unowned", [])) == 0 - assert len(fr.get("unmatched", [])) == 0 + assert len(fr.get("unmatched", [])) == 2 # error message for each article found = [a for a in models.Article.find_by_issns(["1234-5678", "9876-5432"])] - assert len(found) == 1 + assert len(found) == 0 def test_42_doaj_2_journals_different_owners_different_issns_mixed_article_fail(self): # Create 2 different journals with different owners and different issns (2 each). diff --git a/portality/bll/services/article.py b/portality/bll/services/article.py index 63d73055f3..7737be124b 100644 --- a/portality/bll/services/article.py +++ b/portality/bll/services/article.py @@ -56,6 +56,9 @@ def batch_create_articles(self, articles, account, duplicate_check=True, merge_d all_unowned = set() all_unmatched = set() + # Generic error message if we don't have a specific reason + error_msg = Messages.EXCEPTION_ARTICLE_BATCH_FAIL + for article in articles: try: # ~~!ArticleBatchCreate:Feature->ArticleCreate:Feature~~ @@ -67,6 +70,10 @@ def batch_create_articles(self, articles, account, duplicate_check=True, merge_d dry_run=True) except (exceptions.ArticleMergeConflict, exceptions.ConfigurationException): raise exceptions.IngestException(message=Messages.EXCEPTION_ARTICLE_BATCH_CONFLICT) + except exceptions.ArticleNotAcceptable as e: + # The ArticleNotAcceptable exception is a superset of reasons we can't match a journal to this article + error_msg = e.message + result = {'fail': 1, 'unmatched': set(article.bibjson().issns())} success += result.get("success", 0) fail += result.get("fail", 0) @@ -90,7 +97,7 @@ def batch_create_articles(self, articles, account, duplicate_check=True, merge_d # return some stats on the import return report else: - raise exceptions.IngestException(message=Messages.EXCEPTION_ARTICLE_BATCH_FAIL, result=report) + raise exceptions.IngestException(message=error_msg, result=report) @staticmethod def _batch_contains_duplicates(articles): @@ -201,18 +208,18 @@ def create_article(self, article, account, duplicate_check=True, merge_duplicate {"arg": update_article_id, "instance": str, "allow_none": True, "arg_name": "update_article_id"} ], exceptions.ArgumentException) - # quickly validate that the article is acceptable - it must have a DOI and/or a fulltext - # this raises an exception if the article is not acceptable, containing all the relevant validation details + has_permissions_result = self.has_permissions(account, article, limit_to_account) + if isinstance(has_permissions_result, dict): + return has_permissions_result + # Validate that the article is acceptable: it must have a DOI and/or a fulltext & match only one in_doaj journal + # this raises an exception if the article is not acceptable, containing all the relevant validation details + # We do this after the permissions check because that gives a detailed result whereas this throws an exception try: self.is_acceptable(article) except Exception as e: raise e - has_permissions_result = self.has_permissions(account, article, limit_to_account) - if isinstance(has_permissions_result,dict): - return has_permissions_result - is_update = 0 if duplicate_check: # ~~!ArticleCreate:Feature->ArticleDeduplication:Feature~~ @@ -395,6 +402,10 @@ def issn_ownership_status(article, owner): issns = b.get_identifiers(b.P_ISSN) issns += b.get_identifiers(b.E_ISSN) + # FIXME: Duplicate check due to inconsistent control flow (result vs exception) + if len(issns) == 0: + raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_NO_ISSNS) + owned = [] shared = [] unowned = [] From 07a975bf6a2d7b181b309b9ff74c72383304cb5b Mon Sep 17 00:00:00 2001 From: Steve Eardley Date: Fri, 29 Sep 2023 15:55:29 +0100 Subject: [PATCH 3/7] Delay raising the ArticleNotAcceptible error to similar location to the IngestExeption --- portality/bll/services/article.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/portality/bll/services/article.py b/portality/bll/services/article.py index 7737be124b..b5e829cd24 100644 --- a/portality/bll/services/article.py +++ b/portality/bll/services/article.py @@ -56,8 +56,8 @@ def batch_create_articles(self, articles, account, duplicate_check=True, merge_d all_unowned = set() all_unmatched = set() - # Generic error message if we don't have a specific reason - error_msg = Messages.EXCEPTION_ARTICLE_BATCH_FAIL + # Hold on to the exception so we can raise it later + e_not_acceptable = None for article in articles: try: @@ -72,7 +72,7 @@ def batch_create_articles(self, articles, account, duplicate_check=True, merge_d raise exceptions.IngestException(message=Messages.EXCEPTION_ARTICLE_BATCH_CONFLICT) except exceptions.ArticleNotAcceptable as e: # The ArticleNotAcceptable exception is a superset of reasons we can't match a journal to this article - error_msg = e.message + e_not_acceptable = e result = {'fail': 1, 'unmatched': set(article.bibjson().issns())} success += result.get("success", 0) @@ -97,7 +97,9 @@ def batch_create_articles(self, articles, account, duplicate_check=True, merge_d # return some stats on the import return report else: - raise exceptions.IngestException(message=error_msg, result=report) + if e_not_acceptable is not None: + raise exceptions.ArticleNotAcceptable(message=e_not_acceptable.message, result=report) + raise exceptions.IngestException(message=Messages.EXCEPTION_ARTICLE_BATCH_FAIL, result=report) @staticmethod def _batch_contains_duplicates(articles): From 220aa856bfa54f942eafeb43517dd0851a421059 Mon Sep 17 00:00:00 2001 From: Steve Eardley Date: Mon, 2 Oct 2023 09:31:28 +0100 Subject: [PATCH 4/7] A bit more work on control flow for ArticleNotAcceptable --- doajtest/unit/test_bll_article_create_article.py | 1 - .../unit/test_tasks_ingestCrossref442Articles.py | 16 ++++++++-------- .../unit/test_tasks_ingestCrossref531Articles.py | 3 ++- portality/bll/exceptions.py | 1 + portality/tasks/ingestarticles.py | 15 ++++++++++----- 5 files changed, 21 insertions(+), 15 deletions(-) diff --git a/doajtest/unit/test_bll_article_create_article.py b/doajtest/unit/test_bll_article_create_article.py index f595a1b96e..d9d524efe7 100644 --- a/doajtest/unit/test_bll_article_create_article.py +++ b/doajtest/unit/test_bll_article_create_article.py @@ -35,7 +35,6 @@ def setUp(self): self.prepare_update_admin = self.svc._prepare_update_admin self.prepare_update_publisher = self.svc._prepare_update_publisher - def tearDown(self): super(TestBLLArticleCreateArticle, self).tearDown() diff --git a/doajtest/unit/test_tasks_ingestCrossref442Articles.py b/doajtest/unit/test_tasks_ingestCrossref442Articles.py index 2714b33644..ed2236552c 100644 --- a/doajtest/unit/test_tasks_ingestCrossref442Articles.py +++ b/doajtest/unit/test_tasks_ingestCrossref442Articles.py @@ -1315,11 +1315,11 @@ def test_40_crossref_2_journals_different_owners_issn_each_fail(self): found = [a for a in models.Article.find_by_issns(["1234-5678", "9876-5432"])] assert len(found) == 0 - def test_41_crossref_2_journals_same_owner_issn_each_success(self): + def test_41_crossref_2_journals_same_owner_issn_each_fail(self): etree.XMLSchema = self.mock_load_schema # Create 2 journals with the same owner, each with one different issn. The article's 2 issns # match each of these issns - # We expect a successful article ingest + # We expect a failed ingest - an article must match with only ONE journal j1 = models.Journal() j1.set_owner("testowner") @@ -1365,19 +1365,19 @@ def test_41_crossref_2_journals_same_owner_issn_each_success(self): fu = models.FileUpload.pull(id) assert fu is not None - assert fu.status == "processed" - assert fu.imported == 1 + assert fu.status == "failed" + assert fu.imported == 0 assert fu.updates == 0 - assert fu.new == 1 + assert fu.new == 0 fr = fu.failure_reasons + assert len(fr) > 0 assert len(fr.get("shared", [])) == 0 assert len(fr.get("unowned", [])) == 0 - assert len(fr.get("unmatched", [])) == 0 + assert len(fr.get("unmatched", [])) == 2 found = [a for a in models.Article.find_by_issns(["1234-5678", "9876-5432"])] - assert len(found) == 1 - + assert len(found) == 0 def test_42_crossref_2_journals_different_owners_different_issns_mixed_article_fail(self): etree.XMLSchema = self.mock_load_schema diff --git a/doajtest/unit/test_tasks_ingestCrossref531Articles.py b/doajtest/unit/test_tasks_ingestCrossref531Articles.py index 27308a3d22..09edcf1b1d 100644 --- a/doajtest/unit/test_tasks_ingestCrossref531Articles.py +++ b/doajtest/unit/test_tasks_ingestCrossref531Articles.py @@ -624,7 +624,7 @@ def test_23_crossref_process_success(self): j.set_owner("testowner") bj = j.bibjson() bj.add_identifier(bj.P_ISSN, "1234-5678") - j.save() + j.save(blocking=True) asource = AccountFixtureFactory.make_publisher_source() account = models.Account(**asource) @@ -634,6 +634,7 @@ def test_23_crossref_process_success(self): # push an article to initialise the mappings source = ArticleFixtureFactory.make_article_source() article = models.Article(**source) + article.bibjson().add_identifier(bj.P_ISSN, "1234-5678") article.save(blocking=True) article.delete() models.Article.blockdeleted(article.id) diff --git a/portality/bll/exceptions.py b/portality/bll/exceptions.py index 3bb676f984..005ad7f31c 100644 --- a/portality/bll/exceptions.py +++ b/portality/bll/exceptions.py @@ -66,6 +66,7 @@ class ArticleNotAcceptable(Exception): """ def __init__(self, *args, **kwargs): self.message = kwargs.get("message", "") + self.result = kwargs.get("result", {}) super(ArticleNotAcceptable, self).__init__(*args) def __str__(self): diff --git a/portality/tasks/ingestarticles.py b/portality/tasks/ingestarticles.py index de6991ab40..e798f4005d 100644 --- a/portality/tasks/ingestarticles.py +++ b/portality/tasks/ingestarticles.py @@ -312,11 +312,16 @@ def _process(self, file_upload: models.FileUpload): for article in articles: article.set_upload_id(file_upload.id) result = articleService.batch_create_articles(articles, account, add_journal_info=True) - except (IngestException, CrosswalkException) as e: - job.add_audit_message("IngestException: {msg}. Inner message: {inner}. Stack: {x}" - .format(msg=e.message, inner=e.inner_message, x=e.trace())) + except (IngestException, CrosswalkException, ArticleNotAcceptable) as e: + if hasattr(e, 'inner_message'): + job.add_audit_message("{exception}: {msg}. Inner message: {inner}. Stack: {x}" + .format(exception=e.__class__.__name__, msg=e.message, inner=e.inner_message, x=e.trace())) + file_upload.failed(e.message, e.inner_message) + else: + job.add_audit_message("{exception}: {msg}.".format(exception=e.__class__.__name__, msg=e.message)) + file_upload.failed(e.message) + job.outcome_fail() - file_upload.failed(e.message, e.inner_message) result = e.result try: file_failed(path) @@ -324,7 +329,7 @@ def _process(self, file_upload: models.FileUpload): except: job.add_audit_message("Error cleaning up file which caused IngestException: {x}" .format(x=traceback.format_exc())) - except (DuplicateArticleException, ArticleNotAcceptable) as e: + except DuplicateArticleException as e: job.add_audit_message(str(e)) job.outcome_fail() file_upload.failed(str(e)) From 6f04d1a9a9dfc62ad150517bea8bdd3c370d2372 Mon Sep 17 00:00:00 2001 From: Steve Eardley Date: Mon, 2 Oct 2023 10:45:58 +0100 Subject: [PATCH 5/7] Correct harvester fixtures for EISSN / PISSN the right way around --- doajtest/unit/resources/harvester_resp.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doajtest/unit/resources/harvester_resp.json b/doajtest/unit/resources/harvester_resp.json index dc24cb7dd9..133fedaf24 100644 --- a/doajtest/unit/resources/harvester_resp.json +++ b/doajtest/unit/resources/harvester_resp.json @@ -45,8 +45,8 @@ "journal": { "title": "My Journal", "medlineAbbreviation": "My Jour", - "essn": "1234-5678", - "issn": "9876-5432", + "issn": "1234-5678", + "essn": "9876-5432", "isoabbreviation": "My Jour", "nlmid": "123456789" } @@ -143,8 +143,8 @@ "journal": { "title": "My Journal", "medlineAbbreviation": "My Jour", - "essn": "1234-5678", - "issn": "9876-5432", + "issn": "1234-5678", + "essn": "9876-5432", "isoabbreviation": "My Jour", "nlmid": "123456789" } From 26e825bddf94016def9a3cfef8cfa87fde4729f4 Mon Sep 17 00:00:00 2001 From: Steve Eardley Date: Mon, 2 Oct 2023 12:15:11 +0100 Subject: [PATCH 6/7] Fix mis-restored function in teardown --- doajtest/unit/test_bll_article_batch_create_article.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doajtest/unit/test_bll_article_batch_create_article.py b/doajtest/unit/test_bll_article_batch_create_article.py index e397725089..34f537c7a8 100644 --- a/doajtest/unit/test_bll_article_batch_create_article.py +++ b/doajtest/unit/test_bll_article_batch_create_article.py @@ -44,7 +44,7 @@ def tearDown(self): self.svc.get_duplicate = self._get_duplicate self.svc.issn_ownership_status = self._issn_ownership_status Article.get_journal = self._get_journal - Journal.find_by_issn = self._find_by_issn_exact + Journal.find_by_issn_exact = self._find_by_issn_exact super(TestBLLArticleBatchCreateArticle, self).tearDown() @parameterized.expand(load_cases) From 3af79038d7002312462d305641a1b38ed60b73c6 Mon Sep 17 00:00:00 2001 From: Steve Eardley Date: Mon, 2 Oct 2023 12:36:57 +0100 Subject: [PATCH 7/7] Version bump for test fixes release --- portality/settings.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/portality/settings.py b/portality/settings.py index 0d1edcc409..f1a11269e4 100644 --- a/portality/settings.py +++ b/portality/settings.py @@ -9,7 +9,7 @@ # Application Version information # ~~->API:Feature~~ -DOAJ_VERSION = "6.4.0" +DOAJ_VERSION = "6.4.1" API_VERSION = "3.0.1" ###################################### diff --git a/setup.py b/setup.py index 2c36468273..9be9b324ea 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='doaj', - version='6.4.0', + version='6.4.1', packages=find_packages(), install_requires=[ "awscli==1.20.50",