From 1d3e8bbdb84c96a22b4ee3faf368305ddf5c79a8 Mon Sep 17 00:00:00 2001 From: Aga Date: Thu, 8 Jun 2023 10:40:08 +0100 Subject: [PATCH 01/15] add check --- portality/bll/services/article.py | 4 ++++ portality/ui/messages.py | 1 + 2 files changed, 5 insertions(+) diff --git a/portality/bll/services/article.py b/portality/bll/services/article.py index 7b55894d24..1e0e2ac76d 100644 --- a/portality/bll/services/article.py +++ b/portality/bll/services/article.py @@ -170,6 +170,10 @@ def _validate_issns(article_bibjson: models.ArticleBibJSON): if pissn == eissn: raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_IDENTICAL_PISSN_AND_EISSN) + journals = models.Journal.find_by_issn([pissn,eissn], True) + if journals is None or len(journals) > 1: + raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_MISMATCHED_ISSNS) + def create_article(self, article, account, duplicate_check=True, merge_duplicate=True, limit_to_account=True, add_journal_info=False, dry_run=False, update_article_id=None): diff --git a/portality/ui/messages.py b/portality/ui/messages.py index 094751c97c..cf8092e212 100644 --- a/portality/ui/messages.py +++ b/portality/ui/messages.py @@ -61,6 +61,7 @@ class Messages(object): EXCEPTION_NO_CONTRIBUTORS_EXPLANATION = "DOAJ requires at least one author for each article." EXCEPTION_TOO_MANY_ISSNS = "Too many ISSNs. Only 2 ISSNs are allowed: one Print ISSN and one Online ISSN." + EXCEPTION_MISMATCHED_ISSNS = "Issns provided don't match any journal." EXCEPTION_ISSNS_OF_THE_SAME_TYPE = "Both ISSNs have the same type: {type}" EXCEPTION_IDENTICAL_PISSN_AND_EISSN = "The Print and Online ISSNs supplied are identical. If you supply 2 ISSNs they must be different." EXCEPTION_NO_ISSNS = "Neither Print ISSN nor Online ISSN has been supplied. DOAJ requires at least one ISSN." From 83b6fded639bba06090b38c8569f81f5da819e00 Mon Sep 17 00:00:00 2001 From: Aga Date: Thu, 8 Jun 2023 14:01:38 +0100 Subject: [PATCH 02/15] add check and unit tests --- ...issn_validation_against_journal.matrix.csv | 17 +++ ...sn_validation_against_journal.settings.csv | 19 +++ ...n_validation_against_journal.settings.json | 119 ++++++++++++++++++ ...test_article_acceptable_and_permissions.py | 56 ++++++++- portality/bll/services/article.py | 10 +- 5 files changed, 218 insertions(+), 3 deletions(-) create mode 100644 doajtest/matrices/article_create_article/issn_validation_against_journal.matrix.csv create mode 100644 doajtest/matrices/article_create_article/issn_validation_against_journal.settings.csv create mode 100644 doajtest/matrices/article_create_article/issn_validation_against_journal.settings.json diff --git a/doajtest/matrices/article_create_article/issn_validation_against_journal.matrix.csv b/doajtest/matrices/article_create_article/issn_validation_against_journal.matrix.csv new file mode 100644 index 0000000000..0d2f704aba --- /dev/null +++ b/doajtest/matrices/article_create_article/issn_validation_against_journal.matrix.csv @@ -0,0 +1,17 @@ +test_id,eissn,pissn,validated +1,eissn_in_doaj,pissn_in_doaj,yes +2,eissn_in_doaj,eissn_not_in_doaj, +3,eissn_in_doaj,pissn_not_in_doaj, +4,eissn_in_doaj,!eissn_in_doaj, +5,pissn_in_doaj,eissn_in_doaj, +6,pissn_in_doaj,eissn_not_in_doaj, +7,pissn_in_doaj,pissn_not_in_doaj, +8,pissn_in_doaj,!pissn_in_doaj, +9,eissn_not_in_doaj,eissn_in_doaj, +10,eissn_not_in_doaj,pissn_in_doaj, +11,eissn_not_in_doaj,pissn_not_in_doaj, +12,eissn_not_in_doaj,!eissn_not_in_doaj, +13,pissn_not_in_doaj,eissn_in_doaj, +14,pissn_not_in_doaj,pissn_in_doaj, +15,pissn_not_in_doaj,eissn_not_in_doaj, +16,pissn_not_in_doaj,!pissn_not_in_doaj, diff --git a/doajtest/matrices/article_create_article/issn_validation_against_journal.settings.csv b/doajtest/matrices/article_create_article/issn_validation_against_journal.settings.csv new file mode 100644 index 0000000000..a8eab3f4ce --- /dev/null +++ b/doajtest/matrices/article_create_article/issn_validation_against_journal.settings.csv @@ -0,0 +1,19 @@ +field,test_id,eissn,pissn,validated +type,index,generated,generated,conditional +deafult,,,,no +,,,, +values,,eissn_in_doaj,eissn_in_doaj,yes +values,,pissn_in_doaj,pissn_in_doaj,no +values,,eissn_not_in_doaj,eissn_not_in_doaj, +values,,pissn_not_in_doaj,pissn_not_in_doaj, +,,,, +,,,, +conditional validated,,eissn_in_doaj,pissn_in_doaj,yes +constraint eissn,,eissn_in_doaj,!eissn_in_doaj, +constraint eissn,,eissn_not_in_doaj,!eissn_not_in_doaj, +constraint eissn,,pissn_not_in_doaj,!pissn_not_in_doaj, +constraint eissn,,pissn_in_doaj,!pissn_in_doaj, +constraint pissn,,eissn_in_doaj,!eissn_in_doaj, +constraint pissn,,eissn_not_in_doaj,!eissn_not_in_doaj, +constraint pissn,,pissn_not_in_doaj,!pissn_not_in_doaj, +constraint pissn,,pissn_in_doaj,!pissn_in_doaj, \ No newline at end of file diff --git a/doajtest/matrices/article_create_article/issn_validation_against_journal.settings.json b/doajtest/matrices/article_create_article/issn_validation_against_journal.settings.json new file mode 100644 index 0000000000..11d1012a96 --- /dev/null +++ b/doajtest/matrices/article_create_article/issn_validation_against_journal.settings.json @@ -0,0 +1,119 @@ +{ + "parameters": [ + { + "name": "test_id", + "type": "index" + }, + { + "name": "eissn", + "type": "generated", + "values": { + "eissn_in_doaj": { + "constraints": { + "pissn": { + "nor": [ + "eissn_in_doaj" + ] + } + } + }, + "pissn_in_doaj": { + "constraints": { + "pissn": { + "nor": [ + "pissn_in_doaj" + ] + } + } + }, + "eissn_not_in_doaj": { + "constraints": { + "pissn": { + "nor": [ + "eissn_not_in_doaj" + ] + } + } + }, + "pissn_not_in_doaj": { + "constraints": { + "pissn": { + "nor": [ + "pissn_not_in_doaj" + ] + } + } + } + } + }, + { + "name": "pissn", + "type": "generated", + "values": { + "eissn_in_doaj": {}, + "pissn_in_doaj": {}, + "eissn_not_in_doaj": {}, + "pissn_not_in_doaj": {}, + "!eissn_in_doaj": { + "constraints": { + "eissn": { + "or": [ + "eissn_in_doaj" + ] + } + } + }, + "!eissn_not_in_doaj": { + "constraints": { + "eissn": { + "or": [ + "eissn_not_in_doaj" + ] + } + } + }, + "!pissn_not_in_doaj": { + "constraints": { + "eissn": { + "or": [ + "pissn_not_in_doaj" + ] + } + } + }, + "!pissn_in_doaj": { + "constraints": { + "eissn": { + "or": [ + "pissn_in_doaj" + ] + } + } + } + } + }, + { + "name": "validated", + "type": "conditional", + "values": { + "yes": { + "conditions": [ + { + "eissn": { + "or": [ + "eissn_in_doaj" + ] + }, + "pissn": { + "or": [ + "pissn_in_doaj" + ] + } + } + ] + }, + "no": {} + } + } + ] +} \ No newline at end of file diff --git a/doajtest/unit/test_article_acceptable_and_permissions.py b/doajtest/unit/test_article_acceptable_and_permissions.py index eb4c04d4fb..881323ba92 100644 --- a/doajtest/unit/test_article_acceptable_and_permissions.py +++ b/doajtest/unit/test_article_acceptable_and_permissions.py @@ -14,6 +14,11 @@ def is_acceptable_load_cases(): "test_id", {"test_id": []}) +def issn_validation_against_journal_load_sets(): + return load_parameter_sets(rel2abs(__file__, "..", "matrices", "article_create_article"), "issn_validation_against_journal", + "test_id", + {"test_id": []}) + class TestBLLPrepareUpdatePublisher(DoajTestCase): @@ -110,4 +115,53 @@ def test_has_permissions(self): assert failed_result["unowned"].sort() == [pissn, eissn].sort() # assert failed_result == {'success': 0, 'fail': 1, 'update': 0, 'new': 0, 'shared': [], # 'unowned': [pissn, eissn], - # 'unmatched': []}, "received: {}".format(failed_result) \ No newline at end of file + # 'unmatched': []}, "received: {}".format(failed_result) + + + @parameterized.expand(issn_validation_against_journal_load_sets) + def test_issn_validation_against_journal_load_sets(self, value, kwargs): + kwpissn = kwargs.get("pissn") + kweissn = kwargs.get("eissn") + validated = kwargs.get("validated") + + js = JournalFixtureFactory.make_many_journal_sources(2) + journal_in_doaj = Journal(**js[0]) + journal_in_doaj.set_in_doaj(True) + journal_in_doaj.bibjson().pissn = "1111-1111" + journal_in_doaj.bibjson().eissn = "2222-2222" + journal_in_doaj.save(blocking=True) + + journal_not_in_doaj = Journal(**js[1]) + journal_not_in_doaj.set_in_doaj(False) + journal_not_in_doaj.bibjson().pissn = "3333-3333" + journal_not_in_doaj.bibjson().eissn = "4444-4444" + journal_not_in_doaj.save(blocking=True) + + if (kwpissn == "pissn_in_doaj"): + pissn = journal_in_doaj.bibjson().pissn + elif (kwpissn == "eissn_in_doaj"): + pissn = journal_in_doaj.bibjson().eissn + elif (kwpissn == "pissn_not_in_doaj"): + pissn = journal_not_in_doaj.bibjson().pissn + else: + pissn = journal_not_in_doaj.bibjson().eissn + + if (kweissn == "pissn_in_doaj"): + eissn = journal_in_doaj.bibjson().pissn + elif (kweissn == "eissn_in_doaj"): + eissn = journal_in_doaj.bibjson().eissn + elif (kweissn == "pissn_not_in_doaj"): + eissn = journal_not_in_doaj.bibjson().pissn + else: + eissn = journal_not_in_doaj.bibjson().eissn + + + art_source = ArticleFixtureFactory.make_article_source(pissn=pissn, eissn=eissn) + article = Article(**art_source) + + if validated: + self.assertIsNone(self.svc.is_acceptable(article)) + + else: + with self.assertRaises(exceptions.ArticleNotAcceptable): + self.svc.is_acceptable(article) \ No newline at end of file diff --git a/portality/bll/services/article.py b/portality/bll/services/article.py index 1e0e2ac76d..5430b5f3ef 100644 --- a/portality/bll/services/article.py +++ b/portality/bll/services/article.py @@ -170,8 +170,14 @@ def _validate_issns(article_bibjson: models.ArticleBibJSON): if pissn == eissn: raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_IDENTICAL_PISSN_AND_EISSN) - journals = models.Journal.find_by_issn([pissn,eissn], True) - if journals is None or len(journals) > 1: + journalp = models.Journal.find_by_issn([pissn], True) + journale = models.Journal.find_by_issn([eissn], True) + + # check if only one and the same journal matches pissn and eissn and if they are in the correct fields + if len(journalp) != 1 or \ + len(journale) != 1 or \ + journale[0].id != journalp[0].id or \ + journale[0].bibjson().pissn != pissn: raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_MISMATCHED_ISSNS) def create_article(self, article, account, duplicate_check=True, merge_duplicate=True, From c5392bd7aeb2f6c9198c037062d52619bdea4900 Mon Sep 17 00:00:00 2001 From: Aga Date: Thu, 8 Jun 2023 14:05:12 +0100 Subject: [PATCH 03/15] one more unit test --- ...test_article_acceptable_and_permissions.py | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/doajtest/unit/test_article_acceptable_and_permissions.py b/doajtest/unit/test_article_acceptable_and_permissions.py index 881323ba92..5e0328635f 100644 --- a/doajtest/unit/test_article_acceptable_and_permissions.py +++ b/doajtest/unit/test_article_acceptable_and_permissions.py @@ -164,4 +164,24 @@ def test_issn_validation_against_journal_load_sets(self, value, kwargs): else: with self.assertRaises(exceptions.ArticleNotAcceptable): - self.svc.is_acceptable(article) \ No newline at end of file + self.svc.is_acceptable(article) + + def test_check_validation_for_2_journals(self): + + js = JournalFixtureFactory.make_many_journal_sources(2, in_doaj=True) + journal_in_doaj = Journal(**js[0]) + journal_in_doaj.bibjson().pissn = "1111-1111" + journal_in_doaj.bibjson().eissn = "2222-2222" + journal_in_doaj.save(blocking=True) + + journal_not_in_doaj = Journal(**js[1]) + journal_not_in_doaj.bibjson().pissn = "3333-3333" + journal_not_in_doaj.bibjson().eissn = "4444-4444" + journal_not_in_doaj.save(blocking=True) + + + art_source = ArticleFixtureFactory.make_article_source(pissn="1111-1111", eissn="4444-4444") + article = Article(**art_source) + + with self.assertRaises(exceptions.ArticleNotAcceptable): + self.svc.is_acceptable(article) \ No newline at end of file From bca84c303821ac5323afbc2c2140acada6d04f1c Mon Sep 17 00:00:00 2001 From: Aga Date: Fri, 9 Jun 2023 12:52:30 +0100 Subject: [PATCH 04/15] add new query and unit tests --- doajtest/unit/test_models.py | 27 ++++++++++++++++++++++++ portality/bll/services/article.py | 15 ++++++-------- portality/models/v2/journal.py | 34 +++++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 9 deletions(-) diff --git a/doajtest/unit/test_models.py b/doajtest/unit/test_models.py index 09fb0d0205..1459fab2ab 100644 --- a/doajtest/unit/test_models.py +++ b/doajtest/unit/test_models.py @@ -1641,3 +1641,30 @@ def test_get_name_safe(self): # account does not exist assert models.Account.get_name_safe('not existing account id') == '' + def test_11_find_by_issn(self): + js = JournalFixtureFactory.make_many_journal_sources(2, in_doaj=True) + j1 = models.Journal(**js[0]) + j1.bibjson().pissn = "1111-1111" + j1.bibjson().eissn = "2222-2222" + j1.save(blocking=True) + + j2 = models.Journal(**js[1]) + j2.bibjson().pissn = "3333-3333" + j2.bibjson().eissn = "4444-4444" + j2.save(blocking=True) + + journals = models.Journal.find_by_issn(["1111-1111", "2222-2222"], True) + assert len(journals) == 1 + assert journals[0].id == j1.id + + journals = models.Journal.find_by_issn(["1111-1111", "3333-3333"], True) + assert len(journals) == 2 + assert journals[0].id == j1.id + assert journals[1].id == j2.id + + journals = models.Journal.find_by_issn_exact(["1111-1111", "2222-2222"], True) + assert len(journals) == 1 + assert journals[0].id == j1.id + + journals = models.Journal.find_by_issn_exact(["1111-1111", "3333-3333"], True) + assert len(journals) == 0 \ No newline at end of file diff --git a/portality/bll/services/article.py b/portality/bll/services/article.py index 5430b5f3ef..d9fb6701e9 100644 --- a/portality/bll/services/article.py +++ b/portality/bll/services/article.py @@ -170,20 +170,17 @@ def _validate_issns(article_bibjson: models.ArticleBibJSON): if pissn == eissn: raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_IDENTICAL_PISSN_AND_EISSN) - journalp = models.Journal.find_by_issn([pissn], True) - journale = models.Journal.find_by_issn([eissn], True) - - # check if only one and the same journal matches pissn and eissn and if they are in the correct fields - if len(journalp) != 1 or \ - len(journale) != 1 or \ - journale[0].id != journalp[0].id or \ - journale[0].bibjson().pissn != pissn: + journal = models.Journal.find_by_issn_exact([pissn,eissn], True) + + # check if only one journal matches pissn and eissn and if they are in the correct fields + # no need to check eissn, if pissn matches, pissn and eissn are different and only 1 journal has been found - then eissn matches too + if len(journal) != 1 or journal[0].bibjson().pissn != pissn: raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_MISMATCHED_ISSNS) def create_article(self, article, account, duplicate_check=True, merge_duplicate=True, limit_to_account=True, add_journal_info=False, dry_run=False, update_article_id=None): - """ + """# no need to check eissn, if pissn matches, pissn and eissn are different and only 1 journal has been found - then eissn matches too Create an individual article in the database This method will check and merge any duplicates, and report back on successes and failures in a manner consistent with diff --git a/portality/models/v2/journal.py b/portality/models/v2/journal.py index 41c50a7ce9..5a4997a0de 100644 --- a/portality/models/v2/journal.py +++ b/portality/models/v2/journal.py @@ -70,6 +70,22 @@ def find_by_issn(cls, issns, in_doaj=None, max=10): records = [cls(**r.get("_source")) for r in result.get("hits", {}).get("hits", [])] return records + @classmethod + def find_by_issn_exact(cls, issns, in_doaj=None, max=2): + """ + Finds journal that matches given issns exactly - if no data problems should always be only 1 + """ + if not isinstance(issns, list): + issns = [issns] + if len(issns) > 2: + return [] + q = JournalQuery() + q.find_by_issn_exact(issns, in_doaj=in_doaj, max=max) + result = cls.query(q=q.query) + # create an array of objects, using cls rather than Journal, which means subclasses can use it too + records = [cls(**r.get("_source")) for r in result.get("hits", {}).get("hits", [])] + return records + @classmethod def issns_by_owner(cls, owner, in_doaj=None): q = IssnQuery(owner, in_doaj=in_doaj) @@ -922,6 +938,16 @@ class JournalQuery(object): } } + must_query = { + "track_total_hits": True, + "query": { + "bool": { + "must": [ + ] + } + } + } + all_doaj = { "track_total_hits": True, "query": { @@ -947,6 +973,14 @@ def find_by_issn(self, issns, in_doaj=None, max=10): self.query["query"]["bool"]["must"].append({"term": {"admin.in_doaj": in_doaj}}) self.query["size"] = max + def find_by_issn_exact(self, issns, in_doaj=None, max=10): + self.query = deepcopy(self.must_query) + for issn in issns: + self.query["query"]["bool"]["must"].append({"term": {"index.issn.exact": issn}}) + if in_doaj is not None: + self.query["query"]["bool"]["must"].append({"term": {"admin.in_doaj": in_doaj}}) + self.query["size"] = max + def all_in_doaj(self): q = deepcopy(self.all_doaj) if self.minified: From 0122262cd79df3e1c803812114a1a8c81541253a Mon Sep 17 00:00:00 2001 From: Aga Date: Fri, 9 Jun 2023 13:32:25 +0100 Subject: [PATCH 05/15] remove duplicated code --- portality/bll/services/article.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/portality/bll/services/article.py b/portality/bll/services/article.py index d9fb6701e9..feebf2b481 100644 --- a/portality/bll/services/article.py +++ b/portality/bll/services/article.py @@ -159,9 +159,6 @@ def _validate_issns(article_bibjson: models.ArticleBibJSON): if len(pissn) > 1 or len(eissn) > 1: raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_TOO_MANY_ISSNS) - pissn = article_bibjson.get_one_identifier("pissn") - eissn = article_bibjson.get_one_identifier("eissn") - # no pissn or eissn if not pissn and not eissn: raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_NO_ISSNS) From e0db3f581df83e5cc958f96865d073f05e5b11c3 Mon Sep 17 00:00:00 2001 From: Aga Date: Fri, 9 Jun 2023 14:21:06 +0100 Subject: [PATCH 06/15] add script to find all articles with invalid issns --- ...230609_find_articles_with_invalid_issns.py | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 portality/scripts/230609_find_articles_with_invalid_issns.py diff --git a/portality/scripts/230609_find_articles_with_invalid_issns.py b/portality/scripts/230609_find_articles_with_invalid_issns.py new file mode 100644 index 0000000000..acebc0ae7a --- /dev/null +++ b/portality/scripts/230609_find_articles_with_invalid_issns.py @@ -0,0 +1,68 @@ +from portality import models +from portality.bll.services import article as articlesvc +from portality.bll import exceptions +from portality.core import es_connection +from portality.util import ipt_prefix +import esprit +import csv + +IN_DOAJ = { + "query": { + "bool": { + "must": [ + {"term" : {"admin.in_doaj":True}} + ] + } + } +} + + +if __name__ == "__main__": + + # import argparse + # + # parser = argparse.ArgumentParser() + # parser.add_argument("-o", "--out", help="output file path") + # args = parser.parse_args() + # + # if not args.out: + # print("Please specify an output file path with the -o option") + # parser.print_help() + # exit() + + out = "out.csv" + + with open(out, "w", encoding="utf-8") as f: + writer = csv.writer(f) + writer.writerow(["ID", "PISSN", "EISSN", "Journals found with article's PISSN", "In doaj?", "Journals found with article's EISSN", "In doaj?", "Error"]) + + for a in models.Article.iterate(q=IN_DOAJ, page_size=100, keepalive='5m'): + print(a["id"]) + article = models.Article(_source=a) + bibjson = article.bibjson() + + try: + articlesvc.ArticleService._validate_issns(bibjson) + except exceptions.ArticleNotAcceptable as e: + id = article.id + pissn = bibjson.get_identifiers("pissn") + eissn = bibjson.get_identifiers("eissn") + j_p = [j["id"] for j in models.Journal.find_by_issn(pissn)] + j_p_in_doaj = [] + if (j_p): + for j in j_p: + jobj = models.Journal.pull(j) + if (jobj): + j_p_in_doaj.append(jobj.is_in_doaj()) + else: + j_p_in_doaj.append("n/a") + j_e = [j["id"] for j in models.Journal.find_by_issn(eissn)] + j_e_in_doaj = [] + if (j_e): + for j in j_e: + jobj = models.Journal.pull(j) + if (jobj): + j_e_in_doaj.append(jobj.is_in_doaj()) + else: + j_e_in_doaj.append("n/a") + writer.writerow([id, pissn, eissn, j_p, j_p_in_doaj, j_e, j_e_in_doaj, str(e)]) \ No newline at end of file From 3911cff2657ede8ddd966898ca287cb990569308 Mon Sep 17 00:00:00 2001 From: Aga Date: Fri, 9 Jun 2023 14:25:01 +0100 Subject: [PATCH 07/15] revert mistakenly removed code --- portality/bll/services/article.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/portality/bll/services/article.py b/portality/bll/services/article.py index feebf2b481..17f872582d 100644 --- a/portality/bll/services/article.py +++ b/portality/bll/services/article.py @@ -159,6 +159,9 @@ def _validate_issns(article_bibjson: models.ArticleBibJSON): if len(pissn) > 1 or len(eissn) > 1: raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_TOO_MANY_ISSNS) + pissn = article_bibjson.get_one_identifier("pissn") + eissn = article_bibjson.get_one_identifier("eissn") + # no pissn or eissn if not pissn and not eissn: raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_NO_ISSNS) @@ -167,12 +170,24 @@ def _validate_issns(article_bibjson: models.ArticleBibJSON): if pissn == eissn: raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_IDENTICAL_PISSN_AND_EISSN) - journal = models.Journal.find_by_issn_exact([pissn,eissn], True) + issns = [] + if pissn is not None: + issns.append(pissn) + if eissn is not None: + issns.append(eissn) + + journal = models.Journal.find_by_issn_exact(issns, True) # check if only one journal matches pissn and eissn and if they are in the correct fields # no need to check eissn, if pissn matches, pissn and eissn are different and only 1 journal has been found - then eissn matches too - if len(journal) != 1 or journal[0].bibjson().pissn != pissn: + if len(journal) != 1: raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_MISMATCHED_ISSNS) + if pissn is not None: + if journal[0].bibjson().pissn != pissn: + raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_MISMATCHED_ISSNS) + if eissn is not None: + if journal[0].bibjson().eissn != eissn: + raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_MISMATCHED_ISSNS) def create_article(self, article, account, duplicate_check=True, merge_duplicate=True, limit_to_account=True, add_journal_info=False, dry_run=False, update_article_id=None): From 27878361116f77a3ae1e2656b599001aa522006b Mon Sep 17 00:00:00 2001 From: Aga Date: Fri, 9 Jun 2023 14:29:15 +0100 Subject: [PATCH 08/15] remove unnecessary print --- portality/scripts/230609_find_articles_with_invalid_issns.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/portality/scripts/230609_find_articles_with_invalid_issns.py b/portality/scripts/230609_find_articles_with_invalid_issns.py index acebc0ae7a..c45b8c5d63 100644 --- a/portality/scripts/230609_find_articles_with_invalid_issns.py +++ b/portality/scripts/230609_find_articles_with_invalid_issns.py @@ -37,10 +37,8 @@ writer.writerow(["ID", "PISSN", "EISSN", "Journals found with article's PISSN", "In doaj?", "Journals found with article's EISSN", "In doaj?", "Error"]) for a in models.Article.iterate(q=IN_DOAJ, page_size=100, keepalive='5m'): - print(a["id"]) article = models.Article(_source=a) bibjson = article.bibjson() - try: articlesvc.ArticleService._validate_issns(bibjson) except exceptions.ArticleNotAcceptable as e: From 97dd518a6268d8460e3c6cdf78b072e17172d2bf Mon Sep 17 00:00:00 2001 From: Aga Date: Mon, 26 Jun 2023 14:21:19 +0100 Subject: [PATCH 09/15] capitalisation on issns in messages --- portality/ui/messages.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/portality/ui/messages.py b/portality/ui/messages.py index cf8092e212..2164502784 100644 --- a/portality/ui/messages.py +++ b/portality/ui/messages.py @@ -61,7 +61,7 @@ class Messages(object): EXCEPTION_NO_CONTRIBUTORS_EXPLANATION = "DOAJ requires at least one author for each article." EXCEPTION_TOO_MANY_ISSNS = "Too many ISSNs. Only 2 ISSNs are allowed: one Print ISSN and one Online ISSN." - EXCEPTION_MISMATCHED_ISSNS = "Issns provided don't match any journal." + EXCEPTION_MISMATCHED_ISSNS = "ISSNs provided don't match any journal." EXCEPTION_ISSNS_OF_THE_SAME_TYPE = "Both ISSNs have the same type: {type}" EXCEPTION_IDENTICAL_PISSN_AND_EISSN = "The Print and Online ISSNs supplied are identical. If you supply 2 ISSNs they must be different." EXCEPTION_NO_ISSNS = "Neither Print ISSN nor Online ISSN has been supplied. DOAJ requires at least one ISSN." From 318a3f3db5621c31e23b3fed23a295eb4c2ede37 Mon Sep 17 00:00:00 2001 From: Aga Date: Mon, 26 Jun 2023 14:37:49 +0100 Subject: [PATCH 10/15] create separate function for validation issns against journal rather than in validate issn --- portality/bll/services/article.py | 42 +++++++++++++++++-------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/portality/bll/services/article.py b/portality/bll/services/article.py index 17f872582d..2782538a0f 100644 --- a/portality/bll/services/article.py +++ b/portality/bll/services/article.py @@ -170,25 +170,6 @@ def _validate_issns(article_bibjson: models.ArticleBibJSON): if pissn == eissn: raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_IDENTICAL_PISSN_AND_EISSN) - issns = [] - if pissn is not None: - issns.append(pissn) - if eissn is not None: - issns.append(eissn) - - journal = models.Journal.find_by_issn_exact(issns, True) - - # check if only one journal matches pissn and eissn and if they are in the correct fields - # no need to check eissn, if pissn matches, pissn and eissn are different and only 1 journal has been found - then eissn matches too - if len(journal) != 1: - raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_MISMATCHED_ISSNS) - if pissn is not None: - if journal[0].bibjson().pissn != pissn: - raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_MISMATCHED_ISSNS) - if eissn is not None: - if journal[0].bibjson().eissn != eissn: - raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_MISMATCHED_ISSNS) - def create_article(self, article, account, duplicate_check=True, merge_duplicate=True, limit_to_account=True, add_journal_info=False, dry_run=False, update_article_id=None): @@ -285,6 +266,7 @@ def is_acceptable(self, article: models.Article): raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_NO_DOI_NO_FULLTEXT) self._validate_issns(bj) + self.does_article_match_journal(bj) # is journal in doaj (we do this check last as it has more performance impact) journal = article.get_journal() @@ -292,6 +274,28 @@ def is_acceptable(self, article: models.Article): raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_ADDING_ARTICLE_TO_WITHDRAWN_JOURNAL) @staticmethod + def does_article_match_journal(article_bibjson: models.ArticleBibJSON): + pissn = article_bibjson.get_one_identifier("pissn") + eissn = article_bibjson.get_one_identifier("eissn") + + if pissn is not None: + issns.append(pissn) + if eissn is not None: + issns.append(eissn) + + journal = models.Journal.find_by_issn_exact(issns, True) + + # check if only one journal matches pissn and eissn and if they are in the correct fields + # no need to check eissn, if pissn matches, pissn and eissn are different and only 1 journal has been found - then eissn matches too + if len(journal) != 1: + raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_MISMATCHED_ISSNS) + if pissn is not None: + if journal[0].bibjson().pissn != pissn: + raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_MISMATCHED_ISSNS) + if eissn is not None: + if journal[0].bibjson().eissn != eissn: + raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_MISMATCHED_ISSNS) + @staticmethod def is_legitimate_owner(article, owner): """ Determine if the owner id is the owner of the article From 1b6624ac7ae6b362ffa3e243197469f225468742 Mon Sep 17 00:00:00 2001 From: Aga Date: Mon, 26 Jun 2023 15:09:57 +0100 Subject: [PATCH 11/15] naming bugs --- portality/bll/services/article.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/portality/bll/services/article.py b/portality/bll/services/article.py index 2782538a0f..733788d682 100644 --- a/portality/bll/services/article.py +++ b/portality/bll/services/article.py @@ -295,6 +295,8 @@ def does_article_match_journal(article_bibjson: models.ArticleBibJSON): if eissn is not None: if journal[0].bibjson().eissn != eissn: raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_MISMATCHED_ISSNS) + + return journal[0] @staticmethod def is_legitimate_owner(article, owner): """ From a8353b775598762854083c563df679ba152a97c9 Mon Sep 17 00:00:00 2001 From: Aga Date: Mon, 26 Jun 2023 15:10:33 +0100 Subject: [PATCH 12/15] return journal after match with validation and use it in is_acceptable method --- portality/bll/services/article.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/portality/bll/services/article.py b/portality/bll/services/article.py index 733788d682..93b48376f9 100644 --- a/portality/bll/services/article.py +++ b/portality/bll/services/article.py @@ -266,10 +266,10 @@ def is_acceptable(self, article: models.Article): raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_NO_DOI_NO_FULLTEXT) self._validate_issns(bj) - self.does_article_match_journal(bj) + journal = self.does_article_match_journal(bj) # is journal in doaj (we do this check last as it has more performance impact) - journal = article.get_journal() + # journal = article.get_journal() if journal is None or not journal.is_in_doaj(): raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_ADDING_ARTICLE_TO_WITHDRAWN_JOURNAL) From 6331c1e39f5f150c8bd319cbb5d69f1d28eb66e6 Mon Sep 17 00:00:00 2001 From: Aga Date: Wed, 28 Jun 2023 11:08:21 +0100 Subject: [PATCH 13/15] refactoring --- portality/bll/services/article.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/portality/bll/services/article.py b/portality/bll/services/article.py index 93b48376f9..982aa6d505 100644 --- a/portality/bll/services/article.py +++ b/portality/bll/services/article.py @@ -266,7 +266,7 @@ def is_acceptable(self, article: models.Article): raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_NO_DOI_NO_FULLTEXT) self._validate_issns(bj) - journal = self.does_article_match_journal(bj) + journal = self.match_journal_with_validation(bj) # is journal in doaj (we do this check last as it has more performance impact) # journal = article.get_journal() From 900ecf7b92e283c4884553d2e30ee64140a05149 Mon Sep 17 00:00:00 2001 From: Aga Date: Wed, 28 Jun 2023 11:24:43 +0100 Subject: [PATCH 14/15] fix errors found by UT --- portality/bll/services/article.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/portality/bll/services/article.py b/portality/bll/services/article.py index 982aa6d505..77aac68600 100644 --- a/portality/bll/services/article.py +++ b/portality/bll/services/article.py @@ -159,9 +159,6 @@ def _validate_issns(article_bibjson: models.ArticleBibJSON): if len(pissn) > 1 or len(eissn) > 1: raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_TOO_MANY_ISSNS) - pissn = article_bibjson.get_one_identifier("pissn") - eissn = article_bibjson.get_one_identifier("eissn") - # no pissn or eissn if not pissn and not eissn: raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_NO_ISSNS) @@ -274,10 +271,12 @@ def is_acceptable(self, article: models.Article): raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_ADDING_ARTICLE_TO_WITHDRAWN_JOURNAL) @staticmethod - def does_article_match_journal(article_bibjson: models.ArticleBibJSON): + def match_journal_with_validation(article_bibjson: models.ArticleBibJSON): pissn = article_bibjson.get_one_identifier("pissn") eissn = article_bibjson.get_one_identifier("eissn") + issns = [] + if pissn is not None: issns.append(pissn) if eissn is not None: From e313ee5fcd9dd52568c4458bd9e8e9c2f71c84ab Mon Sep 17 00:00:00 2001 From: Richard Jones Date: Fri, 4 Aug 2023 14:33:58 +0100 Subject: [PATCH 15/15] minor code layout tweaks --- portality/bll/services/article.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/portality/bll/services/article.py b/portality/bll/services/article.py index 77aac68600..777d499636 100644 --- a/portality/bll/services/article.py +++ b/portality/bll/services/article.py @@ -170,7 +170,7 @@ def _validate_issns(article_bibjson: models.ArticleBibJSON): def create_article(self, article, account, duplicate_check=True, merge_duplicate=True, limit_to_account=True, add_journal_info=False, dry_run=False, update_article_id=None): - """# no need to check eissn, if pissn matches, pissn and eissn are different and only 1 journal has been found - then eissn matches too + """ Create an individual article in the database This method will check and merge any duplicates, and report back on successes and failures in a manner consistent with @@ -249,7 +249,8 @@ def has_permissions(self, account, article, limit_to_account): def is_acceptable(self, article: models.Article): """ conduct some deep validation on the article to make sure we will accept it - or the moment, this just means making sure it has a DOI and a fulltext + this just means making sure it has a DOI and a fulltext, and that its ISSNs + match a single journal """ try: bj = article.bibjson() @@ -266,7 +267,6 @@ def is_acceptable(self, article: models.Article): journal = self.match_journal_with_validation(bj) # is journal in doaj (we do this check last as it has more performance impact) - # journal = article.get_journal() if journal is None or not journal.is_in_doaj(): raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_ADDING_ARTICLE_TO_WITHDRAWN_JOURNAL) @@ -296,6 +296,7 @@ def match_journal_with_validation(article_bibjson: models.ArticleBibJSON): raise exceptions.ArticleNotAcceptable(message=Messages.EXCEPTION_MISMATCHED_ISSNS) return journal[0] + @staticmethod def is_legitimate_owner(article, owner): """