From 03cad791e962f5434db6d89106c9d9f05134a9b5 Mon Sep 17 00:00:00 2001
From: yashlamba <yashlamba2000@gmail.com>
Date: Thu, 28 Nov 2024 11:08:46 +0100
Subject: [PATCH] curation: update config & logging; minor fixes

---
 site/setup.cfg                       |  2 +-
 site/zenodo_rdm/curation/config.py   |  5 +++++
 site/zenodo_rdm/curation/curators.py | 22 ++++++++++++----------
 site/zenodo_rdm/curation/ext.py      | 10 ++++++++--
 site/zenodo_rdm/curation/rules.py    | 20 +++++++++++++-------
 site/zenodo_rdm/curation/tasks.py    | 21 ++++++++++++++-------
 6 files changed, 53 insertions(+), 27 deletions(-)

diff --git a/site/setup.cfg b/site/setup.cfg
index 7e30e2a2..e0cec983 100644
--- a/site/setup.cfg
+++ b/site/setup.cfg
@@ -71,7 +71,7 @@ invenio_celery.tasks =
     zenodo_rdm_openaire = zenodo_rdm.openaire.tasks
     zenodo_rdm_moderation = zenodo_rdm.moderation.tasks
     zenodo_stats = zenodo_rdm.stats.tasks
-    zenodo_rdm_curations = zenodo_rdm.curation.tasks
+    zenodo_rdm_curation = zenodo_rdm.curation.tasks
 invenio_oauth2server.scopes =
     deposit_write_scope = zenodo_rdm.legacy.scopes:deposit_write_scope
     deposit_actions_scope = zenodo_rdm.legacy.scopes:deposit_actions_scope
diff --git a/site/zenodo_rdm/curation/config.py b/site/zenodo_rdm/curation/config.py
index 9e0f47e2..a9efae57 100644
--- a/site/zenodo_rdm/curation/config.py
+++ b/site/zenodo_rdm/curation/config.py
@@ -27,5 +27,10 @@
 }
 """Rule scores for EU Curation."""
 
+
+CURATION_THRESHOLDS = {"EU_RECORDS_CURATION": 10}
+"""Threshold values for curators/rules."""
+
+
 CURATION_ENABLE_EU_CURATOR = False
 """Controls whether to dry run EU Curation."""
diff --git a/site/zenodo_rdm/curation/curators.py b/site/zenodo_rdm/curation/curators.py
index 81698e9d..acddef99 100644
--- a/site/zenodo_rdm/curation/curators.py
+++ b/site/zenodo_rdm/curation/curators.py
@@ -18,10 +18,9 @@
 class BaseCurator:
     """Base Curator class."""
 
-    def __init__(self, dry=False, raise_exc=False):
+    def __init__(self, dry=False):
         """Constructor."""
         self.dry = dry
-        self.raise_exc = raise_exc
 
     def _evaluator(self, results):
         """Evaluates final result for based on results dict."""
@@ -32,14 +31,14 @@ def rules(self):
         """Get rules to run."""
         raise NotImplementedError()
 
-    def run(self, record):
+    def run(self, record, raise_rule_exc=False):
         """Run rules for the curator and evaluate result."""
         rule_results = {}
         for name, rule in self.rules.items():
             try:
                 rule_results[name] = rule(record)
             except Exception as e:
-                if self.raise_exc:
+                if raise_rule_exc:
                     raise e
                 rule_results[name] = None
 
@@ -61,7 +60,9 @@ def _evaluator(self, results):
         score = 0
         for rule, result in results.items():
             rule_score = current_curation.scores.get(rule)
-            if isinstance(rule_score, int):
+            if result is None:
+                continue
+            elif isinstance(rule_score, int):
                 score += rule_score if result else 0
             elif isinstance(rule_score, bool):
                 if result:
@@ -70,7 +71,7 @@ def _evaluator(self, results):
                     continue
             else:
                 raise ValueError("Unsupported score type configured.")
-        return score >= current_app.config.get("CURATION_EU_CURATION_THRESHOLD")
+        return score >= current_curation.thresholds.get("EU_RECORDS_CURATION")
 
     @property
     def rules(self):
@@ -80,15 +81,16 @@ def rules(self):
     def _post_run(self, record, result):
         """Actions to take after run."""
         if self.dry:
-            current_app.logger.info(
-                f"Processed record ID: {record.pid.pid_value}", result
-            )  # TODO use error? Or should we log from the task
+            current_app.logger.error(
+                "Evaluation for EU record curator",
+                extra={"record_id": record.pid.pid_value, "result": result},
+            )
             return
         if result["evaluation"]:
             with UnitOfWork() as uow:
                 current_record_communities_service.bulk_add(
                     system_identity,
-                    current_app.config.get("EU_COMMUNITY_ID"),
+                    current_app.config.get("EU_COMMUNITY_UUID"),
                     [record.pid.pid_value],
                     uow=uow,
                 )
diff --git a/site/zenodo_rdm/curation/ext.py b/site/zenodo_rdm/curation/ext.py
index 3a9dfc68..2f7422f4 100644
--- a/site/zenodo_rdm/curation/ext.py
+++ b/site/zenodo_rdm/curation/ext.py
@@ -7,8 +7,6 @@
 
 """ZenodoRDM Curation module."""
 
-from types import SimpleNamespace
-
 from flask import current_app
 from werkzeug.utils import cached_property
 
@@ -42,3 +40,11 @@ def scores(self):
             **config.CURATION_SCORES,
             **current_app.config.get("CURATION_SCORES", {}),
         }
+
+    @cached_property
+    def thresholds(self):
+        """Return curation thresholds used for rules/curators."""
+        return {
+            **config.CURATION_THRESHOLDS,
+            **current_app.config.get("CURATION_THRESHOLDS", {}),
+        }
diff --git a/site/zenodo_rdm/curation/rules.py b/site/zenodo_rdm/curation/rules.py
index eba7b628..1e1f5c72 100644
--- a/site/zenodo_rdm/curation/rules.py
+++ b/site/zenodo_rdm/curation/rules.py
@@ -19,9 +19,11 @@ def award_acronym_in_description(record):
 
     for f in funding:
         if f["funder"]["id"] == "00k4n6c32":
-            if "award" in f:
-                award = award_service.record_cls.pid.resolve(f["award"]["id"])
-                if award["acronym"].lower() in description.lower():
+            if award_id := f.get("award", {}).get("id"):
+                award = award_service.record_cls.pid.resolve(award_id)
+                if award.get("acronym") and (
+                    award.get("acronym").lower() in description.lower()
+                ):
                     return True
     return False
 
@@ -34,9 +36,11 @@ def award_acronym_in_title(record):
 
     for f in funding:
         if f["funder"]["id"] == "00k4n6c32":
-            if "award" in f:
-                award = award_service.record_cls.pid.resolve(f["award"]["id"])
-                if award["acronym"].lower() in title.lower():
+            if award_id := f.get("award", {}).get("id"):
+                award = award_service.record_cls.pid.resolve(award_id)
+                if award.get("acronym") and (
+                    award.get("acronym").lower() in title.lower()
+                ):
                     return True
     return False
 
@@ -44,7 +48,9 @@ def award_acronym_in_title(record):
 def test_phrases_in_record(record):
     """Check if test words in record."""
     test_phrases = current_app.config.get("CURATION_TEST_PHRASES")
-    record_data = record.metadata["title"] + " " + record.metadata["description"]
+    record_data = (
+        record.metadata["title"] + " " + record.metadata.get("description", "")
+    )
 
     for word in test_phrases:
         if word.lower() in record_data.lower():
diff --git a/site/zenodo_rdm/curation/tasks.py b/site/zenodo_rdm/curation/tasks.py
index ee7a3970..fd23482d 100644
--- a/site/zenodo_rdm/curation/tasks.py
+++ b/site/zenodo_rdm/curation/tasks.py
@@ -6,7 +6,7 @@
 # it under the terms of the MIT License; see LICENSE file for more details.
 """Tasks for curation."""
 
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 
 from celery import shared_task
 from flask import current_app
@@ -32,7 +32,9 @@ def run_eu_record_curation(since):
             dsl.Q(
                 "range",
                 created={
-                    "lte": (datetime.now() - timedelta(days=30)).isoformat(),
+                    "lte": (
+                        datetime.now(timezone.utc) - timedelta(days=30)
+                    ).isoformat(),
                 },
             ),
             dsl.Q(
@@ -45,7 +47,11 @@ def run_eu_record_curation(since):
         must_not=[
             dsl.Q(
                 "term",
-                **{"parent.communities.ids": current_app.config.get("EU_COMMUNITY_ID")},
+                **{
+                    "parent.communities.ids": current_app.config.get(
+                        "EU_COMMUNITY_UUID"
+                    )
+                },
             )
         ],
     )
@@ -63,11 +69,12 @@ def run_eu_record_curation(since):
             ctx["processed"] += 1
             if result["evaluation"]:
                 ctx["approved"] += 1
-        except Exception as e:
-            # NOTE Since curator's raise_exc is by default false, rules would not fail.
-            # This catches failure due to other reasons
+        except Exception:
+            # NOTE Since curator's raise_rules_exc is by default false, rules would not fail.
+            # This catches failures due to other reasons
             ctx["failed"] += 1
+
     current_app.logger.error(
-        f"EU curation processed",
+        "EU curation processed",
         extra=ctx,
     )