From a470c6312a785d116c48f48cb167a0760c6c7ed5 Mon Sep 17 00:00:00 2001
From: yashlamba <yashlamba2000@gmail.com>
Date: Mon, 11 Nov 2024 10:37:26 +0100
Subject: [PATCH] moderation: add rule results to dict and eval

---
 invenio.cfg                            |  8 ++++---
 site/zenodo_rdm/moderation/config.py   | 26 +++++++++++-----------
 site/zenodo_rdm/moderation/handlers.py | 30 +++++++++++++++++---------
 3 files changed, 38 insertions(+), 26 deletions(-)

diff --git a/invenio.cfg b/invenio.cfg
index d211678a..4af9daa8 100644
--- a/invenio.cfg
+++ b/invenio.cfg
@@ -76,7 +76,7 @@ from zenodo_rdm.github.schemas import CitationMetadataSchema
 from zenodo_rdm.legacy.resources import record_serializers
 from zenodo_rdm.metrics.config import METRICS_CACHE_UPDATE_INTERVAL
 from zenodo_rdm.moderation.errors import UserBlockedException
-from zenodo_rdm.moderation.handlers import CommunityScoreHandler, RecordScoreHandler
+from zenodo_rdm.moderation.handlers import CommunityModerationHandler, RecordModerationHandler
 from zenodo_rdm.openaire.records.components import OpenAIREComponent
 from zenodo_rdm.permissions import (
     ZenodoCommunityPermissionPolicy,
@@ -817,11 +817,11 @@ RDM_RECORDS_SERVICE_COMPONENTS = DefaultRecordsComponents + [
 """Addd OpenAIRE component to records service."""
 
 RDM_CONTENT_MODERATION_HANDLERS = [
-    RecordScoreHandler(),
+    RecordModerationHandler(),
 ]
 """Records content moderation handlers."""
 RDM_COMMUNITY_CONTENT_MODERATION_HANDLERS = [
-    CommunityScoreHandler(),
+    CommunityModerationHandler(),
 ]
 """Community content moderation handlers."""
 
@@ -1062,3 +1062,5 @@ COMMUNITIES_SHOW_BROWSE_MENU_ENTRY = True
 
 JOBS_ADMINISTRATION_ENABLED = True
 """Enable Jobs administration view."""
+
+SPAM_DETECTOR_MODEL = "spam-scikit:1.0.0"
diff --git a/site/zenodo_rdm/moderation/config.py b/site/zenodo_rdm/moderation/config.py
index 4c2af3ae..309e0008 100644
--- a/site/zenodo_rdm/moderation/config.py
+++ b/site/zenodo_rdm/moderation/config.py
@@ -45,21 +45,21 @@
 MODERATION_EXEMPT_USERS = []
 """List of users exempt from moderation."""
 
-MODERATION_RECORD_SCORE_RULES = [
-    verified_user_rule,
-    links_rule,
-    files_rule,
-    text_sanitization_rule,
-    match_query_rule,
-]
+MODERATION_RECORD_SCORE_RULES = {
+    "verified_user_rule": verified_user_rule,
+    "links_rule": links_rule,
+    "files_rule": files_rule,
+    "text_sanitization_rule": text_sanitization_rule,
+    "match_query_rule": match_query_rule,
+}
 """Scoring rules for record moderation."""
 
-MODERATION_COMMUNITY_SCORE_RULES = [
-    links_rule,
-    text_sanitization_rule,
-    verified_user_rule,
-    match_query_rule,
-]
+MODERATION_COMMUNITY_SCORE_RULES = {
+    "links_rule": links_rule,
+    "text_sanitization_rule": text_sanitization_rule,
+    "verified_user_rule": verified_user_rule,
+    "match_query_rule": match_query_rule,
+}
 """Scoring rules for communtiy moderation."""
 
 MODERATION_PERCOLATOR_INDEX_PREFIX = "moderation-queries"
diff --git a/site/zenodo_rdm/moderation/handlers.py b/site/zenodo_rdm/moderation/handlers.py
index 1118aed4..b460baf3 100644
--- a/site/zenodo_rdm/moderation/handlers.py
+++ b/site/zenodo_rdm/moderation/handlers.py
@@ -44,7 +44,7 @@
 from .uow import ExceptionOp
 
 
-class BaseScoreHandler:
+class BaseModerationHandler:
     """Base handler to calculate moderation scores based on rules."""
 
     def __init__(self, rules=None):
@@ -56,7 +56,11 @@ def rules(self):
         """Get scoring rules."""
         if isinstance(self._rules, str):
             return current_app.config[self._rules]
-        return self._rules or []
+        return self._rules or {}
+
+    def evaluate_result(self, params):
+        """Sum the per-rule results in ``params`` into one aggregate value."""
+        return sum(params.values())
 
     @property
     def should_apply_actions(self):
@@ -77,17 +81,19 @@ def run(self, identity, draft=None, record=None, user=None, uow=None):
                 )
                 return
 
-            score = 0
-            for rule in self.rules:
-                score += rule(identity, draft=draft, record=record)
+            results = {}
+            for name, rule in self.rules.items():
+                results[name] = rule(identity, draft=draft, record=record)
 
             action_ctx = {
                 "user_id": user.id,
                 "record_pid": record.pid.pid_value,
-                "score": score,
+                "results": results,
             }
             current_app.logger.debug("Moderation score calculated", extra=action_ctx)
-            if score > current_scores.spam_threshold:
+
+            evaluation = self.evaluate_result(results)
+            if evaluation > current_scores.spam_threshold:
                 action_ctx["action"] = "block"
                 if self.should_apply_actions:
                     # If user is verified, we need to (re)open the moderation
@@ -102,9 +108,11 @@ def run(self, identity, draft=None, record=None, user=None, uow=None):
                         "Block moderation action triggered",
                         extra=action_ctx,
                     )
-            elif score < current_scores.ham_threshold:
+
+            elif evaluation < current_scores.ham_threshold:
                 action_ctx["action"] = "approve"
 
+                # If the user is already verified, we don't need to verify again
                 if user.verified:
                     current_app.logger.debug(
                         "User is verified, skipping moderation actions",
@@ -187,7 +195,7 @@ def _block(self, user, uow, action_ctx):
         raise UserBlockedException()
 
 
-class RecordScoreHandler(BaseHandler, BaseScoreHandler):
+class RecordModerationHandler(BaseHandler, BaseModerationHandler):
     """Handler for calculating scores for records."""
 
     def __init__(self):
@@ -222,7 +230,9 @@ def publish(self, identity, draft=None, record=None, uow=None, **kwargs):
         self.run(identity, record=record, user=user, uow=uow)
 
 
-class CommunityScoreHandler(community_moderation.BaseHandler, BaseScoreHandler):
+class CommunityModerationHandler(
+    community_moderation.BaseHandler, BaseModerationHandler
+):
     """Handler for calculating scores for communities."""
 
     def __init__(self):