Skip to content

Commit

Permalink
curation: update scoring config; add proxies
Browse files Browse the repository at this point in the history
  • Loading branch information
yashlamba authored and slint committed Nov 28, 2024
1 parent 3eff5bd commit 8427967
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 16 deletions.
7 changes: 7 additions & 0 deletions site/zenodo_rdm/curation/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,12 @@
}
"""Rules to run for EU Curation."""

# Each key names a curation rule. An integer value is the number of points
# the rule contributes to the total when it matches; a boolean value is
# intended as a verdict that is returned as soon as the rule matches.
CURATION_SCORES = dict(
    award_acronym_in_title=5,
    award_acronym_in_description=10,
    test_phrases_in_record=False,
)
"""Rule scores for EU Curation."""

# NOTE(review): the name reads as an on/off switch while the docstring below
# talks about dry runs -- confirm the intended meaning against the consumer.
CURATION_ENABLE_EU_CURATOR = False
"""Controls whether to dry run EU Curation."""
22 changes: 13 additions & 9 deletions site/zenodo_rdm/curation/curators.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,18 @@

"""Curators for ZenodoRDM Curation."""


from flask import current_app
from invenio_access.permissions import system_identity
from invenio_rdm_records.proxies import current_record_communities_service
from invenio_records_resources.services.uow import UnitOfWork

from zenodo_rdm.curation.proxies import current_curation


class BaseCurator:
"""Base Curator class."""

def __init__(self, dry=False, raise_exc=False) -> None:
def __init__(self, dry=False, raise_exc=False):
"""Constructor."""
self.dry = dry
self.raise_exc = raise_exc
Expand Down Expand Up @@ -59,13 +60,16 @@ def _evaluator(self, results):
"""Evaluate result for EC curation."""
score = 0
for rule, result in results.items():
# TODO put in config?
if rule == "award_in_title" and result:
score += 5
if rule == "award_in_description" and result:
score += 10
if rule == "test_word_record" and result:
return False
rule_score = current_curation.scores.get(rule)
if isinstance(rule_score, int):
score += rule_score if result else 0
elif isinstance(rule_score, bool):
if result:
return rule_score
else:
continue
else:
raise ValueError("Unsupported score type configured.")
return score >= current_app.config.get("CURATION_EU_CURATION_THRESHOLD")

@property
Expand Down
8 changes: 8 additions & 0 deletions site/zenodo_rdm/curation/ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,11 @@ def init_app(self, app):
"""Flask application initialization."""
self.init_config(app)
app.extensions["zenodo-curation"] = self

@cached_property
def scores(self):
    """Return curation scores used for rules.

    Starts from the module defaults in ``config.CURATION_SCORES`` and lets
    the application's ``CURATION_SCORES`` setting override individual
    rules. The merged mapping is computed on first access and then cached
    on the extension instance.
    """
    merged = dict(config.CURATION_SCORES)
    # Application-level overrides win over the packaged defaults.
    merged.update(current_app.config.get("CURATION_SCORES", {}))
    return merged
13 changes: 13 additions & 0 deletions site/zenodo_rdm/curation/proxies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2023 CERN.
#
# ZenodoRDM is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.

"""Proxy objects for easier access to application objects."""

from flask import current_app
from werkzeug.local import LocalProxy

def _get_curation_ext():
    """Look up the curation extension registered on the current app."""
    return current_app.extensions["zenodo-curation"]


# Resolved lazily on each access, so it can be imported at module level
# before an application context exists.
current_curation = LocalProxy(_get_curation_ext)
3 changes: 0 additions & 3 deletions site/zenodo_rdm/curation/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

def award_acronym_in_description(record):
"""Check if EU award name in record description."""

award_service = current_service_registry.get("awards")
description = record.metadata["description"]
funding = record.metadata["funding"]
Expand All @@ -29,7 +28,6 @@ def award_acronym_in_description(record):

def award_acronym_in_title(record):
"""Check if EU award name in record title."""

award_service = current_service_registry.get("awards")
title = record.metadata["title"]
funding = record.metadata["funding"]
Expand All @@ -45,7 +43,6 @@ def award_acronym_in_title(record):

def test_phrases_in_record(record):
"""Check if test words in record."""

test_phrases = current_app.config.get("CURATION_TEST_PHRASES")
record_data = record.metadata["title"] + " " + record.metadata["description"]

Expand Down
16 changes: 12 additions & 4 deletions site/zenodo_rdm/curation/tasks.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,19 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2024 CERN.
#
# ZenodoRDM is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.
"""Tasks for curation."""

from datetime import datetime, timedelta

from celery import shared_task
from flask import current_app
from invenio_access.permissions import system_identity
from invenio_rdm_records.proxies import current_rdm_records_service as records_service
from invenio_search.engine import dsl

from zenodo_rdm.curation.curators import EURecordCurator


Expand Down Expand Up @@ -52,13 +61,12 @@ def run_eu_record_curation(since):
try:
result = curator.run(record=record)
ctx["processed"] += 1
except Exception:
if result["evaluation"]:
ctx["approved"] += 1
except Exception as e:
# NOTE Since curator's raise_exc is by default false, rules would not fail.
# This catches failure due to other reasons
ctx["failed"] += 1
if result["evaluation"]:
ctx["approved"] += 1

current_app.logger.error(
f"EU curation processed",
extra=ctx,
Expand Down

0 comments on commit 8427967

Please sign in to comment.