Skip to content

Commit

Permalink
curation: add rules to check additional desc
Browse files Browse the repository at this point in the history
  • Loading branch information
yashlamba committed Dec 12, 2024
1 parent a11ff41 commit cb6b506
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 6 deletions.
21 changes: 15 additions & 6 deletions site/zenodo_rdm/curation/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
"""Moderation config."""

from .rules import (
additional_desc_contains_high_conf_keywords,
additional_desc_contains_low_conf_keywords,
award_acronym_in_additional_description,
award_acronym_in_description,
award_acronym_in_title,
contains_high_conf_keywords,
Expand All @@ -25,22 +28,28 @@
"user_verified": user_verified,
"contains_low_conf_keywords": contains_low_conf_keywords,
"contains_high_conf_keywords": contains_high_conf_keywords,
"additional_desc_contains_low_conf_keywords": additional_desc_contains_low_conf_keywords,
"additional_desc_contains_high_conf_keywords": additional_desc_contains_high_conf_keywords,
"award_acronym_in_additional_description": award_acronym_in_additional_description,
}
"""Rules to run for EU Curation."""

# Maps each curation rule name to the value associated with that rule's result.
# NOTE(review): several keys below appear twice (this listing looks like a
# rendered diff that keeps both the old and the new line). If evaluated as a
# single Python dict literal, the LAST occurrence of a duplicated key wins,
# so the effective value of every duplicated key here is 0.
CURATION_SCORES = {
"award_acronym_in_title": 5,
"award_acronym_in_description": 10,
"award_acronym_in_title": 0,
"award_acronym_in_description": 0,
# NOTE(review): these two entries are booleans rather than integer scores;
# presumably the consumer treats them differently from numeric rules - confirm.
"test_phrases_in_record": False,
"published_before_award_start": False,
"user_verified": 5,
"contains_low_conf_keywords": 5,
"contains_high_conf_keywords": 10,
"user_verified": 0,
"contains_low_conf_keywords": 0,
"contains_high_conf_keywords": 0,
# Entries for the rules that inspect ``additional_descriptions``.
"additional_desc_contains_low_conf_keywords": 0,
"additional_desc_contains_high_conf_keywords": 0,
"award_acronym_in_additional_description": 0,
}
"""Rule scores for EU Curation."""


# NOTE(review): two consecutive assignments to the same name; the second one
# rebinds it, so the effective "EU_RECORDS_CURATION" threshold is 100.
# Presumably the first line is the pre-change value kept by the diff view - confirm.
CURATION_THRESHOLDS = {"EU_RECORDS_CURATION": 15}
CURATION_THRESHOLDS = {"EU_RECORDS_CURATION": 100}
"""Threshold values for curators/rules."""


Expand Down
42 changes: 42 additions & 0 deletions site/zenodo_rdm/curation/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,3 +113,45 @@ def contains_high_conf_keywords(record):
if word.lower() in record_data.lower():
return True
return False


def additional_desc_contains_high_conf_keywords(record):
    """Check if a high-confidence EU keyword occurs in the additional descriptions.

    The ``description`` values of all ``additional_descriptions`` entries in
    the record metadata are joined with spaces and searched case-insensitively
    (substring match) for each keyword listed in the
    ``CURATION_HIGH_CONF_KEYWORDS_EU`` application config.

    :param record: Record object exposing a ``metadata`` mapping.
    :returns: ``True`` if at least one keyword is found, ``False`` otherwise
        (including when no keywords are configured or the record has no
        additional descriptions).
    """
    # Fall back to empty collections so an unset config key or a null
    # metadata field does not raise when iterated.
    high_conf_keywords_eu = (
        current_app.config.get("CURATION_HIGH_CONF_KEYWORDS_EU") or []
    )
    additional_descriptions = record.metadata.get("additional_descriptions") or []
    # Entries without a description contribute an empty string instead of
    # breaking ``str.join``; lowercase once rather than once per keyword.
    record_data = " ".join(
        x.get("description") or "" for x in additional_descriptions
    ).lower()

    # TODO could possibly return a number for higher conf
    return any(word.lower() in record_data for word in high_conf_keywords_eu)


def additional_desc_contains_low_conf_keywords(record):
    """Check if a low-confidence EU keyword occurs in the additional descriptions.

    The ``description`` values of all ``additional_descriptions`` entries in
    the record metadata are joined with spaces and searched case-insensitively
    (substring match) for each keyword listed in the
    ``CURATION_LOW_CONF_KEYWORDS_EU`` application config.

    :param record: Record object exposing a ``metadata`` mapping.
    :returns: ``True`` if at least one keyword is found, ``False`` otherwise
        (including when no keywords are configured or the record has no
        additional descriptions).
    """
    # Fall back to empty collections so an unset config key or a null
    # metadata field does not raise when iterated.
    low_conf_keywords_eu = (
        current_app.config.get("CURATION_LOW_CONF_KEYWORDS_EU") or []
    )
    additional_descriptions = record.metadata.get("additional_descriptions") or []
    # Entries without a description contribute an empty string instead of
    # breaking ``str.join``; lowercase once rather than once per keyword.
    record_data = " ".join(
        x.get("description") or "" for x in additional_descriptions
    ).lower()

    # TODO could possibly return a number for higher conf
    return any(word.lower() in record_data for word in low_conf_keywords_eu)


def award_acronym_in_additional_description(record):
    """Check if an EU award acronym occurs in the record's additional descriptions.

    For every funding entry whose funder id is ``"00k4n6c32"`` and which
    carries an award id, the award is resolved through the ``awards`` service
    and its ``acronym`` is searched case-insensitively (substring match) in
    the space-joined ``description`` values of the record's
    ``additional_descriptions``.

    :param record: Record object exposing a ``metadata`` mapping.
    :returns: ``True`` as soon as one resolved award acronym is found,
        ``False`` otherwise.
    """
    award_service = current_service_registry.get("awards")
    additional_descriptions = record.metadata.get("additional_descriptions") or []
    # Entries without a description contribute an empty string instead of
    # breaking ``str.join``; lowercase once rather than once per award.
    record_data = " ".join(
        x.get("description") or "" for x in additional_descriptions
    ).lower()

    funding = record.metadata.get("funding") or []
    for f in funding:
        # Only awards of this funder id are considered.
        # NOTE(review): presumably the European Commission's id - confirm.
        if f["funder"].get("id") != "00k4n6c32":
            continue
        # Funding entries may reference a funder without a specific award.
        award_id = (f.get("award") or {}).get("id")
        if not award_id:
            continue
        award = award_service.record_cls.pid.resolve(award_id)
        acronym = award.get("acronym")
        if acronym and acronym.lower() in record_data:
            return True
    return False

0 comments on commit cb6b506

Please sign in to comment.