Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

metrics: Added the metrics module from legacy #500

Merged
merged 11 commits into from
Sep 18, 2023
17 changes: 17 additions & 0 deletions invenio.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ from zenodo_rdm.legacy.resources import record_serializers
from zenodo_rdm.tokens import RATSubjectSchema
from zenodo_rdm import providers as zenodo_providers
from zenodo_rdm import facets as zenodo_community_facets
from zenodo_rdm.metrics.config import METRICS_CACHE_UPDATE_INTERVAL

# Flask
# =====
# See https://flask.palletsprojects.com/en/1.1.x/config/
Expand Down Expand Up @@ -109,6 +111,17 @@ APP_RDM_ROUTES["index"] = ("/", frontpage_view_function)
CELERY_BEAT_SCHEDULE.pop("file-checks", None)
CELERY_BEAT_SCHEDULE.pop("file-integrity-report", None)

# Schedule the periodic recomputation of the "openaire-nexus" metrics.
# A new dict is built from the existing schedule (rather than adding the key in
# place); the task runs every METRICS_CACHE_UPDATE_INTERVAL.
CELERY_BEAT_SCHEDULE = {
    **CELERY_BEAT_SCHEDULE,
    "metrics-calculate": {
        "task": "zenodo_rdm.metrics.tasks.calculate_metrics",
        "kwargs": {
            # Key into METRICS_DATA selecting which group of metrics to compute.
            "metric_id": "openaire-nexus",
        },
        "schedule": METRICS_CACHE_UPDATE_INTERVAL,
    },
}

# Flask-Babel
# ===========
# See https://flask-babel.tkte.ch/#configuration
Expand Down Expand Up @@ -501,3 +514,7 @@ ZENODO_RECORDS_UI_CITATIONS_ENABLE = True

OPENAIRE_PORTAL_URL = 'https://explore.openaire.eu'
"""URL to OpenAIRE portal."""

# IDs of the UptimeRobot monitors whose uptime is averaged by
# ZenodoMetric.get_uptime (membership-tested and counted with len()).
# Empty by default.
METRICS_UPTIME_ROBOT_METRIC_IDS = {}
# UptimeRobot endpoint that ZenodoMetric.get_uptime POSTs to.
METRICS_UPTIME_ROBOT_URL = "https://api.uptimerobot.com/v2/getMonitors"
# API key sent in the UptimeRobot request body. None by default --
# presumably overridden per deployment; TODO confirm.
METRICS_UPTIME_ROBOT_API_KEY = None
5 changes: 5 additions & 0 deletions site/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,20 @@ invenio_base.blueprints =
invenio_base.apps =
zenodo_rdm_legacy = zenodo_rdm.legacy.ext:ZenodoLegacy
profiler = zenodo_rdm.profiler:Profiler
zenodo_rdm_metrics = zenodo_rdm.metrics.ext:ZenodoMetrics
invenio_base.api_apps =
zenodo_rdm_legacy = zenodo_rdm.legacy.ext:ZenodoLegacy
profiler = zenodo_rdm.profiler:Profiler
zenodo_rdm_metrics = zenodo_rdm.metrics.ext:ZenodoMetrics
invenio_base.api_blueprints =
zenodo_rdm_legacy = zenodo_rdm.legacy.views:blueprint
zenodo_rdm_legacy_stub = zenodo_rdm.legacy.views:stub_blueprint
zenodo_rdm_legacy_records = zenodo_rdm.legacy.views:create_legacy_records_bp
zenodo_rdm_legacy_draft_files = zenodo_rdm.legacy.views:create_draft_files_bp
zenodo_rdm_legacy_files_rest = zenodo_rdm.legacy.views:create_files_rest_bp
zenodo_rdm_metrics = zenodo_rdm.metrics.views:blueprint
invenio_celery.tasks =
zenodo_rdm_metrics = zenodo_rdm.metrics.tasks

invenio_assets.webpack =
zenodo_rdm_theme = zenodo_rdm.webpack:theme
Expand Down
8 changes: 8 additions & 0 deletions site/zenodo_rdm/metrics/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2023 CERN.
#
# ZenodoRDM is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.

"""ZenodoRDM metrics module."""
129 changes: 129 additions & 0 deletions site/zenodo_rdm/metrics/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2023 CERN.
#
# ZenodoRDM is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.

"""ZenodoRDM Metrics API."""

import calendar
from datetime import datetime, timedelta

import requests
from flask import current_app
from invenio_accounts.models import User
from invenio_communities.communities.records.models import CommunityMetadata
from invenio_files_rest.models import FileInstance
from invenio_search import current_search_client
from invenio_search.utils import build_alias_name
from opensearchpy import Search

from zenodo_rdm.metrics.proxies import current_metrics


class ZenodoMetric(object):
    """Callables computing the individual Zenodo metrics.

    Every public method is a ``staticmethod`` that takes no arguments and
    returns a single number, so it can be used directly as the ``value`` of an
    entry in ``METRICS_DATA``.
    """

    @staticmethod
    def get_data_transfer():
        """Get the data transfer volume (downloads plus uploads) in bytes.

        Both sums only consider documents dated on or after the configured
        metrics start date.

        :returns: Sum of the download volume (``stats-file-download`` index)
            and the upload volume (``rdmrecords-records`` index) as an ``int``.
        """
        time_range = {"gte": current_metrics.metrics_start_date.isoformat()}

        search = (
            Search(
                using=current_search_client,
                index=build_alias_name("stats-file-download"),
            )
            .filter(
                "range",
                timestamp=time_range,
            )
            .params(request_timeout=120)
        )
        search.aggs.metric("download_volume", "sum", field="volume")
        # ``[:0]`` requests no hits: only the aggregation is needed.
        result = search[:0].execute().aggregations.to_dict()
        # ``or 0`` also covers an aggregation that is present but null.
        download_volume = result.get("download_volume", {}).get("value") or 0

        search = (
            Search(
                using=current_search_client,
                index=build_alias_name("rdmrecords-records"),
            )
            .filter("range", created=time_range)
            .params(request_timeout=120)
        )
        search.aggs.metric("upload_volume", "sum", field="files.totalbytes")
        result = search[:0].execute().aggregations.to_dict()
        upload_volume = result.get("upload_volume", {}).get("value") or 0

        return int(download_volume + upload_volume)

    @staticmethod
    def get_visitors():
        """Get the number of distinct visitors since the metrics start date.

        :returns: Cardinality of ``visitor_id`` over the ``events-stats-*``
            indices as an ``int``, or ``0`` when the aggregation is missing
            from the response.
        """
        time_range = {"gte": current_metrics.metrics_start_date.isoformat()}

        search = (
            Search(
                using=current_search_client, index=build_alias_name("events-stats-*")
            )
            .filter("range", timestamp=time_range)
            .params(request_timeout=120)
        )

        search.aggs.metric("visitors_count", "cardinality", field="visitor_id")
        result = search[:0].execute()

        if "visitors_count" not in result.aggregations:
            return 0

        return int(result.aggregations.visitors_count.value)

    @staticmethod
    def _previous_month_range(now):
        """Return ``(start_ts, end_ts)`` for the calendar month before ``now``.

        :param now: Naive ``datetime`` interpreted as UTC.
        :returns: UNIX timestamps of the first instant of the previous month
            and of the first instant of the month containing ``now``.
        """
        end = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
        # One day before the 1st always falls in the previous month.
        start = (end - timedelta(days=1)).replace(day=1)
        return (
            calendar.timegm(start.utctimetuple()),
            calendar.timegm(end.utctimetuple()),
        )

    @staticmethod
    def _average_uptime(monitors, monitor_ids):
        """Average the ``custom_uptime_ranges`` of the selected monitors.

        The sum is divided by the number of *configured* IDs, so a configured
        monitor that is absent from ``monitors`` counts as ``0``.

        :param monitors: Monitor dicts with ``id`` and ``custom_uptime_ranges``.
        :param monitor_ids: Container of the monitor IDs to include.
        :returns: The average as a ``float``; ``0.0`` when no IDs are given.
        """
        if not monitor_ids:
            return 0.0
        total = sum(
            float(monitor["custom_uptime_ranges"])
            for monitor in monitors
            if monitor["id"] in monitor_ids
        )
        return total / len(monitor_ids)

    @staticmethod
    def get_uptime():
        """Get the average Zenodo uptime over the previous calendar month.

        Queries the configured UptimeRobot endpoint and averages the uptime of
        the monitors listed in ``METRICS_UPTIME_ROBOT_METRIC_IDS``.

        :returns: Average uptime as a ``float``; ``0.0`` when no monitor IDs
            are configured (no request is made in that case).
        :raises requests.RequestException: On timeout, connection failure or a
            non-success HTTP status.
        """
        metrics = current_app.config["METRICS_UPTIME_ROBOT_METRIC_IDS"]
        url = current_app.config["METRICS_UPTIME_ROBOT_URL"]
        api_key = current_app.config["METRICS_UPTIME_ROBOT_API_KEY"]

        # The default configuration has no monitors; without this guard the
        # final division raises ZeroDivisionError and aborts the computation
        # of every other metric in the same group.
        if not metrics:
            return 0.0

        start_ts, end_ts = ZenodoMetric._previous_month_range(datetime.utcnow())

        res = requests.post(
            url,
            json={
                "api_key": api_key,
                "custom_uptime_ranges": f"{start_ts}_{end_ts}",
            },
            # requests never times out by default; bound the wait so a stalled
            # API cannot block the worker indefinitely.
            timeout=30,
        )
        res.raise_for_status()

        return ZenodoMetric._average_uptime(res.json()["monitors"], metrics)

    @staticmethod
    def get_researchers():
        """Get the number of user accounts that are confirmed and active."""
        return User.query.filter(
            User.confirmed_at.isnot(None),
            User.active.is_(True),
        ).count()

    @staticmethod
    def get_files():
        """Get the number of ``FileInstance`` rows."""
        return FileInstance.query.count()

    @staticmethod
    def get_communities():
        """Get the number of communities that are not marked as deleted."""
        return CommunityMetadata.query.filter(
            CommunityMetadata.is_deleted.is_(False)
        ).count()
63 changes: 63 additions & 0 deletions site/zenodo_rdm/metrics/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2023 CERN.
#
# Zenodo is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.

"""Configuration for ZenodoRDM Metrics."""

import datetime

from zenodo_rdm.metrics.api import ZenodoMetric

# Lower bound of the date range used by the search-based metrics; exposed
# through the extension's ``metrics_start_date`` property.
METRICS_START_DATE = datetime.datetime(2021, 1, 1)
# Lifetime, in seconds, of a cached metrics result (passed as the cache
# ``timeout`` when results are stored).
METRICS_CACHE_TIMEOUT = int(datetime.timedelta(hours=1).total_seconds())
# How often the metrics are recomputed by the scheduled task. At 30 minutes
# this is shorter than the one-hour cache timeout above.
METRICS_CACHE_UPDATE_INTERVAL = datetime.timedelta(minutes=30)

# Metric definitions, grouped by metric ID. Each definition provides:
#   name  - metric name written to the formatted output
#   help  - text of the "# HELP" line
#   type  - text of the "# TYPE" line ("counter" / "gauge" here)
#   value - zero-argument callable, invoked when the metrics are calculated
#           and replaced by its return value
METRICS_DATA = {
    "openaire-nexus": [
        {
            "name": "zenodo_nexus_data_transfer_bytes_total",
            "help": (
                "Bytes of data transferred from/to Zenodo during the "
                "OpenAIRE-NEXUS project (i.e. from 2021-01-01)."
            ),
            "type": "counter",
            "value": ZenodoMetric.get_data_transfer,
        },
        {
            "name": "zenodo_nexus_unique_visitors_web_total",
            "help": (
                "Total of daily unique visitors on Zenodo portal during the "
                "OpenAIRE-NEXUS project (i.e. from 2021-01-01)."
            ),
            "type": "counter",
            "value": ZenodoMetric.get_visitors,
        },
        {
            "name": "zenodo_last_month_uptime_ratio",
            "help": "Zenodo uptime percentage for the last month.",
            "type": "gauge",
            "value": ZenodoMetric.get_uptime,
        },
        {
            "name": "zenodo_researchers",
            "help": "Number of researchers registered on Zenodo",
            "type": "gauge",
            "value": ZenodoMetric.get_researchers,
        },
        {
            "name": "zenodo_files",
            "help": "Number of files hosted on Zenodo",
            "type": "gauge",
            "value": ZenodoMetric.get_files,
        },
        {
            "name": "zenodo_communities",
            "help": "Number of Zenodo communities created",
            "type": "gauge",
            "value": ZenodoMetric.get_communities,
        },
    ]
}
38 changes: 38 additions & 0 deletions site/zenodo_rdm/metrics/ext.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2023 CERN.
#
# ZenodoRDM is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.

"""ZenodoRDM Metrics module."""

from flask import current_app

from zenodo_rdm.metrics import config


class ZenodoMetrics(object):
    """Flask extension for the ZenodoRDM metrics module."""

    def __init__(self, app=None):
        """Create the extension, initializing it right away if ``app`` is given."""
        if not app:
            return
        self.init_app(app)

    @staticmethod
    def init_config(app):
        """Copy every ``METRICS_*`` default into ``app.config`` unless already set."""
        metric_settings = (
            name for name in dir(config) if name.startswith("METRICS_")
        )
        for name in metric_settings:
            app.config.setdefault(name, getattr(config, name))

    def init_app(self, app):
        """Load the default configuration and register the extension on ``app``."""
        self.init_config(app)
        app.extensions["zenodo-metrics"] = self

    @property
    def metrics_start_date(self):
        """Metrics start date, read from the current application's config."""
        start_date = current_app.config["METRICS_START_DATE"]
        return start_date
13 changes: 13 additions & 0 deletions site/zenodo_rdm/metrics/proxies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2023 CERN.
#
# ZenodoRDM is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.

"""Proxy objects for easier access to application objects."""

from flask import current_app
from werkzeug.local import LocalProxy

def _get_metrics_extension():
    """Return the extension stored under ``"zenodo-metrics"`` on the current app."""
    return current_app.extensions["zenodo-metrics"]


# Resolved lazily on each access, so it can be imported at module level.
current_metrics = LocalProxy(_get_metrics_extension)
18 changes: 18 additions & 0 deletions site/zenodo_rdm/metrics/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2023 CERN.
#
# ZenodoRDM is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.

"""Tasks for metrics."""

from celery import shared_task

from zenodo_rdm.metrics import utils


@shared_task(ignore_result=True)
def calculate_metrics(metric_id=None):
    """Celery task delegating to ``utils.calculate_metrics`` for ``metric_id``.

    The computed result is not returned from the task.
    """
    utils.calculate_metrics(metric_id=metric_id)
47 changes: 47 additions & 0 deletions site/zenodo_rdm/metrics/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2023 CERN.
#
# ZenodoRDM is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.

"""Utilities for metrics module."""
from copy import deepcopy

from flask import current_app
from invenio_cache import current_cache


def get_metrics(metric_id):
    """Return the cached metrics for ``metric_id``, or ``None`` if not cached."""
    cache_key = f"METRICS_CACHE::{metric_id}"
    return current_cache.get(cache_key)


def calculate_metrics(metric_id, cache=True):
    """Compute every metric registered under ``metric_id``.

    Works on a deep copy of the configured definitions and replaces each
    ``value`` callable with the result of calling it.

    :param metric_id: Key into the ``METRICS_DATA`` configuration.
    :param cache: When true, store the computed list in the cache with the
        ``METRICS_CACHE_TIMEOUT`` timeout.
    :returns: The list of metric definitions with computed values.
    """
    definitions = current_app.config["METRICS_DATA"][metric_id]
    computed = deepcopy(definitions)

    for entry in computed:
        compute_value = entry["value"]
        entry["value"] = compute_value()

    if not cache:
        return computed

    current_cache.set(
        f"METRICS_CACHE::{metric_id}",
        computed,
        timeout=current_app.config["METRICS_CACHE_TIMEOUT"],
    )
    return computed


def formatted_response(metrics):
    """Render metrics as Prometheus-style text.

    :param metrics: Iterable of dicts providing at least the ``name``,
        ``help``, ``type`` and ``value`` keys.
    :returns: One HELP line, one TYPE line and one sample line per metric,
        concatenated in input order; an empty string for no metrics.
    """
    template = "# HELP {name} {help}\n# TYPE {name} {type}\n{name} {value}\n"
    return "".join(template.format(**entry) for entry in metrics)
Loading