Skip to content

Commit

Permalink
metrics: migrate metrics module from legacy (#500)
Browse files Browse the repository at this point in the history
* Imported metrics module from zenodo/zenodo
* Updated license and copyright information
* Updated module for zenodoRDM
* Update download volume aggregation index
* `.format(...) -> f-strings`
* Move and rename config vars
  • Loading branch information
yashlamba authored Sep 18, 2023
1 parent 54507a7 commit ddb51d3
Show file tree
Hide file tree
Showing 10 changed files with 374 additions and 0 deletions.
17 changes: 17 additions & 0 deletions invenio.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ from zenodo_rdm.legacy.resources import record_serializers
from zenodo_rdm.tokens import RATSubjectSchema
from zenodo_rdm import providers as zenodo_providers
from zenodo_rdm import facets as zenodo_community_facets
from zenodo_rdm.metrics.config import METRICS_CACHE_UPDATE_INTERVAL

# Flask
# =====
# See https://flask.palletsprojects.com/en/1.1.x/config/
Expand Down Expand Up @@ -109,6 +111,17 @@ APP_RDM_ROUTES["index"] = ("/", frontpage_view_function)
# Drop the "file-checks" and "file-integrity-report" entries from the beat
# schedule; the ``None`` default makes each pop a no-op when the key is absent.
CELERY_BEAT_SCHEDULE.pop("file-checks", None)
CELERY_BEAT_SCHEDULE.pop("file-integrity-report", None)

# Rebuild the schedule as a new dict that keeps every remaining entry and adds
# a periodic run of the metrics task for the "openaire-nexus" metric set.
# The run period is METRICS_CACHE_UPDATE_INTERVAL, imported from
# ``zenodo_rdm.metrics.config``.
CELERY_BEAT_SCHEDULE = {
    **CELERY_BEAT_SCHEDULE,
    "metrics-calculate": {
        "task": "zenodo_rdm.metrics.tasks.calculate_metrics",
        "kwargs": {
            "metric_id": "openaire-nexus",
        },
        "schedule": METRICS_CACHE_UPDATE_INTERVAL,
    },
}

# Flask-Babel
# ===========
# See https://flask-babel.tkte.ch/#configuration
Expand Down Expand Up @@ -501,3 +514,7 @@ ZENODO_RECORDS_UI_CITATIONS_ENABLE = True

OPENAIRE_PORTAL_URL = 'https://explore.openaire.eu'
"""URL to OpenAIRE portal."""

# Container of monitor ids read by ``ZenodoMetric.get_uptime``: only monitors
# whose "id" is in this container contribute to the uptime average, and the
# average is taken over the size of this container.
METRICS_UPTIME_ROBOT_METRIC_IDS = {}
# Endpoint that ``ZenodoMetric.get_uptime`` sends its POST request to.
METRICS_UPTIME_ROBOT_URL = "https://api.uptimerobot.com/v2/getMonitors"
# Sent as "api_key" in the JSON body of that request; unset (None) here.
METRICS_UPTIME_ROBOT_API_KEY = None
5 changes: 5 additions & 0 deletions site/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,20 @@ invenio_base.blueprints =
invenio_base.apps =
zenodo_rdm_legacy = zenodo_rdm.legacy.ext:ZenodoLegacy
profiler = zenodo_rdm.profiler:Profiler
zenodo_rdm_metrics = zenodo_rdm.metrics.ext:ZenodoMetrics
invenio_base.api_apps =
zenodo_rdm_legacy = zenodo_rdm.legacy.ext:ZenodoLegacy
profiler = zenodo_rdm.profiler:Profiler
zenodo_rdm_metrics = zenodo_rdm.metrics.ext:ZenodoMetrics
invenio_base.api_blueprints =
zenodo_rdm_legacy = zenodo_rdm.legacy.views:blueprint
zenodo_rdm_legacy_stub = zenodo_rdm.legacy.views:stub_blueprint
zenodo_rdm_legacy_records = zenodo_rdm.legacy.views:create_legacy_records_bp
zenodo_rdm_legacy_draft_files = zenodo_rdm.legacy.views:create_draft_files_bp
zenodo_rdm_legacy_files_rest = zenodo_rdm.legacy.views:create_files_rest_bp
zenodo_rdm_metrics = zenodo_rdm.metrics.views:blueprint
invenio_celery.tasks =
zenodo_rdm_metrics = zenodo_rdm.metrics.tasks

invenio_assets.webpack =
zenodo_rdm_theme = zenodo_rdm.webpack:theme
Expand Down
8 changes: 8 additions & 0 deletions site/zenodo_rdm/metrics/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2023 CERN.
#
# ZenodoRDM is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.

"""ZenodoRDM metrics module."""
129 changes: 129 additions & 0 deletions site/zenodo_rdm/metrics/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2023 CERN.
#
# ZenodoRDM is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.

"""ZenodoRDM Metrics API."""

import calendar
from datetime import datetime, timedelta

import requests
from flask import current_app
from invenio_accounts.models import User
from invenio_communities.communities.records.models import CommunityMetadata
from invenio_files_rest.models import FileInstance
from invenio_search import current_search_client
from invenio_search.utils import build_alias_name
from opensearchpy import Search

from zenodo_rdm.metrics.proxies import current_metrics


class ZenodoMetric(object):
    """Static collectors that compute the individual Zenodo metric values.

    Every public ``get_*`` method takes no arguments and returns a single
    number. The methods are referenced as the ``"value"`` callables of the
    ``METRICS_DATA`` configuration.
    """

    # Seconds allowed for each search aggregation request.
    _SEARCH_TIMEOUT = 120
    # Seconds allowed for the uptime HTTP request, so a stalled remote
    # service cannot block the caller indefinitely.
    _HTTP_TIMEOUT = 30

    @staticmethod
    def _range_search(index, field):
        """Build a search on ``index`` limited to ``field`` >= metrics start date.

        :param index: Index name, passed through ``build_alias_name``.
        :param field: Name of the date field the ``range`` filter applies to.
        :returns: The filtered ``Search`` object with the request timeout set.
        """
        time_range = {"gte": current_metrics.metrics_start_date.isoformat()}
        return (
            Search(
                using=current_search_client,
                index=build_alias_name(index),
            )
            .filter("range", **{field: time_range})
            .params(request_timeout=ZenodoMetric._SEARCH_TIMEOUT)
        )

    @staticmethod
    def _sum_since_start(index, range_field, agg_name, sum_field):
        """Return the ``sum`` aggregation of ``sum_field`` since the start date.

        :param index: Index name to search.
        :param range_field: Date field used for the start-date filter.
        :param agg_name: Name given to the aggregation in the request.
        :param sum_field: Field whose values are summed.
        :returns: The aggregated value, or 0 when it is missing or null.
        """
        search = ZenodoMetric._range_search(index, range_field)
        search.aggs.metric(agg_name, "sum", field=sum_field)
        # ``[:0]`` requests no hits; only the aggregation result is needed.
        result = search[:0].execute().aggregations.to_dict()
        # A missing or null aggregation value counts as 0 so that the caller
        # can always add the volumes together.
        return result.get(agg_name, {}).get("value") or 0

    @staticmethod
    def _previous_month_bounds(now):
        """Return ``(start, end)`` of the calendar month preceding ``now``.

        :param now: A ``datetime``.
        :returns: Tuple of datetimes: midnight on the first day of the
            previous month and midnight on the first day of ``now``'s month.
        """
        end = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
        # One day before the first of this month is always inside the
        # previous month; snapping to day 1 gives that month's start.
        start = (end - timedelta(days=1)).replace(day=1)
        return start, end

    @staticmethod
    def _average_uptime(monitors, monitor_ids):
        """Average the ``custom_uptime_ranges`` of the selected monitors.

        :param monitors: Iterable of dicts with ``"id"`` and
            ``"custom_uptime_ranges"`` keys.
        :param monitor_ids: Container of the monitor ids to include.
        :returns: Sum of the selected values divided by ``len(monitor_ids)``,
            or ``0.0`` when ``monitor_ids`` is empty.
        """
        if not monitor_ids:
            # Nothing configured: avoid dividing by zero.
            return 0.0
        total = sum(
            float(monitor["custom_uptime_ranges"])
            for monitor in monitors
            if monitor["id"] in monitor_ids
        )
        # Dividing by the configured count (not the matched count) means a
        # configured monitor absent from ``monitors`` contributes 0.
        return total / len(monitor_ids)

    @staticmethod
    def get_data_transfer():
        """Get the combined download and upload volume since the start date.

        Adds the summed ``volume`` field of the ``stats-file-download`` index
        to the summed ``files.totalbytes`` field of the ``rdmrecords-records``
        index.

        :returns: The total as an ``int``. NOTE(review): the value is the raw
            sum of those fields and is exported under a ``*_bytes_total``
            metric name, so it is presumably bytes rather than TB - confirm.
        """
        download_volume = ZenodoMetric._sum_since_start(
            "stats-file-download", "timestamp", "download_volume", "volume"
        )
        upload_volume = ZenodoMetric._sum_since_start(
            "rdmrecords-records", "created", "upload_volume", "files.totalbytes"
        )
        return int(download_volume + upload_volume)

    @staticmethod
    def get_visitors():
        """Get the number of distinct ``visitor_id`` values since the start date.

        Runs a ``cardinality`` aggregation over the ``events-stats-*`` indices.

        :returns: The count as an ``int``; 0 when the aggregation is absent
            from the response.
        """
        search = ZenodoMetric._range_search("events-stats-*", "timestamp")
        search.aggs.metric("visitors_count", "cardinality", field="visitor_id")
        result = search[:0].execute()

        if "visitors_count" not in result.aggregations:
            return 0

        return int(result.aggregations.visitors_count.value)

    @staticmethod
    def get_uptime():
        """Get the average uptime of the configured monitors for last month.

        Posts the API key and the previous calendar month (as
        ``<start>_<end>`` Unix timestamps) to ``METRICS_UPTIME_ROBOT_URL`` and
        averages the ``custom_uptime_ranges`` value of every returned monitor
        whose id is in ``METRICS_UPTIME_ROBOT_METRIC_IDS``.

        :returns: The average as a ``float``; ``0.0`` when no monitor ids are
            configured (no request is made in that case).
        :raises requests.RequestException: On timeout, connection failure or
            an HTTP error status.
        """
        monitor_ids = current_app.config["METRICS_UPTIME_ROBOT_METRIC_IDS"]
        if not monitor_ids:
            # Without configured monitors there is nothing to average and the
            # division below would fail; skip the remote call altogether.
            return 0.0

        url = current_app.config["METRICS_UPTIME_ROBOT_URL"]
        api_key = current_app.config["METRICS_UPTIME_ROBOT_API_KEY"]

        start, end = ZenodoMetric._previous_month_bounds(datetime.utcnow())
        end_ts = calendar.timegm(end.utctimetuple())
        start_ts = calendar.timegm(start.utctimetuple())

        res = requests.post(
            url,
            json={
                "api_key": api_key,
                "custom_uptime_ranges": f"{start_ts}_{end_ts}",
            },
            timeout=ZenodoMetric._HTTP_TIMEOUT,
        )
        # Fail with the HTTP error itself instead of a confusing JSON/KeyError.
        res.raise_for_status()

        return ZenodoMetric._average_uptime(res.json()["monitors"], monitor_ids)

    @staticmethod
    def get_researchers():
        """Get the number of users that are both confirmed and active."""
        return User.query.filter(
            User.confirmed_at.isnot(None),
            User.active.is_(True),
        ).count()

    @staticmethod
    def get_files():
        """Get the number of ``FileInstance`` rows."""
        return FileInstance.query.count()

    @staticmethod
    def get_communities():
        """Get the number of communities that are not marked as deleted."""
        return CommunityMetadata.query.filter(
            CommunityMetadata.is_deleted.is_(False)
        ).count()
63 changes: 63 additions & 0 deletions site/zenodo_rdm/metrics/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2023 CERN.
#
# ZenodoRDM is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.

"""Configuration for ZenodoRDM Metrics."""

import datetime

from zenodo_rdm.metrics.api import ZenodoMetric

# Lower bound of the time range; exposed through
# ``ZenodoMetrics.metrics_start_date`` and used as the "gte" value of the
# range filters in ``ZenodoMetric``.
METRICS_START_DATE = datetime.datetime(2021, 1, 1)
# Cache timeout (one hour, expressed in whole seconds) passed to
# ``current_cache.set`` when computed metrics are stored.
METRICS_CACHE_TIMEOUT = int(datetime.timedelta(hours=1).total_seconds())
# Period of the scheduled "metrics-calculate" task (see invenio.cfg).
METRICS_CACHE_UPDATE_INTERVAL = datetime.timedelta(minutes=30)

# Metric definitions keyed by metric id. Each entry provides:
#   "name"  - metric name written in the formatted output,
#   "help"  - text written on the "# HELP" line,
#   "type"  - text written on the "# TYPE" line,
#   "value" - zero-argument callable; ``utils.calculate_metrics`` calls it and
#             replaces it with the returned value.
METRICS_DATA = {
    "openaire-nexus": [
        {
            "name": "zenodo_nexus_data_transfer_bytes_total",
            "help": (
                "Bytes of data transferred from/to Zenodo during the "
                "OpenAIRE-NEXUS project (i.e. from 2021-01-01)."
            ),
            "type": "counter",
            "value": ZenodoMetric.get_data_transfer,
        },
        {
            "name": "zenodo_nexus_unique_visitors_web_total",
            "help": (
                "Total of daily unique visitors on Zenodo portal during the "
                "OpenAIRE-NEXUS project (i.e. from 2021-01-01)."
            ),
            "type": "counter",
            "value": ZenodoMetric.get_visitors,
        },
        {
            "name": "zenodo_last_month_uptime_ratio",
            "help": "Zenodo uptime percentage for the last month.",
            "type": "gauge",
            "value": ZenodoMetric.get_uptime,
        },
        {
            "name": "zenodo_researchers",
            "help": "Number of researchers registered on Zenodo",
            "type": "gauge",
            "value": ZenodoMetric.get_researchers,
        },
        {
            "name": "zenodo_files",
            "help": "Number of files hosted on Zenodo",
            "type": "gauge",
            "value": ZenodoMetric.get_files,
        },
        {
            "name": "zenodo_communities",
            "help": "Number of Zenodo communities created",
            "type": "gauge",
            "value": ZenodoMetric.get_communities,
        },
    ]
}
38 changes: 38 additions & 0 deletions site/zenodo_rdm/metrics/ext.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2023 CERN.
#
# ZenodoRDM is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.

"""ZenodoRDM Metrics module."""

from flask import current_app

from zenodo_rdm.metrics import config


class ZenodoMetrics(object):
    """Flask extension for the ZenodoRDM metrics module."""

    def __init__(self, app=None):
        """Create the extension; initialize ``app`` right away when one is given."""
        if not app:
            return
        self.init_app(app)

    @staticmethod
    def init_config(app):
        """Load the ``METRICS_*`` defaults of the config module into ``app.config``.

        Keys already present in ``app.config`` keep their existing value.
        """
        metric_keys = (name for name in dir(config) if name.startswith("METRICS_"))
        for name in metric_keys:
            app.config.setdefault(name, getattr(config, name))

    def init_app(self, app):
        """Apply the default configuration and register the extension on ``app``."""
        self.init_config(app)
        app.extensions["zenodo-metrics"] = self

    @property
    def metrics_start_date(self):
        """Return ``METRICS_START_DATE`` from the current application's config."""
        return current_app.config["METRICS_START_DATE"]
13 changes: 13 additions & 0 deletions site/zenodo_rdm/metrics/proxies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2023 CERN.
#
# ZenodoRDM is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.

"""Proxy objects for easier access to application objects."""

from flask import current_app
from werkzeug.local import LocalProxy

# Proxy that looks up the "zenodo-metrics" entry of the current application's
# extensions (the ``ZenodoMetrics`` instance registered by its ``init_app``)
# each time it is used, so it can be imported at module level.
current_metrics = LocalProxy(lambda: current_app.extensions["zenodo-metrics"])
18 changes: 18 additions & 0 deletions site/zenodo_rdm/metrics/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2023 CERN.
#
# ZenodoRDM is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.

"""Tasks for metrics."""

from celery import shared_task

from zenodo_rdm.metrics import utils


@shared_task(ignore_result=True)
def calculate_metrics(metric_id=None):
    """Celery task that delegates to ``utils.calculate_metrics`` for ``metric_id``.

    The computed result is not returned by the task.
    """
    utils.calculate_metrics(metric_id=metric_id)
47 changes: 47 additions & 0 deletions site/zenodo_rdm/metrics/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2023 CERN.
#
# ZenodoRDM is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.

"""Utilities for metrics module."""
from copy import deepcopy

from flask import current_app
from invenio_cache import current_cache


def get_metrics(metric_id):
    """Return the cached result for ``metric_id``, or ``None`` if nothing is cached."""
    cache_key = f"METRICS_CACHE::{metric_id}"
    return current_cache.get(cache_key)


def calculate_metrics(metric_id, cache=True):
    """Compute every metric configured under ``metric_id``.

    Works on a deep copy of the ``METRICS_DATA`` entry so the configuration
    itself is not modified, replaces each ``"value"`` callable by the value it
    returns, and stores the result in the cache unless ``cache`` is false.

    :param metric_id: Key into the ``METRICS_DATA`` configuration.
    :param cache: Whether to store the computed result in the cache.
    :returns: The list of metric dicts with computed values.
    """
    definitions = current_app.config["METRICS_DATA"][metric_id]
    result = deepcopy(definitions)

    for entry in result:
        compute = entry["value"]
        entry["value"] = compute()

    if not cache:
        return result

    current_cache.set(
        f"METRICS_CACHE::{metric_id}",
        result,
        timeout=current_app.config["METRICS_CACHE_TIMEOUT"],
    )
    return result


def formatted_response(metrics):
    """Format metrics into Prometheus format.

    :param metrics: Iterable of mappings, each providing the ``name``,
        ``help``, ``type`` and ``value`` keys; any other keys are ignored.
    :returns: One string with a ``# HELP`` line, a ``# TYPE`` line and a
        ``<name> <value>`` line per metric, each ending in a newline; the
        empty string when ``metrics`` is empty.
    :raises KeyError: If a metric lacks one of the four keys.
    """
    template = "# HELP {name} {help}\n# TYPE {name} {type}\n{name} {value}\n"
    # Join once instead of growing a string with ``+=`` on every iteration.
    return "".join(template.format(**metric) for metric in metrics)
Loading

0 comments on commit ddb51d3

Please sign in to comment.