Add an optional Prometheus /metrics endpoint to the webapp and record how long
each cached-data refresh takes (topology repo, contact repo, COManage LDAP,
LIGO LDAP). When prometheus-client is not installed, app.py logs a notice and
models.py falls back to a no-op Summary stand-in.

Review note: in get_mappings() the exception handler keeps testing the local
`strict` value (the argument, defaulting to self.strict), exactly as before
this patch, so an explicit strict= override is honoured both by the
mappings.get_mappings() call and by the error handling.

diff --git a/requirements-rootless.txt b/requirements-rootless.txt
index f6a6a8232..b94291383 100644
--- a/requirements-rootless.txt
+++ b/requirements-rootless.txt
@@ -19,6 +19,7 @@ itsdangerous~=2.1.2
 Jinja2~=3.1.2
 ldap3~=2.9.1
 MarkupSafe~=2.1.2
+prometheus-client~=0.20.0
 pyasn1~=0.5.0
 python-dateutil~=2.8.2
 python-gnupg~=0.5.0
diff --git a/src/app.py b/src/app.py
index 0d780ea20..288c14f2d 100755
--- a/src/app.py
+++ b/src/app.py
@@ -1079,6 +1079,18 @@ def _get_authorized():
     return default_authorized
 
 
+try:
+    from werkzeug.middleware.dispatcher import DispatcherMiddleware
+    from prometheus_client import make_wsgi_app
+    # Enable prometheus integration with the topology webapp
+    app.wsgi_app = DispatcherMiddleware(app.wsgi_app, {
+        '/metrics': make_wsgi_app()
+    })
+except ImportError:
+    print("*** /metrics endpoint unavailable: prometheus-client missing",
+          file=sys.stderr)
+
+
 if __name__ == '__main__':
     if "--auth" in sys.argv[1:]:
         default_authorized = True
diff --git a/src/webapp/models.py b/src/webapp/models.py
index d8748ee74..cc8d13429 100644
--- a/src/webapp/models.py
+++ b/src/webapp/models.py
@@ -1,3 +1,4 @@
+import contextlib
 import datetime
 import logging
 import os
@@ -5,6 +6,22 @@
 from typing import Dict, Set, List, Optional
 
 import yaml
 
+try:
+    from prometheus_client import Summary
+except ImportError:
+    class Summary:
+        """A dummy prometheus_client.Summary class"""
+
+        def __init__(self, name: str, documentation: str):
+            _ = name
+            _ = documentation
+
+        @contextlib.contextmanager
+        def time(self):
+            pass
+            yield
+            pass
+
 from webapp import common, contacts_reader, ldap_data, mappings, project_reader, rg_reader, vo_reader
 from webapp.common import readfile
@@ -15,6 +32,11 @@
 
 log = logging.getLogger(__name__)
 
+topology_update_summary = Summary('topology_update_seconds', 'Time spent updating the topology repo data')
+contact_update_summary = Summary('contact_update_seconds', 'Time spent updating the contact repo data')
+comanage_update_summary = Summary('comanage_update_seconds', 'Time spent updating the comanage LDAP data')
+ligo_update_summary = Summary('ligo_update_seconds', 'Time spent updating the LIGO LDAP data')
+
 
 class CachedData:
     def __init__(self, data=None, timestamp=0, force_update=True, cache_lifetime=60*15,
@@ -153,17 +175,18 @@ def get_contact_db_data(self) -> Optional[ContactsData]:
             data = contacts_reader.get_contacts_data(None)
             self.contacts_data.update(data)
         elif self.contacts_data.should_update():
-            ok = self._update_contacts_repo()
-            if ok:
-                try:
-                    self.contacts_data.update(contacts_reader.get_contacts_data(self.contacts_file))
-                except Exception:
-                    if self.strict:
-                        raise
-                    log.exception("Failed to update contacts data")
+            with contact_update_summary.time():
+                ok = self._update_contacts_repo()
+                if ok:
+                    try:
+                        self.contacts_data.update(contacts_reader.get_contacts_data(self.contacts_file))
+                    except Exception:
+                        if self.strict:
+                            raise
+                        log.exception("Failed to update contacts data")
+                        self.contacts_data.try_again()
+                else:
                     self.contacts_data.try_again()
-            else:
-                self.contacts_data.try_again()
 
         return self.contacts_data.data
 
@@ -179,15 +202,16 @@ def get_comanage_data(self) -> Optional[ContactsData]:
             data = contacts_reader.get_contacts_data(None)
             self.comanage_data.update(data)
         elif self.comanage_data.should_update():
-            try:
-                idmap = self.get_cilogon_ldap_id_map()
-                data = ldap_data.cilogon_id_map_to_yaml_data(idmap)
-                self.comanage_data.update(ContactsData(data))
-            except Exception:
-                if self.strict:
-                    raise
-                log.exception("Failed to update comanage data")
-                self.comanage_data.try_again()
+            with comanage_update_summary.time():
+                try:
+                    idmap = self.get_cilogon_ldap_id_map()
+                    data = ldap_data.cilogon_id_map_to_yaml_data(idmap)
+                    self.comanage_data.update(ContactsData(data))
+                except Exception:
+                    if self.strict:
+                        raise
+                    log.exception("Failed to update comanage data")
+                    self.comanage_data.try_again()
 
         return self.comanage_data.data
 
@@ -227,15 +251,16 @@ def get_ligo_dn_list(self) -> Optional[List[str]]:
                       "getting empty list")
             return []
         elif self.ligo_dn_list.should_update():
-            try:
-                ligo_ldap_pass = readfile(self.ligo_ldap_passfile, log)
-                new_dn_list = ldap_data.get_ligo_ldap_dn_list(self.ligo_ldap_url, self.ligo_ldap_user, ligo_ldap_pass)
-                self.ligo_dn_list.update(new_dn_list)
-            except Exception:
-                if self.strict:
-                    raise
-                log.exception("Failed to update LIGO data")
-                self.ligo_dn_list.try_again()
+            with ligo_update_summary.time():
+                try:
+                    ligo_ldap_pass = readfile(self.ligo_ldap_passfile, log)
+                    new_dn_list = ldap_data.get_ligo_ldap_dn_list(self.ligo_ldap_url, self.ligo_ldap_user, ligo_ldap_pass)
+                    self.ligo_dn_list.update(new_dn_list)
+                except Exception:
+                    if self.strict:
+                        raise
+                    log.exception("Failed to update LIGO data")
+                    self.ligo_dn_list.try_again()
 
         return self.ligo_dn_list.data
 
@@ -261,17 +286,18 @@ def get_topology(self) -> Optional[Topology]:
         May return None if we fail to get the data for the first time.
         """
         if self.topology.should_update():
-            ok = self._update_topology_repo()
-            if ok:
-                try:
-                    self.topology.update(rg_reader.get_topology(self.topology_dir, self.get_contacts_data(), strict=self.strict))
-                except Exception:
-                    if self.strict:
-                        raise
-                    log.exception("Failed to update topology")
+            with topology_update_summary.time():
+                ok = self._update_topology_repo()
+                if ok:
+                    try:
+                        self.topology.update(rg_reader.get_topology(self.topology_dir, self.get_contacts_data(), strict=self.strict))
+                    except Exception:
+                        if self.strict:
+                            raise
+                        log.exception("Failed to update topology")
+                        self.topology.try_again()
+                else:
                     self.topology.try_again()
-            else:
-                self.topology.try_again()
 
         return self.topology.data
 
@@ -281,17 +307,18 @@ def get_vos_data(self) -> Optional[VOsData]:
         May return None if we fail to get the data for the first time.
         """
         if self.vos_data.should_update():
-            ok = self._update_topology_repo()
-            if ok:
-                try:
-                    self.vos_data.update(vo_reader.get_vos_data(self.vos_dir, self.get_contacts_data(), strict=self.strict))
-                except Exception:
-                    if self.strict:
-                        raise
-                    log.exception("Failed to update VOs")
+            with topology_update_summary.time():
+                ok = self._update_topology_repo()
+                if ok:
+                    try:
+                        self.vos_data.update(vo_reader.get_vos_data(self.vos_dir, self.get_contacts_data(), strict=self.strict))
+                    except Exception:
+                        if self.strict:
+                            raise
+                        log.exception("Failed to update VOs")
+                        self.vos_data.try_again()
+                else:
                     self.vos_data.try_again()
-            else:
-                self.vos_data.try_again()
 
         return self.vos_data.data
 
@@ -301,17 +328,18 @@ def get_projects(self) -> Optional[Dict]:
         May return None if we fail to get the data for the first time.
         """
         if self.projects.should_update():
-            ok = self._update_topology_repo()
-            if ok:
-                try:
-                    self.projects.update(project_reader.get_projects(self.projects_dir, strict=self.strict))
-                except Exception:
-                    if self.strict:
-                        raise
-                    log.exception("Failed to update projects")
+            with topology_update_summary.time():
+                ok = self._update_topology_repo()
+                if ok:
+                    try:
+                        self.projects.update(project_reader.get_projects(self.projects_dir, strict=self.strict))
+                    except Exception:
+                        if self.strict:
+                            raise
+                        log.exception("Failed to update projects")
+                        self.projects.try_again()
+                else:
                     self.projects.try_again()
-            else:
-                self.projects.try_again()
 
         return self.projects.data
 
@@ -323,17 +351,18 @@ def get_mappings(self, strict=None) -> Optional[mappings.Mappings]:
         if strict is None:
             strict = self.strict
         if self.mappings.should_update():
-            ok = self._update_topology_repo()
-            if ok:
-                try:
-                    self.mappings.update(mappings.get_mappings(indir=self.mappings_dir, strict=strict))
-                except Exception:
-                    if strict:
-                        raise
-                    log.exception("Failed to update mappings")
+            with topology_update_summary.time():
+                ok = self._update_topology_repo()
+                if ok:
+                    try:
+                        self.mappings.update(mappings.get_mappings(indir=self.mappings_dir, strict=strict))
+                    except Exception:
+                        if strict:
+                            raise
+                        log.exception("Failed to update mappings")
+                        self.mappings.try_again()
+                else:
                     self.mappings.try_again()
-            else:
-                self.mappings.try_again()
 
         return self.mappings.data
 