Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Prometheus metrics take 2 #3708

Merged
merged 4 commits into from
Mar 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions requirements-rootless.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ itsdangerous~=2.1.2
Jinja2~=3.1.2
ldap3~=2.9.1
MarkupSafe~=2.1.2
prometheus-client~=0.20.0
pyasn1~=0.5.0
python-dateutil~=2.8.2
python-gnupg~=0.5.0
Expand Down
12 changes: 12 additions & 0 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -1079,6 +1079,18 @@ def _get_authorized():
return default_authorized


# Optional Prometheus integration for the topology webapp.
try:
    from werkzeug.middleware.dispatcher import DispatcherMiddleware
    from prometheus_client import make_wsgi_app

    # Wrap the existing WSGI app so that the /metrics path is served by the
    # Prometheus exporter app; the original app is kept as the wrapped app.
    app.wsgi_app = DispatcherMiddleware(
        app.wsgi_app,
        {'/metrics': make_wsgi_app()},
    )
except ImportError:
    # Best effort: a failed import of either module lands here, and the
    # webapp keeps running without the /metrics endpoint.
    print("*** /metrics endpoint unavailable: prometheus-client missing", file=sys.stderr)


if __name__ == '__main__':
if "--auth" in sys.argv[1:]:
default_authorized = True
Expand Down
165 changes: 97 additions & 68 deletions src/webapp/models.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,27 @@
import contextlib
import datetime
import logging
import os
import time
from typing import Dict, Set, List, Optional

import yaml
try:
    from prometheus_client import Summary
except ImportError:
    class Summary:
        """No-op stand-in for prometheus_client.Summary.

        Defined only when prometheus_client cannot be imported, so that
        module-level summaries and ``with summary.time():`` blocks keep
        working. Nothing is measured or recorded.
        """

        def __init__(self, name: str, documentation: str, *args, **kwargs):
            """Accept and discard a metric definition.

            ``name`` and ``documentation`` are the arguments this module
            passes. Any further positional or keyword arguments are accepted
            and ignored, so a call written for the real class (for example
            with ``labelnames``) does not fail when the package is absent.
            """

        def observe(self, amount: float) -> None:
            """Discard an observation; provided for parity with the real class."""

        @contextlib.contextmanager
        def time(self):
            """Return a context manager that runs the wrapped block untimed.

            Exceptions raised inside the ``with`` body propagate unchanged.
            """
            yield


from webapp import common, contacts_reader, ldap_data, mappings, project_reader, rg_reader, vo_reader
from webapp.common import readfile
Expand All @@ -15,6 +32,11 @@

log = logging.getLogger(__name__)

# Timing summaries, one per kind of data refresh. Each is used through
# ``with <summary>.time():`` around the corresponding update.
topology_update_summary = Summary(
    'topology_update_seconds',
    'Time spent updating the topology repo data',
)
contact_update_summary = Summary(
    'contact_update_seconds',
    'Time spent updating the contact repo data',
)
comanage_update_summary = Summary(
    'comanage_update_seconds',
    'Time spent updating the comanage LDAP data',
)
ligo_update_summary = Summary(
    'ligo_update_seconds',
    'Time spent updating the LIGO LDAP data',
)


class CachedData:
def __init__(self, data=None, timestamp=0, force_update=True, cache_lifetime=60*15,
Expand Down Expand Up @@ -153,17 +175,18 @@ def get_contact_db_data(self) -> Optional[ContactsData]:
data = contacts_reader.get_contacts_data(None)
self.contacts_data.update(data)
elif self.contacts_data.should_update():
ok = self._update_contacts_repo()
if ok:
try:
self.contacts_data.update(contacts_reader.get_contacts_data(self.contacts_file))
except Exception:
if self.strict:
raise
log.exception("Failed to update contacts data")
with contact_update_summary.time():
ok = self._update_contacts_repo()
if ok:
try:
self.contacts_data.update(contacts_reader.get_contacts_data(self.contacts_file))
except Exception:
if self.strict:
raise
log.exception("Failed to update contacts data")
self.contacts_data.try_again()
else:
self.contacts_data.try_again()
else:
self.contacts_data.try_again()

return self.contacts_data.data

Expand All @@ -179,15 +202,16 @@ def get_comanage_data(self) -> Optional[ContactsData]:
data = contacts_reader.get_contacts_data(None)
self.comanage_data.update(data)
elif self.comanage_data.should_update():
try:
idmap = self.get_cilogon_ldap_id_map()
data = ldap_data.cilogon_id_map_to_yaml_data(idmap)
self.comanage_data.update(ContactsData(data))
except Exception:
if self.strict:
raise
log.exception("Failed to update comanage data")
self.comanage_data.try_again()
with comanage_update_summary.time():
try:
idmap = self.get_cilogon_ldap_id_map()
data = ldap_data.cilogon_id_map_to_yaml_data(idmap)
self.comanage_data.update(ContactsData(data))
except Exception:
if self.strict:
raise
log.exception("Failed to update comanage data")
self.comanage_data.try_again()

return self.comanage_data.data

Expand Down Expand Up @@ -227,15 +251,16 @@ def get_ligo_dn_list(self) -> Optional[List[str]]:
"getting empty list")
return []
elif self.ligo_dn_list.should_update():
try:
ligo_ldap_pass = readfile(self.ligo_ldap_passfile, log)
new_dn_list = ldap_data.get_ligo_ldap_dn_list(self.ligo_ldap_url, self.ligo_ldap_user, ligo_ldap_pass)
self.ligo_dn_list.update(new_dn_list)
except Exception:
if self.strict:
raise
log.exception("Failed to update LIGO data")
self.ligo_dn_list.try_again()
with ligo_update_summary.time():
try:
ligo_ldap_pass = readfile(self.ligo_ldap_passfile, log)
new_dn_list = ldap_data.get_ligo_ldap_dn_list(self.ligo_ldap_url, self.ligo_ldap_user, ligo_ldap_pass)
self.ligo_dn_list.update(new_dn_list)
except Exception:
if self.strict:
raise
log.exception("Failed to update LIGO data")
self.ligo_dn_list.try_again()

return self.ligo_dn_list.data

Expand All @@ -261,17 +286,18 @@ def get_topology(self) -> Optional[Topology]:
May return None if we fail to get the data for the first time.
"""
if self.topology.should_update():
ok = self._update_topology_repo()
if ok:
try:
self.topology.update(rg_reader.get_topology(self.topology_dir, self.get_contacts_data(), strict=self.strict))
except Exception:
if self.strict:
raise
log.exception("Failed to update topology")
with topology_update_summary.time():
ok = self._update_topology_repo()
if ok:
try:
self.topology.update(rg_reader.get_topology(self.topology_dir, self.get_contacts_data(), strict=self.strict))
except Exception:
if self.strict:
raise
log.exception("Failed to update topology")
self.topology.try_again()
else:
self.topology.try_again()
else:
self.topology.try_again()

return self.topology.data

Expand All @@ -281,17 +307,18 @@ def get_vos_data(self) -> Optional[VOsData]:
May return None if we fail to get the data for the first time.
"""
if self.vos_data.should_update():
ok = self._update_topology_repo()
if ok:
try:
self.vos_data.update(vo_reader.get_vos_data(self.vos_dir, self.get_contacts_data(), strict=self.strict))
except Exception:
if self.strict:
raise
log.exception("Failed to update VOs")
with topology_update_summary.time():
ok = self._update_topology_repo()
if ok:
try:
self.vos_data.update(vo_reader.get_vos_data(self.vos_dir, self.get_contacts_data(), strict=self.strict))
except Exception:
if self.strict:
raise
log.exception("Failed to update VOs")
self.vos_data.try_again()
else:
self.vos_data.try_again()
else:
self.vos_data.try_again()

return self.vos_data.data

Expand All @@ -301,17 +328,18 @@ def get_projects(self) -> Optional[Dict]:
May return None if we fail to get the data for the first time.
"""
if self.projects.should_update():
ok = self._update_topology_repo()
if ok:
try:
self.projects.update(project_reader.get_projects(self.projects_dir, strict=self.strict))
except Exception:
if self.strict:
raise
log.exception("Failed to update projects")
with topology_update_summary.time():
ok = self._update_topology_repo()
if ok:
try:
self.projects.update(project_reader.get_projects(self.projects_dir, strict=self.strict))
except Exception:
if self.strict:
raise
log.exception("Failed to update projects")
self.projects.try_again()
else:
self.projects.try_again()
else:
self.projects.try_again()

return self.projects.data

Expand All @@ -323,17 +351,18 @@ def get_mappings(self, strict=None) -> Optional[mappings.Mappings]:
if strict is None:
strict = self.strict
if self.mappings.should_update():
ok = self._update_topology_repo()
if ok:
try:
self.mappings.update(mappings.get_mappings(indir=self.mappings_dir, strict=strict))
except Exception:
if strict:
raise
log.exception("Failed to update mappings")
with topology_update_summary.time():
ok = self._update_topology_repo()
if ok:
try:
self.mappings.update(mappings.get_mappings(indir=self.mappings_dir, strict=strict))
except Exception:
if self.strict:
raise
log.exception("Failed to update mappings")
self.mappings.try_again()
else:
self.mappings.try_again()
else:
self.mappings.try_again()

return self.mappings.data

Expand Down
Loading