diff --git a/cachito/web/api_v1.py b/cachito/web/api_v1.py index 1afeeebc4..91767bda9 100644 --- a/cachito/web/api_v1.py +++ b/cachito/web/api_v1.py @@ -25,7 +25,7 @@ from cachito.errors import MessageBrokerError, NoWorkers, RequestErrorOrigin, ValidationError from cachito.web import db from cachito.web.content_manifest import BASE_ICM -from cachito.web.metrics import cachito_metrics +from cachito.web.metrics import cachito_metrics, requests_dec, requests_inc from cachito.web.models import ( ConfigFileBase64, EnvironmentVariable, @@ -337,8 +337,8 @@ def create_request(): db.session.add(request) db.session.commit() - cachito_metrics["gauge_state"].labels(state="total").inc() - cachito_metrics["gauge_state"].labels(state=request.state.state_name).inc() + requests_inc("total") + requests_inc(request.state.state_name) if current_user.is_authenticated: flask.current_app.logger.info( @@ -438,9 +438,9 @@ def create_request(): "Failed to schedule the task for request %d. Failing the request.", request.id ) error = "Failed to schedule the task to the workers. Please try again." - cachito_metrics["gauge_state"].labels(state=request.state.state_name).dec() + requests_dec(request.state.state_name) request.add_state("failed", error) - cachito_metrics["gauge_state"].labels(state=request.state.state_name).inc() + requests_inc(request.state.state_name) db.session.commit() raise MessageBrokerError(error) @@ -525,8 +525,8 @@ def patch_request(request_id): delete_logs = False if "state" in payload and "state_reason" in payload: - cachito_metrics["gauge_state"].labels(state=payload["state"]).inc() - cachito_metrics["gauge_state"].labels(state=request.state.state_name).dec() + requests_inc(payload["state"]) + requests_dec(request.state.state_name) new_state = payload["state"] delete_bundle = new_state == "stale" and request.state.state_name != "failed" if new_state in ("stale", "failed"): diff --git a/cachito/web/metrics.py b/cachito/web/metrics.py index ca619c714..7078464a6 100644 --- a/cachito/web/metrics.py +++ b/cachito/web/metrics.py @@ -6,6 +6,7 @@ from prometheus_flask_exporter.multiprocess import GunicornInternalPrometheusMetrics cachito_metrics = {} +hostname = socket.gethostname() def init_metrics(app): @@ -17,7 +18,6 @@ def init_metrics(app): """ registry = CollectorRegistry() multiproc_temp_dir = app.config["PROMETHEUS_METRICS_TEMP_DIR"] - hostname = socket.gethostname() if not os.path.isdir(multiproc_temp_dir): os.makedirs(multiproc_temp_dir) @@ -27,10 +27,23 @@ def init_metrics(app): ) metrics.init_app(app) gauge_state = Gauge( - "cachito_requests_count", "Requests in each state", ["state"], multiprocess_mode="livesum" + "cachito_requests_count", + "Requests in each state", + ["state", "host"], + multiprocess_mode="livesum", ) request_duration = Summary( "cachito_request_duration_seconds", "Time spent in in_progress state" ) cachito_metrics["gauge_state"] = gauge_state cachito_metrics["request_duration"] = request_duration + + +def requests_inc(state): + """Increase the number of requests in given state.""" + cachito_metrics["gauge_state"].labels(state=state, host=hostname).inc() + + +def requests_dec(state): + """Decrease the number of requests in the given state.""" + cachito_metrics["gauge_state"].labels(state=state, host=hostname).dec()