From 52057a8252d7a191463101895fda08cbce36fdd1 Mon Sep 17 00:00:00 2001 From: David Cheng Date: Wed, 20 Dec 2023 09:25:05 -0800 Subject: [PATCH] Transfers: add average transfer wait time metric #5012 We keep track of the wait time and transfer time for each completed transfer sent to update_transfer_state. This metric aims to measure the effectiveness of load distribution from different source node selection strategies. Lower average wait time and transfer time generally signify a more effective strategy. --- lib/rucio/core/request.py | 17 +++++++++++++++++ lib/rucio/core/transfer.py | 3 +++ 2 files changed, 20 insertions(+) diff --git a/lib/rucio/core/request.py b/lib/rucio/core/request.py index 5dcfc7d6ea..c61dc83065 100644 --- a/lib/rucio/core/request.py +++ b/lib/rucio/core/request.py @@ -1371,6 +1371,9 @@ def observe( state: RequestState, file_size: int, *, + submitted_at: Optional[datetime.datetime] = None, + started_at: Optional[datetime.datetime] = None, + transferred_at: Optional[datetime.datetime] = None, session: "Optional[Session]" = None ) -> None: """ @@ -1390,6 +1393,20 @@ def observe( if state == RequestState.DONE: record.files_done += 1 record.bytes_done += file_size + + transfer_time_buckets = ( + 10, 30, 60, 5 * 60, 10 * 60, 20 * 60, 40 * 60, 60 * 60, 1.5 * 60 * 60, 3 * 60 * 60, 6 * 60 * 60, + 12 * 60 * 60, 24 * 60 * 60, 3 * 24 * 60 * 60, 4 * 24 * 60 * 60, 5 * 24 * 60 * 60, + 6 * 24 * 60 * 60, 7 * 24 * 60 * 60, 10 * 24 * 60 * 60, 14 * 24 * 60 * 60, 30 * 24 * 60 * 60, + float('inf') + ) + if submitted_at is not None: + if started_at is not None: + wait_time = (started_at - submitted_at).total_seconds() + METRICS.timer(name='wait_time', buckets=transfer_time_buckets).observe(wait_time) + if transferred_at is not None: + transfer_time = (transferred_at - submitted_at).total_seconds() + METRICS.timer(name='transfer_time', buckets=transfer_time_buckets).observe(transfer_time) else: record.files_failed += 1 if save_samples: diff --git 
a/lib/rucio/core/transfer.py b/lib/rucio/core/transfer.py index 97c4579236..9a159d67a6 100644 --- a/lib/rucio/core/transfer.py +++ b/lib/rucio/core/transfer.py @@ -566,6 +566,9 @@ def update_transfer_state( activity=request['activity'], state=tt_status_report.state, file_size=request['bytes'], + submitted_at=request.get('submitted_at', None), + started_at=request.get('started_at', None), + transferred_at=request.get('transferred_at', None), session=session, ) request_core.add_monitor_message(