From c24ff140b5294d518c69a09f6b0882b8d026f8ad Mon Sep 17 00:00:00 2001 From: DiCanio Date: Mon, 21 Oct 2019 11:22:06 +0200 Subject: [PATCH 1/3] Reset Gauge Metrics Resolves #5 --- src/datomic_tx_metrics/core.clj | 92 ++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 36 deletions(-) diff --git a/src/datomic_tx_metrics/core.clj b/src/datomic_tx_metrics/core.clj index c38d57a..44fe08f 100644 --- a/src/datomic_tx_metrics/core.clj +++ b/src/datomic_tx_metrics/core.clj @@ -195,14 +195,17 @@ (defn tx-metrics-callback-handler "Called by Datomic transactor transferring its metrics." [tx-metrics] - (when-let [{:keys [sum]} (:AlarmIndexingJobFailed tx-metrics)] - (prom/set! alarms "index-job-failed" sum)) + (if-let [{:keys [sum]} (:AlarmIndexingJobFailed tx-metrics)] + (prom/set! alarms "index-job-failed" sum) + (prom/set! alarms "index-job-failed" 0)) - (when-let [{:keys [sum]} (:AlarmBackPressure tx-metrics)] - (prom/set! alarms "back-pressure" sum)) + (if-let [{:keys [sum]} (:AlarmBackPressure tx-metrics)] + (prom/set! alarms "back-pressure" sum) + (prom/set! alarms "back-pressure" 0)) - (when-let [{:keys [sum]} (:AlarmUnhandledException tx-metrics)] - (prom/set! alarms "unhandled-exception" sum)) + (if-let [{:keys [sum]} (:AlarmUnhandledException tx-metrics)] + (prom/set! alarms "unhandled-exception" sum) + (prom/set! alarms "unhandled-exception" 0)) (->> (keys tx-metrics) (filter @@ -234,8 +237,9 @@ (prom/inc! transacted-datoms-total sum) (prom/inc! transactions-total count)) - (when-let [{:keys [count]} (:TransactionBatch tx-metrics)] - (prom/set! transactions-batch count)) + (if-let [{:keys [count]} (:TransactionBatch tx-metrics)] + (prom/set! transactions-batch count) + (prom/clear! transactions-batch)) (when-let [{:keys [sum]} (:TransactionBytes tx-metrics)] (prom/inc! transacted-bytes-total sum)) @@ -249,57 +253,73 @@ (when-let [{:keys [sum]} (:LogWriteMsec tx-metrics)] (prom/inc! transactions-write-log-msec-total sum)) - (when-let [{:keys [sum]} (:Datoms tx-metrics)] - (prom/clear! datoms) - (prom/inc! 
datoms sum)) + (if-let [{:keys [sum]} (:Datoms tx-metrics)] + (prom/set! datoms sum) + (prom/clear! datoms)) - (when-let [{:keys [sum]} (:IndexDatoms tx-metrics)] - (prom/set! index-datoms sum)) + ; TODO: check if resetting this is actually what resembles the transactor state + (if-let [{:keys [sum]} (:IndexDatoms tx-metrics)] + (prom/set! index-datoms sum) + (prom/clear! index-datoms)) - (when-let [{:keys [sum]} (:IndexSegments tx-metrics)] - (prom/set! index-segments sum)) + ; TODO: check if resetting this is actually what resembles the transactor state + (if-let [{:keys [sum]} (:IndexSegments tx-metrics)] + (prom/set! index-segments sum) + (prom/clear! index-segments)) - (when-let [{:keys [sum]} (:IndexWrites tx-metrics)] - (prom/set! index-writes sum)) + (if-let [{:keys [sum]} (:IndexWrites tx-metrics)] + (prom/set! index-writes sum) + (prom/clear! index-writes)) - (when-let [{:keys [sum]} (:IndexWriteMsec tx-metrics)] - (prom/set! index-writes-msec sum)) + (if-let [{:keys [sum]} (:IndexWriteMsec tx-metrics)] + (prom/set! index-writes-msec sum) + (prom/clear! index-writes-msec)) - (when-let [{:keys [sum]} (:CreateEntireIndexMsec tx-metrics)] - (prom/set! index-creation-msec sum)) + (if-let [{:keys [sum]} (:CreateEntireIndexMsec tx-metrics)] + (prom/set! index-creation-msec sum) + (prom/clear! index-creation-msec)) - (when-let [{:keys [sum]} (:CreateFulltextIndexMsec tx-metrics)] - (prom/set! index-fulltext-creation-msec sum)) + (if-let [{:keys [sum]} (:CreateFulltextIndexMsec tx-metrics)] + (prom/set! index-fulltext-creation-msec sum) + (prom/clear! index-fulltext-creation-msec)) (when-let [{:keys [sum]} (:MemoryIndexMB tx-metrics)] (prom/set! memory-index-consumed-megabytes sum)) - (when-let [{:keys [sum]} (:MemoryIndexFillMsec tx-metrics)] - (prom/set! memory-index-fill-msec sum)) + (if-let [{:keys [sum]} (:MemoryIndexFillMsec tx-metrics)] + (prom/set! memory-index-fill-msec sum) + (prom/clear! 
memory-index-fill-msec)) (when-let [{:keys [sum count]} (:StoragePutBytes tx-metrics)] (prom/inc! storage-write-operations-total count) (prom/inc! storage-write-bytes-total sum)) - (when-let [{:keys [sum]} (:StoragePutMsec tx-metrics)] - (prom/set! storage-write-msec sum)) + (if-let [{:keys [sum]} (:StoragePutMsec tx-metrics)] + (prom/set! storage-write-msec sum) + (prom/clear! storage-write-msec)) (when-let [{:keys [sum count]} (:StorageGetBytes tx-metrics)] (prom/inc! storage-read-operations-total count) (prom/inc! storage-read-bytes-total sum)) - (when-let [{:keys [sum]} (:StorageGetMsec tx-metrics)] - (prom/set! storage-read-msec sum)) + (if-let [{:keys [sum]} (:StorageGetMsec tx-metrics)] + (prom/set! storage-read-msec sum) + (prom/clear! storage-read-msec)) - (when-let [{:keys [sum count]} (:StorageBackoff tx-metrics)] - (prom/set! storage-backoff-msec sum) - (prom/inc! storage-backoff-retries-total count)) + (if-let [{:keys [sum count]} (:StorageBackoff tx-metrics)] + (do + (prom/set! storage-backoff-msec sum) + (prom/inc! storage-backoff-retries-total count)) + (prom/clear! storage-backoff-msec)) - (when-let [{:keys [sum count]} (:ObjectCache tx-metrics)] - (prom/set! object-cache-hits-ratio (/ (double sum) count))) + ; TODO: discuss if this is actually a sane move - resetting this might be misleading without another metrics showing how many cache requests were made... + (if-let [{:keys [sum count]} (:ObjectCache tx-metrics)] + (prom/set! object-cache-hits-ratio (/ (double sum) count)) + (prom/clear! object-cache-hits-ratio)) - (when-let [{:keys [sum]} (:GarbageSegments tx-metrics)] - (prom/set! garbage-segments sum)) + (if-let [{:keys [sum]} (:GarbageSegments tx-metrics)] + (prom/set! garbage-segments sum) + (prom/clear! garbage-segments)) (when-let [{:keys [sum count]} (:HeartbeatMsec tx-metrics)] (prom/set! 
heartbeats-msec sum) From cf19325a00d6401c5f9f7f576a8f7131910b70b6 Mon Sep 17 00:00:00 2001 From: DiCanio Date: Mon, 21 Oct 2019 16:07:54 +0200 Subject: [PATCH 2/3] Introduce Object Cache Requests Metric ...in order to support the object-cache-hit-ratio metric if it is cleared. Otherwise one would not know if there are only cache misses or just no requests at all. --- src/datomic_tx_metrics/core.clj | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/datomic_tx_metrics/core.clj b/src/datomic_tx_metrics/core.clj index 44fe08f..edfbda9 100644 --- a/src/datomic_tx_metrics/core.clj +++ b/src/datomic_tx_metrics/core.clj @@ -27,6 +27,10 @@ "Number of segments in the Datomic object cache." {:namespace "datomic"}) +(prom/defgauge object-cache-requests + "Number of requests to the Datomic object cache." + {:namespace "datomic"}) + (prom/defgauge remote-peers "Number of remote peers connected." {:namespace "datomic"}) @@ -158,6 +162,7 @@ (.register alarms) (.register available-ram-megabytes) (.register object-cache-size) + (.register object-cache-requests) (.register remote-peers) (.register successful-metric-reports) (.register transacted-datoms-total) @@ -312,10 +317,13 @@ (prom/inc! storage-backoff-retries-total count)) (prom/clear! storage-backoff-msec)) - ; TODO: discuss if this is actually a sane move - resetting this might be misleading without another metrics showing how many cache requests were made... (if-let [{:keys [sum count]} (:ObjectCache tx-metrics)] - (prom/set! object-cache-hits-ratio (/ (double sum) count)) - (prom/clear! object-cache-hits-ratio)) + (do + (prom/set! object-cache-hits-ratio (/ (double sum) count)) + (prom/set! object-cache-requests count)) + (do + (prom/clear! object-cache-hits-ratio) + (prom/clear! object-cache-requests))) (if-let [{:keys [sum]} (:GarbageSegments tx-metrics)] (prom/set! 
garbage-segments sum) From 8f5eb3270212a46b6c237181a89b44fc2ebda11d Mon Sep 17 00:00:00 2001 From: DiCanio Date: Mon, 21 Oct 2019 16:26:19 +0200 Subject: [PATCH 3/3] Update Project Version --- project.clj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project.clj b/project.clj index b0f627f..453694e 100644 --- a/project.clj +++ b/project.clj @@ -1,4 +1,4 @@ -(defproject datomic-tx-metrics "0.1.0" +(defproject datomic-tx-metrics "0.2.0" :description "Containing a callback handler for collecting Datomic Transactor + JVM metrics for consumption (e.g. by Prometheus) using a web endpoint offered by the included web server." :dependencies [[aleph "0.4.6"]