From 812a07b3b9f2d212f80499433b638fb5b4a78f70 Mon Sep 17 00:00:00 2001 From: Ivar Refsdal Date: Thu, 18 Aug 2022 13:00:02 +0200 Subject: [PATCH] Release 0.2.60 Warn about not setting connection/socket-timeout when using clj-http https://github.com/ivarref/yoltq/issues/2 Add :healthy-allowed-error-time configuration option, default is 15 minutes --- README.md | 28 +++++++++++++-- pom.xml | 4 +-- src/com/github/ivarref/yoltq.clj | 14 +++++--- src/com/github/ivarref/yoltq/error_poller.clj | 36 +++++++++++-------- .../ivarref/yoltq/error_poller_test.clj | 8 ++--- .../com/github/ivarref/yoltq/virtual_test.clj | 15 +++++++- 6 files changed, 77 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index ade8650..05e7033 100644 --- a/README.md +++ b/README.md @@ -62,18 +62,25 @@ Imagine the following code: ```clojure (defn post-handler [user-input] (let [db-item (process user-input) - ext-ref (clj-http.client/post ext-service {...})] ; may throw exception + ext-ref (clj-http.client/post ext-service {:connection-timeout 3000 ; timeout in milliseconds + :socket-timeout 10000 ; timeout in milliseconds + ...})] ; may throw exception @(d/transact conn [(assoc db-item :some/ext-ref ext-ref)]))) ``` What if the POST request fails? Should it be retried? For how long? Should it be allowed to fail? How do you then process failures later? +PS: If you do not set connection/socket-timeout, there is a chance that +clj-http/client will wait for all eternity in the case of a dropped TCP connection. + The queue way to solve this would be: ```clojure (defn get-ext-ref [{:keys [id]}] - (let [ext-ref (clj-http.client/post ext-service {...})] ; may throw exception + (let [ext-ref (clj-http.client/post ext-service {:connection-timeout 3000 ; timeout in milliseconds + :socket-timeout 10000 ; timeout in milliseconds + ...})] ; may throw exception @(d/transact conn [[:db/cas [:some/id id] :some/ext-ref nil @@ -82,7 +89,7 @@ The queue way to solve this would be: (yq/add-consumer! :get-ext-ref get-ext-ref {:allow-cas-failure? true}) (defn post-handler [user-input] - (let [{:some/keys [id] :as db-item} (process user-input) + (let [{:some/keys [id] :as db-item} (process user-input)] @(d/transact conn [db-item (yq/put :get-ext-ref {:id id})]))) ``` @@ -371,6 +378,21 @@ Note: I have not tried these libraries myself. ## Change log +#### 2022-08-18 v0.2.60 [diff](https://github.com/ivarref/yoltq/compare/v0.2.59...v0.2.60) +Improved: Added config option `:healthy-allowed-error-time`: +``` + ; If you are dealing with a flaky downstream service, you may not want + ; yoltq to mark itself as unhealthy on the first failure encounter with + ; the downstream service. Change this setting to let yoltq mark itself + ; as healthy even though a queue item has been failing for some time. + :healthy-allowed-error-time (Duration/ofMinutes 15) +``` + +#### 2022-08-15 v0.2.59 [diff](https://github.com/ivarref/yoltq/compare/v0.2.58...v0.2.59) +Fixed: +* Race condition that made the following possible: `stop!` would terminate the slow thread +watcher, and a stuck thread could keep `stop!` from completing! + #### 2022-06-30 v0.2.58 [diff](https://github.com/ivarref/yoltq/compare/v0.2.57...v0.2.58) Slightly more safe EDN printing and parsing. Recommended reading: diff --git a/pom.xml b/pom.xml index 187b8ad..719b0e7 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ jar com.github.ivarref yoltq - 0.2.59 + 0.2.60 yoltq @@ -30,7 +30,7 @@ scm:git:git://github.com/ivarref/yoltq.git scm:git:ssh://git@github.com/ivarref/yoltq.git - v0.2.59 + v0.2.60 https://github.com/ivarref/yoltq \ No newline at end of file diff --git a/src/com/github/ivarref/yoltq.clj b/src/com/github/ivarref/yoltq.clj index 32693c3..89112a6 100644 --- a/src/com/github/ivarref/yoltq.clj +++ b/src/com/github/ivarref/yoltq.clj @@ -44,6 +44,12 @@ ; otherwise occur if competing with the tx-report-queue listener. :init-backoff-time (Duration/ofSeconds 60) + ; If you are dealing with a flaky downstream service, you may not want + ; yoltq to mark itself as unhealthy on the first failure encounter with + ; the downstream service. Change this setting to let yoltq mark itself + ; as healthy even though a queue item has been failing for some time. + :healthy-allowed-error-time (Duration/ofMinutes 15) + ; How frequent polling for init, error and hung jobs should be done. :poll-delay (Duration/ofSeconds 10) @@ -259,10 +265,10 @@ (let [conn (d/connect uri) started-consuming? (promise) n 1] - (init! {:conn conn - :error-backoff-time (Duration/ofSeconds 1) - :poll-delay (Duration/ofSeconds 1) - :max-execute-time (Duration/ofSeconds 3) + (init! {:conn conn + :error-backoff-time (Duration/ofSeconds 1) + :poll-delay (Duration/ofSeconds 1) + :max-execute-time (Duration/ofSeconds 3) :slow-thread-show-stacktrace? false}) (add-consumer! :q (fn [_] (deliver started-consuming? true) diff --git a/src/com/github/ivarref/yoltq/error_poller.clj b/src/com/github/ivarref/yoltq/error_poller.clj index ee6359e..dffff28 100644 --- a/src/com/github/ivarref/yoltq/error_poller.clj +++ b/src/com/github/ivarref/yoltq/error_poller.clj @@ -1,8 +1,8 @@ (ns com.github.ivarref.yoltq.error-poller - (:require [datomic.api :as d] - [com.github.ivarref.yoltq.utils :as u] + (:require [clojure.tools.logging :as log] [com.github.ivarref.yoltq.ext-sys :as ext] - [clojure.tools.logging :as log])) + [com.github.ivarref.yoltq.utils :as u] + [datomic.api :as d])) (defn get-state [v] @@ -64,31 +64,39 @@ {:run-callback :recovery})))))) -(defn do-poll-errors [{:keys [conn system-error +(defn do-poll-errors [{:keys [conn + system-error on-system-error on-system-recovery - healthy?] + healthy? + healthy-allowed-error-time] :or {on-system-error (fn [] (log/error "There are yoltq queues which have errors") nil) on-system-recovery (fn [] (log/info "Yoltq recovered"))} - :as config}] + :as config} + now-ms] (assert (some? conn) "expected :conn to be present") (assert (some? system-error) "expected :system-error to be present") - (let [error-count (or (d/q '[:find (count ?e) . - :in $ ?status + (assert (nat-int? healthy-allowed-error-time) "expected :healthy-allowed-error-time to be present") + (let [max-init-time (- now-ms healthy-allowed-error-time) + error-count (or (d/q '[:find (count ?e) . + :in $ ?status ?max-init-time :where - [?e :com.github.ivarref.yoltq/status ?status]] + [?e :com.github.ivarref.yoltq/status ?status] + [?e :com.github.ivarref.yoltq/init-time ?init-time] + [(<= ?init-time ?max-init-time)]] (d/db conn) - u/status-error) + u/status-error + max-init-time) 0)] (if (pos-int? error-count) (do (log/debug "poll-errors found" error-count "errors in system") (reset! healthy? false)) (reset! healthy? true)) - (let [{:keys [run-callback] :as new-state} (swap! system-error handle-error-count config (ext/now-ms) error-count)] + (let [{:keys [run-callback] :as new-state} (swap! system-error handle-error-count config now-ms error-count)] (when run-callback (cond (= run-callback :error) (on-system-error) @@ -99,18 +107,18 @@ :else (log/error "unhandled callback-type" run-callback)) (log/debug "run-callback is" run-callback)) - new-state))) + error-count))) (defn poll-errors [running? config-atom] (try (when @running? - (do-poll-errors @config-atom)) + (do-poll-errors @config-atom (ext/now-ms))) (catch Throwable t (log/error t "unexpected error in poll-errors:" (ex-message t)) nil))) (comment - (do-poll-errors @com.github.ivarref.yoltq/*config*)) + (do-poll-errors @com.github.ivarref.yoltq/*config* (ext/now-ms))) diff --git a/test/com/github/ivarref/yoltq/error_poller_test.clj b/test/com/github/ivarref/yoltq/error_poller_test.clj index 18f0aa7..4d92b81 100644 --- a/test/com/github/ivarref/yoltq/error_poller_test.clj +++ b/test/com/github/ivarref/yoltq/error_poller_test.clj @@ -1,9 +1,9 @@ (ns com.github.ivarref.yoltq.error-poller-test - (:require [clojure.test :refer [deftest is]] - [com.github.ivarref.yoltq.error-poller :as ep] + (:require [clojure.edn :as edn] + [clojure.test :refer [deftest is]] [clojure.tools.logging :as log] - [com.github.ivarref.yoltq.log-init :as logconfig] - [clojure.edn :as edn])) + [com.github.ivarref.yoltq.error-poller :as ep] + [com.github.ivarref.yoltq.log-init :as logconfig])) (deftest error-poller diff --git a/test/com/github/ivarref/yoltq/virtual_test.clj b/test/com/github/ivarref/yoltq/virtual_test.clj index e077517..996792e 100644 --- a/test/com/github/ivarref/yoltq/virtual_test.clj +++ b/test/com/github/ivarref/yoltq/virtual_test.clj @@ -2,6 +2,8 @@ (:require [clojure.test :refer [deftest is use-fixtures] :refer-macros [thrown?]] [clojure.tools.logging :as log] [com.github.ivarref.yoltq :as yq] + [com.github.ivarref.yoltq.error-poller :as error-poller] + [com.github.ivarref.yoltq.ext-sys :as ext] [com.github.ivarref.yoltq.impl :as i] [com.github.ivarref.yoltq.migrate :as migrate] [com.github.ivarref.yoltq.test-queue :as tq] @@ -9,7 +11,8 @@ [com.github.ivarref.yoltq.utils :as uu] [datomic-schema.core] [datomic.api :as d] - [taoensso.timbre :as timbre])) + [taoensso.timbre :as timbre]) + (:import (java.time Duration))) (use-fixtures :each tq/call-with-virtual-queue!) @@ -367,3 +370,13 @@ (is (= #{{:id "a"}} @received)) #_(timbre/with-level :fatal (is (thrown? Exception @(d/transact conn [(yq/put :q {})])))))) + +(deftest healthy-allowed-error-time-test + (let [conn (u/empty-conn)] + (yq/init! {:conn conn}) + (yq/add-consumer! :q (fn [_] (throw (ex-info "" {})))) + @(d/transact conn [(yq/put :q {:work 123})]) + (tq/consume-expect! :q :error) + (is (= 0 (error-poller/do-poll-errors @yq/*config* (ext/now-ms)))) + (is (= 0 (error-poller/do-poll-errors @yq/*config* (+ (dec (.toMillis (Duration/ofMinutes 15))) (ext/now-ms))))) + (is (= 1 (error-poller/do-poll-errors @yq/*config* (+ (.toMillis (Duration/ofMinutes 15)) (ext/now-ms)))))))