From 864990552c5b2661a2b004eadc60527cc5c4ceb2 Mon Sep 17 00:00:00 2001 From: mu <59917266+4eUeP@users.noreply.github.com> Date: Thu, 9 May 2024 12:44:18 +0800 Subject: [PATCH] fix potential deadlock in the global watcher callback --- src/ZooKeeper.hs | 10 ++++++---- src/ZooKeeper/Internal/FFI.hsc | 30 +++++++++++++++++++++++++++++- test/Spec.hs | 31 +++++++++++++++++++++++++++---- zoovisitor.cabal | 4 ++-- 4 files changed, 64 insertions(+), 11 deletions(-) diff --git a/src/ZooKeeper.hs b/src/ZooKeeper.hs index 4de1132..3ca83e8 100644 --- a/src/ZooKeeper.hs +++ b/src/ZooKeeper.hs @@ -76,8 +76,9 @@ zookeeperResInit -- server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002" -> Maybe T.WatcherFn -- ^ the global watcher callback function. When notifications are - -- triggered this function will be invoked. FIXME: Calling any zoo operations - -- (e.g. zooGet) will cause an infinite block. + -- triggered this function will be invoked. + -- + -- Note that each callback will be called in a different thread of execution. -> CInt -- ^ session expiration time in milliseconds -> Maybe T.ClientID @@ -700,8 +701,9 @@ zookeeperInit -- server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002" -> Maybe T.WatcherFn -- ^ the global watcher callback function. When notifications are - -- triggered this function will be invoked. FIXME: Calling any zoo operations - -- (e.g. zooGet) will cause an infinite block. + -- triggered this function will be invoked. + -- + -- Note that each callback will be called in a different thread of execution. -> CInt -- ^ session expiration time in milliseconds -> Maybe T.ClientID diff --git a/src/ZooKeeper/Internal/FFI.hsc b/src/ZooKeeper/Internal/FFI.hsc index df11d87..277ca6a 100644 --- a/src/ZooKeeper/Internal/FFI.hsc +++ b/src/ZooKeeper/Internal/FFI.hsc @@ -63,7 +63,35 @@ foreign import ccall "wrapper" mkWatcherFnPtr :: WatcherFn -> IO (FunPtr CWatcherFn) mkWatcherFnPtr fn = mkCWatcherFnPtr $ \zh ev st cpath _ctx -> do path <- CBytes.fromCString cpath - fn zh (ZooEvent ev) (ZooState st) path + -- FIXME: better way + -- + -- Here we fork a new thread to run the user's watcher function to avoid + -- blocking the C thread, potential deadlock. + -- + -- Without forkIO, the following code will deadlock, + -- + -- @ + -- gloWatcher :: WatcherFn + -- gloWatcher zh event state path = do + -- print =<< zooGet zh "/node" + -- + -- zookeeperResInit "127.0.0.1:2181" (Just gloWatcher) 5000 Nothing 0 + -- ... + -- @ + -- + -- The reason is: + -- + -- * All zookeeper completions are run by one completion c thread (or + -- likely in one thread). + -- * We are blocking on a MVar and waiting for the callback of zoo_aget + -- return. + -- * gloWatcher will be called by zookeeper library as a c function, which + -- means any blocking in haskell code will block the c thread. + -- + -- So, zooGet is waiting for the result of zoo_aget, and blocking on an MVar, + -- which will block the completion c thread here. The result of zoo_aget is + -- returned by this completion c thread, so it will never be called. + void $ forkIO $ fn zh (ZooEvent ev) (ZooState st) path foreign import ccall safe "zookeeper.h zookeeper_init" zookeeper_init diff --git a/test/Spec.hs b/test/Spec.hs index 8a54a89..e8d72e2 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -21,14 +21,18 @@ import ZooKeeper.Types recvTimeout :: CInt recvTimeout = 5000 +testZkHost :: CB.CBytes +testZkHost = "127.0.0.1:2182" + client :: Resource ZHandle -client = zookeeperResInit "127.0.0.1:2182" Nothing recvTimeout Nothing 0 +client = zookeeperResInit testZkHost Nothing recvTimeout Nothing 0 main :: IO () main = withResource client $ \zh -> do hspec $ opSpec zh hspec $ multiOpSpec zh hspec $ propSpec zh + hspec $ miscSpec zh hspec $ electionSpec1 zh hspec $ electionSpec2 client hspec $ lockSpec1 zh @@ -91,7 +95,7 @@ opSpec zh = do -- https://issues.apache.org/jira/browse/ZOOKEEPER-834 it "create children of ephemeral nodes should throw exception" $ do void $ zooCreate zh "/x" Nothing zooOpenAclUnsafe ZooEphemeral - zooCreate zh "/x/1" Nothing zooOpenAclUnsafe ZooEphemeral `shouldThrow` noChildrenForEphemerals + zooCreate zh "/x/1" Nothing zooOpenAclUnsafe ZooEphemeral `shouldThrow` noChildrenForEphemeralsEx it "get children of a leaf node should return []" $ do void $ zooCreate zh "/y" Nothing zooOpenAclUnsafe ZooPersistent @@ -186,6 +190,22 @@ electionSpec2 client_ = describe "ZooKeeper.zooElection (multi sessions)" $ do takeMVar finished4 readMVar leader `shouldReturn` (2 :: Int) +miscSpec :: ZHandle -> Spec +miscSpec zh = describe "Misc" $ do + -- TODO: add a timeout to this test (should be impl with forkOS instead of forkIO) + it "call zooGet in global watcher should not block" $ do + let nodeName = "/test-misc-node" + void $ zooCreate zh nodeName Nothing zooOpenAclUnsafe ZooEphemeral + void $ zooSet zh nodeName (Just "hello") Nothing + + mvar <- newEmptyMVar + let gloWatcher zh1 event state path = do + r <- zooGet zh1 nodeName + void $ tryPutMVar mvar r + let client1 = zookeeperResInit testZkHost (Just gloWatcher) recvTimeout Nothing 0 + withResource client1 $ \zh1 -> do + (dataCompletionValue <$> takeMVar mvar) `shouldReturn` Just "hello" + lockSpec1 :: ZHandle -> Spec lockSpec1 zh = describe "ZooKeeper.zooLock (single session)" $ do it "Test single client situation" $ do @@ -225,5 +245,8 @@ lockSpec2 client_ = describe "ZooKeeper.zooLock (multi sessions)" $ do ------------------------------------------------------------------------------- -noChildrenForEphemerals :: Selector ZNOCHILDRENFOREPHEMERALS -noChildrenForEphemerals = const True +noChildrenForEphemeralsEx :: Selector ZNOCHILDRENFOREPHEMERALS +noChildrenForEphemeralsEx = const True + +nodeExistsEx :: Selector ZNODEEXISTS +nodeExistsEx = const True diff --git a/zoovisitor.cabal b/zoovisitor.cabal index 3387955..6339a27 100644 --- a/zoovisitor.cabal +++ b/zoovisitor.cabal @@ -1,6 +1,6 @@ cabal-version: 2.2 name: zoovisitor -version: 0.2.6.1 +version: 0.2.7.0 synopsis: A haskell binding to Apache Zookeeper C library(mt) using Haskell Z project. @@ -12,7 +12,7 @@ license-file: LICENSE copyright: Copyright (c) author: mu maintainer: mu@laxcat.xyz -tested-with: GHC ==8.10.7 || ==9.0.2 || ==9.2.8 || ==9.4.5 +tested-with: GHC ==8.10.7 || ==9.0.2 || ==9.2.8 || ==9.4.8 category: Database homepage: https://github.com/ZHaskell/zoovisitor bug-reports: https://github.com/ZHaskell/zoovisitor/issues