{-# LANGUAGE CPP #-} -- needed for the MIN_VERSION_base conditionals below (unless -cpp is enabled elsewhere)
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE PackageImports #-}
module Main
where
import Control.Concurrent.Async
import Control.Monad
import System.Environment
import Control.Concurrent
import Control.Concurrent.Chan
import Control.Concurrent.STM
import Control.Concurrent.STM.TQueue
import Control.Concurrent.STM.TBQueue
import Control.Concurrent.MVar
import Data.IORef
import Criterion.Main
import Control.Exception(evaluate,mask_)
import qualified "chan-split-fast" Control.Concurrent.Chan.Split as S
import qualified "split-channel" Control.Concurrent.Chan.Split as SC
import Data.Primitive.MutVar
import Control.Monad.Primitive(PrimState)
import Data.Atomics.Counter
import Data.Atomics
import System.Random
import System.Random.MWC
import Data.Array.IArray
import qualified Data.Vector as V
import qualified Data.Vector.Generic.Mutable as MV
import qualified Data.Vector.Unboxed.Mutable as UMV
import qualified Data.Primitive as P
-- TODO fix imports above
import qualified Data.Vector.Mutable as MVec
import System.Time(getClockTime)
import Data.Time.Clock.POSIX
import Data.Word
import Data.Bits
-- Hack since these aren't currently working with ghc 7.8
#if MIN_VERSION_base(4,7,0)
#else
import qualified Data.Concurrent.Queue.MichaelScott as MS
#endif
import qualified Data.Concurrent.Deque.ChaseLev as CL
import Benchmarks
-- These tests were initially taken from stm/bench/chanbench.hs, ported to
-- criterion, with some additions.
--
-- The original used CPP to avoid code duplication while also ensuring GHC
-- optimized the code in a realistic fashion. Here we just copy and paste.
main = do
let n = 100000
--let n = 2000000 -- the originally suggested value; triggers bugs if exceeded
mv <- newEmptyMVar -- this is to be left empty after each test
mvFull <- newMVar '1'
-- --
-- mvWithFinalizer <- newEmptyMVar
-- mkWeakMVar mvWithFinalizer $ return ()
-- --
-- mvFinalizee <- newMVar 'a'
-- mvWithFinalizer <- newMVar ()
-- mkWeakMVar mvWithFinalizer $
-- modifyMVar_ mvFinalizee (const $ return 'b')
-- --
tmv <- newEmptyTMVarIO
tv <- newTVarIO '1'
ior <- newIORef '1'
mutv <- newMutVar '1'
atomic_counter <- newCounter 0
-- to be left empty at the end of each test:
#if MIN_VERSION_base(4,7,0)
#else
lockfreeQEmpty <- MS.newQ
#endif
chaselevQEmpty <- CL.newQ
chanEmpty <- newChan
tchanEmpty <- newTChanIO
tqueueEmpty <- newTQueueIO
tbqueueEmpty <- newTBQueueIO 2
(fastEmptyI,fastEmptyO) <- S.newSplitChan
(splitchannelEmptyI,splitchannelEmptyO) <- SC.new
-- random generators
mwc_gen <- createSystemRandom
sys_rand_gen <- newStdGen
let arr8 = listArray (0,7) [1..8] :: Array Int Int
let arr16 = listArray (0,15) [1..16] :: Array Int Int
let vec8 = V.fromList [1..8] :: V.Vector Int
let vec16 = V.fromList [1..16] :: V.Vector Int
mvec8 <- V.thaw vec8 -- :: V.MVector (PrimState IO) Int
mvec16 <- V.thaw vec16 -- :: V.MVector (PrimState IO) Int
parr8 <- P.newArray 8 (0::Int)
parr16 <- P.newArray 16 (0::Int)
parr32 <- P.newArray 32 (0::Int)
parr128 <- P.newArray 128 (0::Int)
parr512 <- P.newArray 512 (0::Int)
parr32Justs <- P.newArray 32 (Just 0 :: Maybe Int)
iparr8 <- ((P.newArray 8 (0::Int)) >>= P.unsafeFreezeArray) :: IO (P.Array Int)
iparr16 <- ((P.newArray 16 (0::Int)) >>= P.unsafeFreezeArray) :: IO (P.Array Int)
ba16 <- (P.newByteArray (16 * P.sizeOf (0 :: Int)) :: IO (P.MutableByteArray (PrimState IO)))
forM_ [0..15] $ \i-> P.writeByteArray ba16 i (0::Int)
umvvec16 <- UMV.new 16 :: IO (UMV.IOVector Int)
UMV.set umvvec16 0 :: IO ()
defaultMain $
[ bgroup "Channel implementations" $
-- Very artificial; just adding up the costs of the
-- takes/puts/reads involved in getting a single message in and out
[ bgroup "Latency micro-benchmark" $
[ bench "Chan" (writeChan chanEmpty () >> readChan chanEmpty)
, bench "TChan" (atomically (writeTChan tchanEmpty () >> readTChan tchanEmpty))
, bench "TQueue" (atomically (writeTQueue tqueueEmpty () >> readTQueue tqueueEmpty))
, bench "TBQueue" (atomically (writeTBQueue tbqueueEmpty () >> readTBQueue tbqueueEmpty))
, bench "chan-split-fast" (S.writeChan fastEmptyI () >> S.readChan fastEmptyO)
, bench "split-channel" (SC.send splitchannelEmptyI () >> SC.receive splitchannelEmptyO)
#if MIN_VERSION_base(4,7,0)
#else
, bench "lockfree-queue" (MS.pushL lockfreeQEmpty () >> msreadR lockfreeQEmpty)
#endif
, bench "chaselev-dequeue" (CL.pushL chaselevQEmpty () >> clreadR chaselevQEmpty)
]
, bgroup ("Single-thread throughput with "++show n++" messages") $
-- some pure operations we'd like a rough measurement for, e.g.
-- the TQueue performs a reverse [1..n] in a test run, so we'd
-- like an idea of how much that factor affects throughput
-- (a sketch of that two-list pattern follows this group).
[ bgroup "For scale" $
[ bench "reverse [1..n]" $ nf (\n'-> reverse [1..n']) n
, bench "reverse replicate n 1" $ nf (\n'-> replicate n' (1::Int)) n
]
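-- For orientation: TQueue keeps its elements in two lists, consing writes
-- onto a write end and reversing that accumulated list into the read end
-- whenever the read end runs dry; that occasional `reverse` is the cost the
-- "For scale" group above tries to put a number on. A minimal pure sketch of
-- the two-list pattern (illustrative only; SimpleQ, pushQ and popQ are not
-- part of this benchmark suite):
--
-- > data SimpleQ a = SimpleQ [a] [a]          -- read end, write end (newest first)
-- >
-- > pushQ :: a -> SimpleQ a -> SimpleQ a
-- > pushQ x (SimpleQ front back) = SimpleQ front (x:back)
-- >
-- > popQ :: SimpleQ a -> Maybe (a, SimpleQ a)
-- > popQ (SimpleQ (x:front) back) = Just (x, SimpleQ front back)
-- > popQ (SimpleQ []        back) =
-- >   case reverse back of                    -- the O(n) reverse paid here
-- >     []        -> Nothing
-- >     (x:front) -> Just (x, SimpleQ front [])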
, bgroup "Chan" $
-- original tests from chanbench.hs
[ bench "sequential write all then read all" $ runtestChan1 n
, bench "repeated write some, read some" $ runtestChan2 n
]
, bgroup "TChan" $
[ bench "sequential write all then read all" $ runtestTChan1 n
, bench "repeated write some, read some" $ runtestTChan2 n
]
, bgroup "TQueue" $
[ bench "sequential write all then read all" $ runtestTQueue1 n
, bench "repeated write some, read some" $ runtestTQueue2 n
]
, bgroup "TBQueue" $
[ bench "sequential write all then read all" $ runtestTBQueue1 n
, bench "repeated write some, read some" $ runtestTBQueue2 n
]
-- OTHER CHAN IMPLEMENTATIONS:
, bgroup "chan-split-fast" $
[ bench "sequential write all then read all" $ runtestSplitChan1 n
, bench "repeated write some, read some" $ runtestSplitChan2 n
]
, bgroup "split-channel" $
[ bench "sequential write all then read all" $ runtestSplitChannel1 n
, bench "repeated write some, read some" $ runtestSplitChannel2 n
]
#if MIN_VERSION_base(4,7,0)
#else
, bgroup "lockfree-queue" $
[ bench "sequential write all then read all" $ runtestLockfreeQueue1 n
, bench "repeated write some, read some" $ runtestLockfreeQueue2 n
]
#endif
, bgroup "chaselev-dequeue" $
[ bench "sequential write all then read all" $ runtestChaseLevQueue1 n
, bench "repeated write some, read some" $ runtestChaseLevQueue2 n
]
]
]
, bgroup "Forking, context switches, and Misc. on a single core" $
[ bench "forkIO" (forkIO (return ()) >> return ())
, bench "put,take MVar" $ do
putMVar mv '0'
takeMVar mv
, bench "put,takeMVar + forkIO + 2 context switches" $ do
forkIO $ putMVar mv '0'
takeMVar mv
, bench "getNumCapabilities" getNumCapabilities
, bench "myThreadId" myThreadId
, bench "myThreadId >>= threadCapability" $ myThreadId >>= threadCapability
-- It may not be possible to re-use generators for our applications of random, so:
, bench "random new_gen" $ newStdGen
, bench "random Int range: (1,8)" $ whnf (randomR (1 :: Int, 8)) sys_rand_gen
-- THIS IS CRAZY SLOW; TODO: move it out of this group so we can actually compare graphs:
--, bench "mwc-random new_gen" $ createSystemRandom
, bench "mwc-random Int range: (1,8)" $ ((uniformR (1 :: Int, 8) mwc_gen) :: IO Int)
, bench "randomRIO (1,8)" $ randomRIO (1::Int,8)
, bench "old-time getClockTime" $ getClockTime
, bench "time getPOSIXTime" getPOSIXTime
]
, bgroup "Var primitives" $
[ bgroup "For scale" $
[ bench "mod (Int)" $ nf (2147483647 `mod`) (8 :: Int)
, bench "rem (Int)" $ nf (2147483647 `rem`) (8 :: Int)
, bench "mod (Word)" $ nf ((2147483647::Word) `mod`) (8 :: Word)
, bench "rem (Word)" $ nf ((2147483647::Word) `rem`) (8 :: Word)
-- tricks since we only need powers of two: a `mod` 2^i = a .&. (2^i - 1)
-- (see the worked sketch after this group)
, bench "AND modulo (Int)" $ nf (((2147483647::Int) .&.) . subtract 1) (8 :: Int)
, bench "AND modulo (Int) easier" $ nf (((2147483647::Int) .&.)) (7 :: Int)
, bench "AND modulo (Word)" $ nf (((2147483647::Word) .&.) . subtract 1) (8 :: Word)
-- we might end up just using subtraction:
, bench "subtraction (Int)" $ nf (subtract 2147483640) (2147483647 :: Int)
-- ALL OF THE ABOVE IS STUPID SLOW; probably measuring the overhead of function calls, boxing/unboxing, etc.
, bench "subtraction on counter incr" $ nfIO $ fmap (subtract 2147483640) (incrCounter 1 atomic_counter)
]
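-- Worked sketch of the power-of-two modulo identity benchmarked above: when m
-- is a power of two, a `mod` m == a .&. (m - 1). The helper below is
-- illustrative only and not used by these benchmarks:
--
-- > modPow2 :: Int -> Int -> Int
-- > modPow2 a m = a .&. (m - 1)   -- only valid when m is a power of two
-- >
-- > -- e.g. 21 `mod` 8 == 5, and 21 .&. 7 == 5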
, bgroup "atomic-primops (puts on safety goggles...)" $
[ bench "newCounter" $ newCounter 0
, bench "incrCounter 1" $ incrCounter 1 atomic_counter
, bench "incrCounter 1024" $ incrCounter 1024 atomic_counter
]
, bgroup "IORef" $
[ bench "newIORef Char" $ (newIORef '0')
, bench "writeIORef" $ (writeIORef ior '1')
, bench "readIORef" $ (readIORef ior)
, bench "readIORef (w result forced)" $ nfIO (readIORef ior) -- what does this tell us w/r/t above?
, bench "readForCAS" $ (fmap peekTicket $ readForCAS ior)
, bench "readForCAS (w result forced)" $ nfIO (fmap peekTicket $ readForCAS ior)
, bench "modifyIORef' (i.e. readIORef + writeIORef)" (modifyIORef' ior $ const '2')
, bench "atomicModifyIORef' (i.e. (in GHC) strict atomicModifyMutVar)" $ (atomicModifyIORef' ior $ const ('3','3'))
, bench "atomicModifyIORef (i.e. (in GHC) atomicModifyMutVar)" $ (atomicModifyIORef ior $ const ('3','3'))
, bench "atomicModifyIORefCAS (i.e. atomic-primops CAS loop)" $ (atomicModifyIORefCAS ior $ const ('4','4'))
, bench "atomicModifyIORefCAS' (my CAS loop)" $ (atomicModifyIORefCAS' ior $ const ('4','4'))
]
, bgroup "MVar" $
[ bench "newEmptyMVar" $ newEmptyMVar
, bench "newEmptyMVar + putMVar (i.e. newMVar Char)" (newEmptyMVar >>= \v->putMVar v '0')
, bench "putMVar + takeMVar" $ (putMVar mv '1' >> takeMVar mv)
, bench "modifyMVarMasked_ (i.e. mask + takeMVar + putMVar + exception-handling)" $ (modifyMVarMasked_ mvFull (const $ return '2'))
, bench "mask_ + takeMVar-and-mod + putMVar)" $ mask_ (takeMVar mvFull >>= (putMVar mvFull . const '2'))
, bench "newMVar + mkWeakMVar with finalizer" $ (newMVar '1' >>= flip mkWeakMVar (return ()))
-- These show no effect of a finalizer:
-- , bench "on MVar with finalizer: putMVar, takeMVar" $ (putMVar mvWithFinalizer '1' >> takeMVar mvWithFinalizer)
-- , bench "On target of an MVar finalizer: takeMVar, putMVar" $ (takeMVar mvFinalizee >>= putMVar mvFinalizee)
#if MIN_VERSION_base(4,7,0)
, bench "tryReadMVar" $ tryReadMVar mvFull
, bench "tryReadMVar (w result forced)" $ nfIO $ tryReadMVar mvFull
#endif
, bench "readMVar" $ readMVar mvFull
, bench "readMVar (w result forced)" $ nfIO $ readMVar mvFull
]
, bgroup "TVar" $
[ bench "newTVarIO ()" $ (newTVarIO ())
, bench "atomically writeTVar" $ (atomically $ writeTVar tv '1')
, bench "atomically readTVar" $ (atomically $ readTVar tv)
, bench "readTVarIO" $ (readTVarIO tv)
, bench "atomically modifyTVar' (i.e. atomically (readTVar + writeTVar))" $ (atomically $ modifyTVar' tv (const '2'))
, bench "(atomically writeTVar) + (atomically readTVar)" $ ((atomically $ writeTVar tv '1') >> (atomically $ readTVar tv))
]
, bgroup "TMVar" $
[ bench "newEmptyTMVar (i.e. newTVarIO)" $ newEmptyTMVarIO
, bench "atomically (newEmptyTMVar + putTMVar) (i.e. newTVar + readTVar + writeTVar,retry)" (atomically (newEmptyTMVar >>= \v->putTMVar v ()))
, bench "atomically (putTMVar + takeTMVar)" $ (atomically $ (putTMVar tmv '1' >> takeTMVar tmv))
, bench "(atomically putTMVar) + (atomically takeTMVar)" $ ((atomically $ putTMVar tmv '1') >> (atomically $ takeTMVar tmv))
]
, bgroup "MutVar" $
[ bench "newMutVar ()" $ (newMutVar () :: IO (MutVar (PrimState IO) ()))
, bench "writeMutVar" $ (writeMutVar mutv '1' :: IO ())
, bench "readMutVar" $ (readMutVar mutv :: IO Char)
, bench "atomicModifyMutVar" $ (atomicModifyMutVar mutv $ const ('2','2') :: IO Char)
]
]
-- Some more tests of pure operations relevant to TQueue style dequeue
-- performance.
, bgroup "Misc" $
-- of interest for the TQueue-style approach (sketches of the two building strategies follow this group)
[ bgroup "cons and reverse" $
[ bench "cons" $ nf (:[]) True
, bench "pure unrolled cons then final reverse x10" $ nf testConsUnrolledReverse 10
, bench "pure cons then final reverse x10" $ nf testConsReverse 10
, bench "pure unrolled cons then final reverse x5" $ nf testConsUnrolledReverse 5
, bench "pure cons then final reverse x5" $ nf testConsReverse 5
, bench "pure cons-composition append x10" $ nf testCompositionAppend 10
, bench "pure cons then final reverse x10" $ nf testConsReverse 10
, bench "pure cons-composition append x100" $ nf testCompositionAppend 100
, bench "pure cons then final reverse x100" $ nf testConsReverse 100
, bench "pure cons-composition append x10000" $ nf testCompositionAppend 10000
, bench "pure cons then final reverse x10000" $ nf testConsReverse 10000
, bench "pure cons-composition append and prepend x10" $ nf testCompositionAppendPrepend 10
, bench "pure cons-composition append and prepend x100" $ nf testCompositionAppendPrepend 100
, bench "pure cons-composition append and prepend x10000" $ nf testCompositionAppendPrepend 10000
, bench "mvar-stored cons-composition append x10" $ nfIO $ testCompositionAppendInMVar 10
, bench "mvar-stored cons then final reverse x10" $ nfIO $ testConsReverseInMVar 10
, bench "mvar-stored cons-composition append x100" $ nfIO $ testCompositionAppendInMVar 100
, bench "mvar-stored cons then final reverse x100" $ nfIO $ testConsReverseInMVar 100
, bench "mvar-stored cons-composition append x10000" $ nfIO $ testCompositionAppendInMVar 10000
, bench "mvar-stored cons then final reverse x10000" $ nfIO $ testConsReverseInMVar 10000
, bench "storing mvar-stored cons-composition append x10" $ nfIO $ testStoreCompositionAppendInMVar 10
, bench "storing mvar-stored cons then final reverse x10" $ nfIO $ testStoreConsReverseInMVar 10
, bench "storing mvar-stored cons-composition append x100" $ nfIO $ testStoreCompositionAppendInMVar 100
, bench "storing mvar-stored cons then final reverse x100" $ nfIO $ testStoreConsReverseInMVar 100
, bench "storing mvar-stored cons-composition append x10000" $ nfIO $ testStoreCompositionAppendInMVar 10000
, bench "storing mvar-stored cons then final reverse x10000" $ nfIO $ testStoreConsReverseInMVar 10000
]
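-- Rough sketch of the two pure building strategies compared above: the
-- "cons-composition append" variants build the sequence as a composition of
-- (x:) sections (a difference list) and materialize it by applying to [],
-- while the "cons then final reverse" variants cons onto an accumulator and
-- reverse once at the end. Illustrative only; the real testCompositionAppend /
-- testConsReverse live in the Benchmarks module and may differ in detail:
--
-- > buildByComposition :: Int -> [Int]
-- > buildByComposition n = foldl (\acc i -> acc . (i:)) id [1..n] []
-- >
-- > buildThenReverse :: Int -> [Int]
-- > buildThenReverse n = reverse (foldl (\acc i -> i:acc) [] [1..n])
-- >
-- > -- both produce [1..n]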
, bgroup "arrays" $
[ bgroup "indexing" $
[ bench "index 8th list" $ nf ([(1::Int)..8] !!) 7
, bench "index 8th IArray" $ nf (arr8 !) 7
, bench "index 8th Vector" $ nf (V.unsafeIndex vec8) 7
, bench "index 8th MVector" $ nfIO $ (MV.unsafeRead mvec8) 7
-- I think this is basically MVector AFAICT
, bench "index 8th Primitiv MutableArray" $ nfIO $ (P.readArray parr8) 7
, bench "index 8th Primitiv Array" $ nf (P.indexArray iparr8) 7
, bench "index 16th list" $ nf ([(1::Int)..16] !!) 15
, bench "index 16th IArray" $ nf (arr16 !) 15
, bench "index 16th Vector" $ nf (V.unsafeIndex vec16) 15
, bench "index 16th MVector" $ nfIO $ (MV.unsafeRead mvec16) 15
, bench "index 16th Primitiv MutableArray" $ nfIO $ (P.readArray parr16) 15
, bench "index 16th Primitiv Array" $ nf (P.indexArray iparr16) 15
, bench "readArrayElem for CAS (MutableArray)" $ nfIO (fmap peekTicket $ readArrayElem parr16 15 )
, bench "readArray (MutableArray)" $ nfIO (P.readArray parr16 15 )
, bench "readByteArray (MutableByteArray, usable for CAS)" $ nfIO (P.readByteArray ba16 15 :: IO Int)
, bench "read Mutable Unboxed Vector" $ (UMV.read umvvec16 15 :: IO Int)
]
, bgroup "writing" $
[ bench "write MutableArray" $ (P.writeArray parr16 15 1 :: IO ())
, bench "CAS MutableArray (along with a readArrayElem)" (readArrayElem parr16 15 >>= (\t-> casArrayElem parr16 15 t 2))
, bench "write MutableByteArray" (P.writeByteArray ba16 15 (1::Int) :: IO ())
, bench "CAS MutableByteArray (along with a readByteArray)" (P.readByteArray ba16 15 >>= (\t-> casByteArrayInt ba16 15 t (2::Int)))
, bench "write Mutable Unboxed Vector" $ (UMV.write umvvec16 15 2 :: IO ())
, bench "fetchAddByteArrayInt" $ fetchAddByteArrayInt ba16 14 1
, bench "fetchAddByteArrayInt w result forced" $ nfIO $ fetchAddByteArrayInt ba16 14 1
]
, bgroup "creating" $
[ bench "new MVector 8 Ints" $ (MVec.new 8 :: IO (MVec.IOVector Int))
, bench "new MVector 32 Ints" $ (MVec.new 32 :: IO (MVec.IOVector Int))
, bench "unsafeNew MVector 8 Ints" $ (MVec.unsafeNew 8 :: IO (MVec.IOVector Int))
, bench "unsafeNew MVector 32 Ints" $ (MVec.unsafeNew 32 :: IO (MVec.IOVector Int))
, bench "new MutableArray 8 Ints" $ (P.newArray 8 0 :: IO (P.MutableArray (PrimState IO) Int))
, bench "new MutableArray 32 Ints" $ (P.newArray 32 0 :: IO (P.MutableArray (PrimState IO) Int))
, bench "new MutableArray 32 Nothing :: Maybe Ints" $ (P.newArray 32 Nothing :: IO (P.MutableArray (PrimState IO) (Maybe Int)))
, bench "cloned MutableArray 8 Ints" $ (P.cloneMutableArray parr8 0 8 :: IO (P.MutableArray (PrimState IO) Int))
, bench "cloned MutableArray 32 Ints" $ (P.cloneMutableArray parr32 0 32 :: IO (P.MutableArray (PrimState IO) Int))
, bench "cloned MutableArray 128 Ints" $ (P.cloneMutableArray parr128 0 128 :: IO (P.MutableArray (PrimState IO) Int))
, bench "cloned MutableArray 512 Ints" $ (P.cloneMutableArray parr512 0 512 :: IO (P.MutableArray (PrimState IO) Int))
, bench "new MutableByteArray 8 Ints" (P.newByteArray (8* P.sizeOf (0 :: Int)) :: IO (P.MutableByteArray (PrimState IO)))
, bench "new MutableByteArray 32 Ints" (P.newByteArray (32* P.sizeOf (0 :: Int)) :: IO (P.MutableByteArray (PrimState IO)))
, bench "new set MutableByteArray 32 Ints" $ (P.newByteArray (32* P.sizeOf (0 :: Int)) :: IO (P.MutableByteArray (PrimState IO))) >>= \a-> P.setByteArray a 0 32 (0 :: Int)
, bench "new MutableByteArray 128 Ints" (P.newByteArray (128* P.sizeOf (0 :: Int)) :: IO (P.MutableByteArray (PrimState IO)))
, bench "new set MutableByteArray 128 Ints" $ (P.newByteArray (128* P.sizeOf (0 :: Int)) :: IO (P.MutableByteArray (PrimState IO))) >>= \a-> P.setByteArray a 0 128 (0 :: Int)
, bench "new MutableByteArray 512 Ints" (P.newByteArray (512* P.sizeOf (0 :: Int)) :: IO (P.MutableByteArray (PrimState IO)))
, bench "new set MutableByteArray 512 Ints" $ (P.newByteArray (512* P.sizeOf (0 :: Int)) :: IO (P.MutableByteArray (PrimState IO))) >>= \a-> P.setByteArray a 0 512 (0 :: Int)
, bench "new MutableByteArray 2048 Ints" (P.newByteArray (2048* P.sizeOf (0 :: Int)) :: IO (P.MutableByteArray (PrimState IO)))
, bench "new MutableByteArray 8192 Ints" (P.newByteArray (8192* P.sizeOf (0 :: Int)) :: IO (P.MutableByteArray (PrimState IO)))
, bench "new unboxed Mutable Vector 8 Ints" (UMV.new 8 :: IO (UMV.IOVector Int))
, bench "new unboxed Mutable Vector 32 Ints" (UMV.new 32 :: IO (UMV.IOVector Int))
, bench "new unboxed Mutable Vector 128 Ints" (UMV.new 128 :: IO (UMV.IOVector Int))
]
]
]
]
-- takeMVar mvWithFinalizer -- keep finalizer from actually running