From 653d5cbebe3e1cd0952597ce6b13021bdee7decf Mon Sep 17 00:00:00 2001 From: Soumik Sarkar Date: Mon, 18 Dec 2023 21:54:36 +0530 Subject: [PATCH 1/9] Use SmallMutableArray in Internal.Buffer (#15) ...instead of Internal.Array. Since a Buffer doesn't need to support slicing, this will reduce the memory footprint of a Buffer (in case GHC can't optimize the indirection and offset and length away). --- src/Data/RRBVector/Internal/Array.hs | 5 ++++- src/Data/RRBVector/Internal/Buffer.hs | 14 +++++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/Data/RRBVector/Internal/Array.hs b/src/Data/RRBVector/Internal/Array.hs index 886766d..5110014 100644 --- a/src/Data/RRBVector/Internal/Array.hs +++ b/src/Data/RRBVector/Internal/Array.hs @@ -14,7 +14,7 @@ module Data.RRBVector.Internal.Array ( Array, MutableArray , ifoldrStep, ifoldlStep, ifoldrStep', ifoldlStep' - , empty, singleton, from2 + , empty, singleton, from2, wrap , replicate, replicateSnoc , index, head, last , update, adjust, adjust' @@ -122,6 +122,9 @@ from2 x y = Array 0 2 $ runSmallArray $ do writeSmallArray sma 1 y pure sma +wrap :: SmallArray a -> Array a +wrap arr = Array 0 (sizeofSmallArray arr) arr + replicate :: Int -> a -> Array a replicate n x = Array 0 n $ runSmallArray (newSmallArray n x) diff --git a/src/Data/RRBVector/Internal/Buffer.hs b/src/Data/RRBVector/Internal/Buffer.hs index 7bd671d..71b5bad 100644 --- a/src/Data/RRBVector/Internal/Buffer.hs +++ b/src/Data/RRBVector/Internal/Buffer.hs @@ -13,25 +13,29 @@ module Data.RRBVector.Internal.Buffer import Control.Monad.ST +import Data.Primitive.SmallArray import Data.RRBVector.Internal.IntRef import qualified Data.RRBVector.Internal.Array as A -- | A mutable array buffer with a fixed capacity. -data Buffer s a = Buffer !(A.MutableArray s a) !(IntRef s) +data Buffer s a = Buffer !(SmallMutableArray s a) !(IntRef s) -- | \(O(n)\). Create a new empty buffer with the given capacity. new :: Int -> ST s (Buffer s a) new capacity = do - buffer <- A.new capacity + buffer <- newSmallArray capacity uninitialized offset <- newIntRef 0 pure (Buffer buffer offset) +uninitialized :: a +uninitialized = errorWithoutStackTrace "uninitialized" + -- | \(O(1)\). Push a new element onto the buffer. -- The size of the buffer must not exceed the capacity, but this is not checked. push :: Buffer s a -> a -> ST s () push (Buffer buffer offset) x = do idx <- readIntRef offset - A.write buffer idx x + writeSmallArray buffer idx x writeIntRef offset (idx + 1) -- | \(O(n)\). Freeze the content of the buffer and return it. @@ -39,9 +43,9 @@ push (Buffer buffer offset) x = do get :: Buffer s a -> ST s (A.Array a) get (Buffer buffer offset) = do len <- readIntRef offset - result <- A.freeze buffer 0 len + result <- freezeSmallArray buffer 0 len writeIntRef offset 0 - pure result + pure (A.wrap result) -- | \(O(1)\). Return the current size of the buffer. size :: Buffer s a -> ST s Int From 1c7e2dc6974cfb709200c83ba61896925d52e289 Mon Sep 17 00:00:00 2001 From: konsumlamm <44230978+konsumlamm@users.noreply.github.com> Date: Fri, 29 Dec 2023 15:13:43 +0100 Subject: [PATCH 2/9] Make sure that the result of `><` is normalized (#19) --- src/Data/RRBVector/Internal.hs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/Data/RRBVector/Internal.hs b/src/Data/RRBVector/Internal.hs index 73d0fd8..1104778 100644 --- a/src/Data/RRBVector/Internal.hs +++ b/src/Data/RRBVector/Internal.hs @@ -678,12 +678,9 @@ findIndicesR f = ifoldl (\i acc x -> if f x then i : acc else acc) [] Empty >< v = v v >< Empty = v Root size1 sh1 tree1 >< Root size2 sh2 tree2 = - let maxShift = max sh1 sh2 - upMaxShift = up maxShift + let upMaxShift = up (max sh1 sh2) newArr = mergeTrees tree1 sh1 tree2 sh2 - in if length newArr == 1 - then Root (size1 + size2) maxShift (A.head newArr) - else Root (size1 + size2) upMaxShift (computeSizes upMaxShift newArr) + in normalize $ Root (size1 + size2) upMaxShift (computeSizes upMaxShift newArr) where mergeTrees tree1@(Leaf arr1) !_ tree2@(Leaf arr2) !_ | length arr1 == blockSize = A.from2 tree1 tree2 From 2862e05ad7659a881440e5c909ec56b59bb231ec Mon Sep 17 00:00:00 2001 From: konsumlamm <44230978+konsumlamm@users.noreply.github.com> Date: Sat, 30 Dec 2023 14:02:43 +0100 Subject: [PATCH 3/9] Fix wrong `insertShift` computation in `<|` (#18) --- src/Data/RRBVector/Internal.hs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Data/RRBVector/Internal.hs b/src/Data/RRBVector/Internal.hs index 1104778..b67dd51 100644 --- a/src/Data/RRBVector/Internal.hs +++ b/src/Data/RRBVector/Internal.hs @@ -174,7 +174,7 @@ computeSizes !sh arr where subtree = A.index arr i --- Integer log base 2. +-- | Integer log base 2. log2 :: Int -> Int log2 x = bitSizeMinus1 - countLeadingZeros x where @@ -765,7 +765,10 @@ x <| Root size sh tree -- compute the shift at which the new branch needs to be inserted (0 means there is space in the leaf) -- the size is computed for efficient calculation of the shift in a balanced subtree computeShift !sz !sh !min (Balanced _) = - let newShift = (log2 sz `div` blockShift) * blockShift + -- @sz - 1@ is the index of the last element + let hiShift = max ((log2 (sz - 1) `div` blockShift) * blockShift) 0 -- the shift of the root when normalizing + hi = (sz - 1) `unsafeShiftR` hiShift -- the length of the root node when normalizing minus 1 + newShift = if hi < blockMask then hiShift else hiShift + blockShift in if newShift > sh then min else newShift computeShift _ sh min (Unbalanced arr sizes) = let sz' = indexPrimArray sizes 0 -- the size of the first subtree From f4326fff047f28a658303206973e48256bc65fff Mon Sep 17 00:00:00 2001 From: Soumik Sarkar Date: Mon, 1 Jan 2024 18:49:09 +0530 Subject: [PATCH 4/9] Add property tests for invariants (#17) --- src/Data/RRBVector/Internal/Debug.hs | 117 ++++++++++++++++++++++++++- test/Properties.hs | 27 +++++++ 2 files changed, 142 insertions(+), 2 deletions(-) diff --git a/src/Data/RRBVector/Internal/Debug.hs b/src/Data/RRBVector/Internal/Debug.hs index 5f5924c..61a42e5 100644 --- a/src/Data/RRBVector/Internal/Debug.hs +++ b/src/Data/RRBVector/Internal/Debug.hs @@ -10,16 +10,19 @@ module Data.RRBVector.Internal.Debug , pattern Empty, pattern Root , Tree, Shift , pattern Balanced, pattern Unbalanced, pattern Leaf + , Invariant, valid ) where import Control.Monad.ST (runST) -import Data.Foldable (toList) +import Data.Bits (shiftL) +import Data.Foldable (foldl', toList, traverse_) import Data.List (intercalate) -import Data.Primitive.PrimArray (PrimArray, primArrayToList) +import Data.Primitive.PrimArray (PrimArray, primArrayToList, indexPrimArray, sizeofPrimArray) import Data.RRBVector.Internal hiding (Empty, Root, Balanced, Unbalanced, Leaf) import qualified Data.RRBVector.Internal as RRB import Data.RRBVector.Internal.Array (Array) +import qualified Data.RRBVector.Internal.Array as A import qualified Data.RRBVector.Internal.Buffer as Buffer -- | \(O(n)\). Show the underlying tree of a vector. @@ -85,3 +88,113 @@ pattern Leaf :: Array a -> Tree a pattern Leaf arr <- RRB.Leaf arr {-# COMPLETE Balanced, Unbalanced, Leaf #-} + +-- | Structural invariants a vector is expected to hold. +data Invariant + = RootSizeGt0 -- Root: Size > 0 + | RootShiftDiv -- Root: The shift at the root is divisible by blockShift + | RootSizeCorrect -- Root: The size at the root is correct + | RootGt1Child -- Root: The root has more than 1 child if not a Leaf + | BalShiftGt0 -- Balanced: Shift > 0 + | BalNumChildren -- Balanced: The number of children is blockSize unless + -- the parent is unbalanced or the node is on the right + -- edge in which case it is in [1,blockSize] + | BalFullChildren -- Balanced: All children are full, except for the last + -- if the node is on the right edge + | UnbalShiftGt0 -- Unbalanced: Shift > 0 + | UnbalParentUnbal -- Unbalanced: Parent is Unbalanced + | UnbalNumChildren -- Unbalanced: The number of children is in [1,blockSize] + | UnbalSizes -- Unbalanced: The sizes array is correct + | UnbalNotBal -- Unbalanced: The tree is not full enough to be a + -- Balanced + | LeafShift0 -- Leaf: Shift == 0 + | LeafNumElems -- Leaf: The number of elements is in [1,blockSize] + deriving Show + +assert :: Invariant -> Bool -> Either Invariant () +assert i False = Left i +assert _ True = pure () + +-- | Check tree invariants. Returns @Left@ on finding a violated invariant. +valid :: Vector a -> Either Invariant () +valid RRB.Empty = pure () +valid (RRB.Root size sh tree) = do + assert RootSizeGt0 $ size > 0 + assert RootShiftDiv $ sh `mod` blockShift == 0 + assert RootSizeCorrect $ size == countElems tree + assert RootGt1Child $ case tree of + Balanced arr -> length arr > 1 + Unbalanced arr _ -> length arr > 1 + Leaf _ -> True + validTree Unbal sh tree + +data NodeDesc + = Bal -- parent is Balanced + | BalRightEdge -- parent is Balanced and this node is on the right edge + | Unbal -- parent is Unbalanced + +validTree :: NodeDesc -> Shift -> Tree a -> Either Invariant () +validTree desc sh (RRB.Balanced arr) = do + assert BalShiftGt0 $ sh > 0 + assert BalNumChildren $ case desc of + Bal -> n == blockSize + BalRightEdge -> n >= 1 && n <= blockSize + Unbal -> n >= 1 && n <= blockSize + assert BalFullChildren $ + all (\t -> countElems t == 1 `shiftL` sh) expectedFullChildren + traverse_ (validTree Bal (down sh)) arrInit + validTree descLast (down sh) (A.last arr) + where + n = length arr + arrInit = A.take arr (n-1) + expectedFullChildren = case desc of + Bal -> arr + BalRightEdge -> arrInit + Unbal -> arrInit + descLast = case desc of + Bal -> Bal + BalRightEdge -> BalRightEdge + Unbal -> BalRightEdge +validTree desc sh (RRB.Unbalanced arr sizes) = do + assert UnbalShiftGt0 $ sh > 0 + case desc of + Bal -> assert UnbalParentUnbal False + BalRightEdge -> assert UnbalParentUnbal False + Unbal -> assert UnbalNumChildren $ n >= 1 && n <= blockSize + assert UnbalSizes $ n == sizeofPrimArray sizes + assert UnbalSizes $ + all (\i -> countElems (A.index arr i) == getSize sizes i) [0 .. n-1] + assert UnbalNotBal $ not (couldBeBalanced sh arr sizes) + traverse_ (validTree Unbal (down sh)) arr + where + n = length arr +validTree desc sh (RRB.Leaf arr) = do + assert LeafShift0 $ sh == 0 + assert LeafNumElems $ case desc of + Bal -> n == blockSize + BalRightEdge -> n >= 1 && n <= blockSize + Unbal -> n >= 1 && n <= blockSize + where + n = length arr + +-- | Check whether an Unbalanced node could be Balanced. +couldBeBalanced :: Shift -> A.Array (Tree a) -> PrimArray Int -> Bool +couldBeBalanced sh arr sizes = + all (\i -> getSize sizes i == 1 `shiftL` sh) [0 .. n-2] && + (case A.last arr of + Balanced _ -> True + Unbalanced arr' sizes' -> couldBeBalanced (down sh) arr' sizes' + Leaf _ -> True) + where + n = length arr + +getSize :: PrimArray Int -> Int -> Int +getSize sizes 0 = indexPrimArray sizes 0 +getSize sizes i = indexPrimArray sizes i - indexPrimArray sizes (i-1) + +countElems :: Tree a -> Int +countElems (RRB.Balanced arr) = + foldl' (\acc tree -> acc + countElems tree) 0 arr +countElems (RRB.Unbalanced arr _) = + foldl' (\acc tree -> acc + countElems tree) 0 arr +countElems (RRB.Leaf arr) = length arr diff --git a/test/Properties.hs b/test/Properties.hs index f6145f5..d93fb64 100644 --- a/test/Properties.hs +++ b/test/Properties.hs @@ -16,6 +16,7 @@ import Prelude hiding ((==)) -- use @===@ instead import qualified Data.Sequence as Seq import qualified Data.RRBVector as V +import qualified Data.RRBVector.Internal.Debug as VDebug import Test.QuickCheck.Classes.Base import Test.Tasty import Test.Tasty.QuickCheck @@ -65,6 +66,13 @@ proxyVInt = Proxy proxyV :: Proxy V proxyV = Proxy +checkValid :: Show a => V a -> Property +checkValid v = case VDebug.valid v of + Left invariant -> + counterexample ("Invariant violated: " ++ show invariant) $ + counterexample (VDebug.showTree v) False + _ -> property () + properties :: TestTree properties = testGroup "properties" [ testGroup "fromList" @@ -72,22 +80,27 @@ properties = testGroup "properties" , testProperty "satisfies `toList . fromList = id`" $ \ls -> toList (V.fromList ls) === ls , testProperty "satisfies `fromList [] = empty`" $ V.fromList [] === V.empty , testProperty "satisfies `fromList [x] = singleton x`" $ \x -> V.fromList [x] === V.singleton x + , testProperty "valid" $ \xs -> checkValid (V.fromList xs) ] , testGroup "replicate" [ testProperty "satisifes `replicate n == fromList . replicate n`" $ \(Positive n) x -> V.replicate n x === V.fromList (replicate n x) , testProperty "returns the empty vector for non-positive n" $ \(NonPositive n) x -> V.replicate n x === V.empty + , testProperty "valid" $ \n x -> checkValid (V.replicate n x) ] , testGroup "<|" [ testProperty "prepends an element" $ \x v -> toList (x V.<| v) === x : toList v , testProperty "works for the empty vector" $ \x -> x V.<| V.empty === V.singleton x + , testProperty "valid" $ \x v -> checkValid (x V.<| v) ] , testGroup "|>" [ testProperty "appends an element" $ \v x -> toList (v V.|> x) === toList v ++ [x] , testProperty "works for the empty vector" $ \x -> V.empty V.|> x === V.singleton x + , testProperty "valid" $ \v x -> checkValid (v V.|> x) ] , testGroup "><" [ testProperty "concatenates two vectors" $ \v1 v2 -> toList (v1 V.>< v2) === toList v1 ++ toList v2 , testProperty "works for the empty vector" $ \v -> (V.empty V.>< v === v) .&&. (v V.>< V.empty === v) + , testProperty "valid" $ \v1 v2 -> checkValid (v1 V.>< v2) ] , testGroup "lookup" [ testProperty "gets the element at the index" $ \v (NonNegative i) -> V.lookup i v === lookupList i (toList v) @@ -96,33 +109,41 @@ properties = testGroup "properties" , testGroup "update" [ testProperty "updates the element at the index" $ \v (NonNegative i) x -> toList (V.update i x v) === updateList i x (toList v) , testProperty "returns the vector for negative indices" $ \v (Negative i) x -> V.update i x v === v + , testProperty "valid" $ \v i x -> checkValid (V.update i x v) ] , testGroup "adjust" [ testProperty "adjusts the element at the index" $ \v (NonNegative i) (Fn f) -> toList (V.adjust i f v) === adjustList i f (toList v) , testProperty "returns the vector for negative indices" $ \v (Negative i) (Fn f) -> V.adjust i f v === v + , testProperty "valid" $ \v i (Fn f) -> checkValid (V.adjust i f v) ] , testGroup "adjust'" [ testProperty "adjusts the element at the index" $ \v (NonNegative i) (Fn f) -> toList (V.adjust' i f v) === adjustList i f (toList v) , testProperty "returns the vector for negative indices" $ \v (Negative i) (Fn f) -> V.adjust' i f v === v + , testProperty "valid" $ \v i (Fn f) -> checkValid (V.adjust' i f v) ] , testGroup "viewl" [ testProperty "works like uncons" $ \v -> fmap (\(x, xs) -> (x, toList xs)) (V.viewl v) === uncons (toList v) , testProperty "works for the empty vector" $ V.viewl V.empty === Nothing + , testProperty "valid" $ \v -> fmap (checkValid . snd) (V.viewl v) ] , testGroup "viewr" [ testProperty "works like unsnoc" $ \v -> fmap (\(xs, x) -> (toList xs, x)) (V.viewr v) === unsnoc (toList v) , testProperty "works for the empty vector" $ V.viewr V.empty === Nothing + , testProperty "valid" $ \v -> fmap (checkValid . fst) (V.viewr v) ] , testGroup "take" [ testProperty "takes n elements" $ \v (Positive n) -> toList (V.take n v) === take n (toList v) , testProperty "returns the empty vector for non-positive n" $ \v (NonPositive n) -> V.take n v === V.empty + , testProperty "valid" $ \v n -> checkValid (V.take n v) ] , testGroup "drop" [ testProperty "drops n elements" $ \v (Positive n) -> toList (V.drop n v) === drop n (toList v) , testProperty "returns the vector for non-positive n" $ \v (NonPositive n) -> V.drop n v === v + , testProperty "valid" $ \v n -> checkValid (V.drop v n) ] , testGroup "splitAt" [ testProperty "splits the vector" $ \v n -> let (v1, v2) = V.splitAt n v in (toList v1, toList v2) === splitAt n (toList v) + , testProperty "valid" $ \v n -> let (v1, v2) = V.splitAt n v in checkValid v1 .&&. checkValid v2 ] , testGroup "insertAt" [ testProperty "inserts an element" $ \v i x -> toList (V.insertAt i x v) === insertAtList i x (toList v) @@ -130,6 +151,7 @@ properties = testGroup "properties" , testProperty "appends for too large indices" $ \v x -> forAll (arbitrary `suchThat` (> length v)) $ \i -> V.insertAt i x v === v V.|> x , testProperty "satisfies `insertAt 0 x v = x <| v`" $ \v x -> V.insertAt 0 x v === x V.<| v , testProperty "satisfies `insertAt (length v) x v = v |> x`" $ \v x -> V.insertAt (length v) x v === v V.|> x + , testProperty "valid" $ \v i x -> checkValid (V.insertAt i x v) ] , testGroup "deleteAt" [ testProperty "deletes an element" $ \v (NonNegative i) -> toList (V.deleteAt i v) === deleteAtList i (toList v) @@ -137,6 +159,7 @@ properties = testGroup "properties" , testProperty "returns the vector for too large indices" $ \v -> forAll (arbitrary `suchThat` (>= length v)) $ \i -> V.deleteAt i v === v , testProperty "satisfies `deleteAt 0 v = drop 1 v`" $ \v -> V.deleteAt 0 v === V.drop 1 v , testProperty "satisfies `deleteAt (length v - 1) v = take (length v - 1) v`" $ \v -> V.deleteAt (length v - 1) v === V.take (length v - 1) v + , testProperty "valid" $ \v i -> checkValid (V.deleteAt i v) ] , testGroup "findIndexL" [ testProperty "finds the first index" $ \v (Fn f) -> V.findIndexL f v === Seq.findIndexL f (Seq.fromList (toList v)) @@ -156,16 +179,20 @@ properties = testGroup "properties" ] , testGroup "reverse" [ testProperty "reverses the vector" $ \v -> toList (V.reverse v) === reverse (toList v) + , testProperty "valid" $ \v -> checkValid (V.reverse v) ] , testGroup "zip" [ testProperty "zips two vectors" $ \v1 v2 -> toList (V.zip v1 v2) === zip (toList v1) (toList v2) + , testProperty "valid" $ \v1 v2 -> checkValid (V.zip v1 v2) ] , testGroup "zipWith" [ testProperty "zips two vectors with a function" $ \v1 v2 -> toList (V.zipWith (+) v1 v2) === zipWith (+) (toList v1) (toList v2) , testProperty "satisfies `zipWith (,) v1 v2 = zip v1 v2`" $ \v1 v2 -> V.zipWith (,) v1 v2 === V.zip v1 v2 + , testProperty "valid" $ \v1 v2 (Fn2 f) -> checkValid (V.zipWith f v1 v2) ] , testGroup "unzip" [ testProperty "unzips the vector" $ \v -> (\(xs, ys) -> (toList xs, toList ys)) (V.unzip v) === unzip (toList v) + , testProperty "valid" $ \v -> let (v1, v2) = V.unzip v in checkValid v1 .&&. checkValid v2 ] , instances , laws From 7d3db7bff351ddd4388887b00677c8c1c08e6b19 Mon Sep 17 00:00:00 2001 From: konsumlamm Date: Sun, 16 Jun 2024 02:27:32 +0200 Subject: [PATCH 5/9] Add missing `since` annotation --- src/Data/RRBVector/Internal.hs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Data/RRBVector/Internal.hs b/src/Data/RRBVector/Internal.hs index b67dd51..b7642ab 100644 --- a/src/Data/RRBVector/Internal.hs +++ b/src/Data/RRBVector/Internal.hs @@ -653,18 +653,26 @@ deleteAt :: Int -> Vector a -> Vector a deleteAt i v = let (left, right) = splitAt (i + 1) v in take i left >< right -- | \(O(n)\). Find the first index from the left that satisfies the predicate. +-- +-- @since 0.2.1.0 findIndexL :: (a -> Bool) -> Vector a -> Maybe Int findIndexL f = ifoldr (\i x acc -> if f x then Just i else acc) Nothing -- | \(O(n)\). Find the first index from the right that satisfies the predicate. +-- +-- @since 0.2.1.0 findIndexR :: (a -> Bool) -> Vector a -> Maybe Int findIndexR f = ifoldl (\i acc x -> if f x then Just i else acc) Nothing -- | \(O(n)\). Find the indices that satisfy the predicate, starting from the left. +-- +-- @since 0.2.1.0 findIndicesL :: (a -> Bool) -> Vector a -> [Int] findIndicesL f = ifoldr (\i x acc -> if f x then i : acc else acc) [] -- | \(O(n)\). Find the indices that satisfy the predicate, starting from the right. +-- +-- @since 0.2.1.0 findIndicesR :: (a -> Bool) -> Vector a -> [Int] findIndicesR f = ifoldl (\i acc x -> if f x then i : acc else acc) [] From ce10b488d20e4e89e9352f828c2db5bece254eca Mon Sep 17 00:00:00 2001 From: konsumlamm Date: Tue, 18 Jun 2024 17:01:30 +0200 Subject: [PATCH 6/9] Update CI --- .github/workflows/haskell.yaml | 4 ++-- rrb-vector.cabal | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/haskell.yaml b/.github/workflows/haskell.yaml index 9683c00..12c153f 100644 --- a/.github/workflows/haskell.yaml +++ b/.github/workflows/haskell.yaml @@ -10,14 +10,14 @@ jobs: strategy: fail-fast: false matrix: - ghc: ['8.4', '8.6', '8.8', '8.10', '9.0', '9.2', '9.4', '9.6', '9.8'] + ghc: ['8.4', '8.6', '8.8', '8.10', '9.0', '9.2', '9.4', '9.6', '9.8', '9.10'] steps: - uses: actions/checkout@v4 - uses: haskell-actions/setup@v2 id: setup-haskell with: ghc-version: ${{ matrix.ghc }} - - uses: actions/cache@v3 + - uses: actions/cache@v4 with: path: | ${{ steps.setup-haskell.outputs.cabal-store }} diff --git a/rrb-vector.cabal b/rrb-vector.cabal index 961b35a..806fbad 100644 --- a/rrb-vector.cabal +++ b/rrb-vector.cabal @@ -30,8 +30,9 @@ tested-with: GHC == 9.0.2 GHC == 9.2.8 GHC == 9.4.8 - GHC == 9.6.3 - GHC == 9.8.1 + GHC == 9.6.5 + GHC == 9.8.2 + GHC == 9.10.1 source-repository head type: git From 9b1f048f9a782645fe4714e662bf08480448f714 Mon Sep 17 00:00:00 2001 From: konsumlamm Date: Tue, 18 Jun 2024 19:40:33 +0200 Subject: [PATCH 7/9] Optimize `*>` by using `stimes` --- src/Data/RRBVector/Internal.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Data/RRBVector/Internal.hs b/src/Data/RRBVector/Internal.hs index b7642ab..62921ec 100644 --- a/src/Data/RRBVector/Internal.hs +++ b/src/Data/RRBVector/Internal.hs @@ -343,7 +343,7 @@ instance Applicative Vector where pure = singleton fs <*> xs = foldl' (\acc f -> acc >< map f xs) empty fs liftA2 f xs ys = foldl' (\acc x -> acc >< map (f x) ys) empty xs - xs *> ys = foldl' (\acc _ -> acc >< ys) empty xs + xs *> ys = stimes (length xs) ys xs <* ys = foldl' (\acc x -> acc >< replicate (length ys) x) empty xs instance Monad Vector where From 287bebcee82c9a9a4dc495ae396b8439c19ab7f8 Mon Sep 17 00:00:00 2001 From: konsumlamm <44230978+konsumlamm@users.noreply.github.com> Date: Thu, 20 Jun 2024 16:04:59 +0200 Subject: [PATCH 8/9] Add sorting functions (#23) Bump `primitive` version --- CHANGELOG.md | 4 +++ rrb-vector.cabal | 8 ++++- src/Data/RRBVector.hs | 5 +++ src/Data/RRBVector/Internal/Sorting.hs | 49 ++++++++++++++++++++++++++ test/Properties.hs | 8 ++++- 5 files changed, 72 insertions(+), 2 deletions(-) create mode 100644 src/Data/RRBVector/Internal/Sorting.hs diff --git a/CHANGELOG.md b/CHANGELOG.md index 01d77d2..fd6453a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +# 0.2.2.0 - upcoming + +* Add `sort`, `sortBy`, `sortOn` ([#23](https://github.com/konsumlamm/rrb-vector/pull/22)) + # 0.2.1.0 - December 2023 * Add `findIndexL`, `findIndexR`, `findIndicesL`, `findIndicesR` diff --git a/rrb-vector.cabal b/rrb-vector.cabal index 806fbad..3efb901 100644 --- a/rrb-vector.cabal +++ b/rrb-vector.cabal @@ -48,7 +48,13 @@ library Data.RRBVector.Internal.Array Data.RRBVector.Internal.Buffer Data.RRBVector.Internal.IntRef - build-depends: base >= 4.11 && < 5, deepseq >= 1.4.3 && < 1.6, indexed-traversable ^>= 0.1, primitive >= 0.7 && < 0.10 + Data.RRBVector.Internal.Sorting + build-depends: + base >= 4.11 && < 5, + deepseq >= 1.4.3 && < 1.6, + indexed-traversable ^>= 0.1, + primitive >= 0.7.3 && < 0.10, + samsort ^>= 0.1 ghc-options: -O2 -Wall -Wno-name-shadowing -Werror=missing-methods -Werror=missing-fields default-language: Haskell2010 diff --git a/src/Data/RRBVector.hs b/src/Data/RRBVector.hs index 2a42dde..a9e9030 100644 --- a/src/Data/RRBVector.hs +++ b/src/Data/RRBVector.hs @@ -44,6 +44,10 @@ module Data.RRBVector , map, map', reverse -- * Zipping and unzipping , zip, zipWith, unzip, unzipWith + -- * Sorting + -- + -- | Currently implemented using [samsort](https://hackage.haskell.org/package/samsort). + , sort, sortBy, sortOn ) where import Prelude hiding (replicate, lookup, take, drop, splitAt, map, reverse, zip, zipWith, unzip) @@ -53,3 +57,4 @@ import Data.Functor.WithIndex import Data.Traversable.WithIndex import Data.RRBVector.Internal +import Data.RRBVector.Internal.Sorting diff --git a/src/Data/RRBVector/Internal/Sorting.hs b/src/Data/RRBVector/Internal/Sorting.hs new file mode 100644 index 0000000..1d09dbc --- /dev/null +++ b/src/Data/RRBVector/Internal/Sorting.hs @@ -0,0 +1,49 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE MagicHash #-} + +module Data.RRBVector.Internal.Sorting + ( sort + , sortBy + , sortOn + ) where + +import Data.Foldable (toList) +import Data.Foldable.WithIndex (ifor_) +import Data.Primitive.Array +import Data.SamSort (sortArrayBy) +import Data.Semigroup (Arg(..)) + +import Data.RRBVector.Internal + +uninitialized :: a +uninitialized = errorWithoutStackTrace "uninitialized" + +-- | \(O(n \log n)\). Sort the vector in ascending order. +-- The sort is stable, meaning the order of equal elements is preserved. +-- +-- @since 0.2.2.0 +sort :: (Ord a) => Vector a -> Vector a +sort = sortBy compare + +-- | \(O(n \log n)\). Sort the vector in ascending order according to the specified comparison function. +-- The sort is stable, meaning the order of equal elements is preserved. +-- +-- @since 0.2.2.0 +sortBy :: (a -> a -> Ordering) -> Vector a -> Vector a +sortBy cmp v = + let sortedArr = createArray (length v) uninitialized $ \arr@(MutableArray arr#) -> do + ifor_ v (writeArray arr) + sortArrayBy cmp arr# 0 (length v) + in fromList . toList $ sortedArr + +-- | \(O(n \log n)\). Sort the vector in ascending order by comparing the results of applying the key function to each element. +-- The sort is stable, meaning the order of equal elements is preserved. +-- @`sortOn` f@ is equivalent to @`sortBy` (`Data.Ord.comparing` f)@, but only evaluates @f@ once for each element. +-- +-- @since 0.2.2.0 +sortOn :: (Ord b) => (a -> b) -> Vector a -> Vector a +sortOn f v = + let sortedArr = createArray (length v) uninitialized $ \arr@(MutableArray arr#) -> do + ifor_ v $ \i x -> let !y = f x in writeArray arr i (Arg y x) + sortArrayBy compare arr# 0 (length v) + in fromList . fmap (\(Arg _ x) -> x) . toList $ sortedArr diff --git a/test/Properties.hs b/test/Properties.hs index d93fb64..15147e7 100644 --- a/test/Properties.hs +++ b/test/Properties.hs @@ -10,7 +10,8 @@ module Properties import Control.Applicative (liftA2) #endif import Data.Foldable (Foldable(..)) -import Data.List (uncons) +import Data.List (sort, sortBy, sortOn, uncons) +import Data.Ord (comparing) import Data.Proxy (Proxy(..)) import Prelude hiding ((==)) -- use @===@ instead @@ -194,6 +195,11 @@ properties = testGroup "properties" [ testProperty "unzips the vector" $ \v -> (\(xs, ys) -> (toList xs, toList ys)) (V.unzip v) === unzip (toList v) , testProperty "valid" $ \v -> let (v1, v2) = V.unzip v in checkValid v1 .&&. checkValid v2 ] + , localOption (QuickCheckMaxSize 1000) $ testGroup "sorting" + [ testProperty "sort" $ \v -> toList (V.sort v) === sort (toList v) + , testProperty "sortBy" $ \v -> let cmp = comparing fst in toList (V.sortBy cmp v) === sortBy cmp (toList v) + , testProperty "sortOn" $ \v -> let f = odd in toList (V.sortOn f v) === sortOn f (toList v) + ] , instances , laws , issues From e489bd208970b3a2db9a210428005f5551232f16 Mon Sep 17 00:00:00 2001 From: konsumlamm Date: Fri, 21 Jun 2024 17:01:41 +0200 Subject: [PATCH 9/9] Inline `findIndexL`, `findIndexR`, `findIndicesL`, `findIndicesR` --- bench/Main.hs | 7 +++++++ src/Data/RRBVector/Internal.hs | 4 ++++ 2 files changed, 11 insertions(+) diff --git a/bench/Main.hs b/bench/Main.hs index a3b5ba0..21c31d7 100644 --- a/bench/Main.hs +++ b/bench/Main.hs @@ -19,9 +19,16 @@ main = defaultMain $ [10, 100, 1000, 10000, 100000] <&> \n -> , bench "<|" $ whnf (42 RRB.<|) v , bench "take" $ whnf (RRB.take idx) v , bench "drop" $ whnf (RRB.drop idx) v + , bench "splitAt" $ whnf (RRB.splitAt idx) v + , bench "insertAt" $ whnf (RRB.insertAt idx 42) v + , bench "deleteAt" $ whnf (RRB.deleteAt idx) v , bench "index" $ nf (RRB.lookup idx) v , bench "adjust" $ whnf (RRB.adjust idx (+ 1)) v , bench "map" $ whnf (RRB.map (+ 1)) v , bench "foldl" $ nf (foldl (+) 0) v , bench "foldr" $ nf (foldr (+) 0) v + , bench "findIndexL" $ nf (RRB.findIndexL (== idx)) v + , bench "findIndexR" $ nf (RRB.findIndexR (== idx)) v + , bench "findIndicesL" $ nf (RRB.findIndicesL (== idx)) v + , bench "findIndicesR" $ nf (RRB.findIndicesR (== idx)) v ] diff --git a/src/Data/RRBVector/Internal.hs b/src/Data/RRBVector/Internal.hs index 62921ec..775ac27 100644 --- a/src/Data/RRBVector/Internal.hs +++ b/src/Data/RRBVector/Internal.hs @@ -657,24 +657,28 @@ deleteAt i v = let (left, right) = splitAt (i + 1) v in take i left >< right -- @since 0.2.1.0 findIndexL :: (a -> Bool) -> Vector a -> Maybe Int findIndexL f = ifoldr (\i x acc -> if f x then Just i else acc) Nothing +{-# INLINE findIndexL #-} -- | \(O(n)\). Find the first index from the right that satisfies the predicate. -- -- @since 0.2.1.0 findIndexR :: (a -> Bool) -> Vector a -> Maybe Int findIndexR f = ifoldl (\i acc x -> if f x then Just i else acc) Nothing +{-# INLINE findIndexR #-} -- | \(O(n)\). Find the indices that satisfy the predicate, starting from the left. -- -- @since 0.2.1.0 findIndicesL :: (a -> Bool) -> Vector a -> [Int] findIndicesL f = ifoldr (\i x acc -> if f x then i : acc else acc) [] +{-# INLINE findIndicesL #-} -- | \(O(n)\). Find the indices that satisfy the predicate, starting from the right. -- -- @since 0.2.1.0 findIndicesR :: (a -> Bool) -> Vector a -> [Int] findIndicesR f = ifoldl (\i acc x -> if f x then i : acc else acc) [] +{-# INLINE findIndicesR #-} -- concatenation