Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
Benjamin committed Jun 24, 2024
2 parents 383cbf7 + e489bd2 commit e2f238d
Show file tree
Hide file tree
Showing 11 changed files with 259 additions and 22 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/haskell.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@ jobs:
strategy:
fail-fast: false
matrix:
ghc: ['8.4', '8.6', '8.8', '8.10', '9.0', '9.2', '9.4', '9.6', '9.8']
ghc: ['8.4', '8.6', '8.8', '8.10', '9.0', '9.2', '9.4', '9.6', '9.8', '9.10']
steps:
- uses: actions/checkout@v4
- uses: haskell-actions/setup@v2
id: setup-haskell
with:
ghc-version: ${{ matrix.ghc }}
- uses: actions/cache@v3
- uses: actions/cache@v4
with:
path: |
${{ steps.setup-haskell.outputs.cabal-store }}
Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# 0.2.2.0 - upcoming

* Add `sort`, `sortBy`, `sortOn` ([#23](https://github.com/konsumlamm/rrb-vector/pull/22))

# 0.2.1.0 - December 2023

* Add `findIndexL`, `findIndexR`, `findIndicesL`, `findIndicesR`
Expand Down
7 changes: 7 additions & 0 deletions bench/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,16 @@ main = defaultMain $ [10, 100, 1000, 10000, 100000] <&> \n ->
, bench "<|" $ whnf (42 RRB.<|) v
, bench "take" $ whnf (RRB.take idx) v
, bench "drop" $ whnf (RRB.drop idx) v
, bench "splitAt" $ whnf (RRB.splitAt idx) v
, bench "insertAt" $ whnf (RRB.insertAt idx 42) v
, bench "deleteAt" $ whnf (RRB.deleteAt idx) v
, bench "index" $ nf (RRB.lookup idx) v
, bench "adjust" $ whnf (RRB.adjust idx (+ 1)) v
, bench "map" $ whnf (RRB.map (+ 1)) v
, bench "foldl" $ nf (foldl (+) 0) v
, bench "foldr" $ nf (foldr (+) 0) v
, bench "findIndexL" $ nf (RRB.findIndexL (== idx)) v
, bench "findIndexR" $ nf (RRB.findIndexR (== idx)) v
, bench "findIndicesL" $ nf (RRB.findIndicesL (== idx)) v
, bench "findIndicesR" $ nf (RRB.findIndicesR (== idx)) v
]
13 changes: 10 additions & 3 deletions rrb-vector-strict.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,9 @@ tested-with:
GHC == 9.0.2
GHC == 9.2.8
GHC == 9.4.8
GHC == 9.6.3
GHC == 9.8.1
GHC == 9.6.5
GHC == 9.8.2
GHC == 9.10.1

source-repository head
type: git
Expand All @@ -49,7 +50,13 @@ library
Data.RRBVector.Strict.Internal.Array
Data.RRBVector.Strict.Internal.Buffer
Data.RRBVector.Strict.Internal.IntRef
build-depends: base >= 4.11 && < 5, deepseq >= 1.4.3 && < 1.6, indexed-traversable ^>= 0.1, primitive >= 0.7 && < 0.10
Data.RRBVector.Strict.Internal.Sorting
build-depends:
base >= 4.11 && < 5,
deepseq >= 1.4.3 && < 1.6,
indexed-traversable ^>= 0.1,
primitive >= 0.7.3 && < 0.10,
samsort ^>= 0.1
ghc-options: -O2 -Wall -Wno-name-shadowing -Werror=missing-methods -Werror=missing-fields
default-language: Haskell2010

Expand Down
5 changes: 5 additions & 0 deletions src/Data/RRBVector/Strict.hs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ module Data.RRBVector.Strict
, map, map', reverse
-- * Zipping and unzipping
, zip, zipWith, unzip, unzipWith
-- * Sorting
--
-- | Currently implemented using [samsort](https://hackage.haskell.org/package/samsort).
, sort, sortBy, sortOn
) where

import Prelude hiding (replicate, lookup, take, drop, splitAt, map, reverse, zip, zipWith, unzip)
Expand All @@ -53,3 +57,4 @@ import Data.Functor.WithIndex
import Data.Traversable.WithIndex

import Data.RRBVector.Strict.Internal
import Data.RRBVector.Strict.Internal.Sorting
28 changes: 20 additions & 8 deletions src/Data/RRBVector/Strict/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ computeSizes !sh arr
where
subtree = A.index arr i

-- Integer log base 2.
-- | Integer log base 2.
log2 :: Int -> Int
log2 x = bitSizeMinus1 - countLeadingZeros x
where
Expand Down Expand Up @@ -343,7 +343,7 @@ instance Applicative Vector where
pure = singleton
fs <*> xs = foldl' (\acc f -> acc >< map f xs) empty fs
liftA2 f xs ys = foldl' (\acc x -> acc >< map (f x) ys) empty xs
xs *> ys = foldl' (\acc _ -> acc >< ys) empty xs
xs *> ys = stimes (length xs) ys
xs <* ys = foldl' (\acc x -> acc >< replicate (length ys) x) empty xs

instance Monad Vector where
Expand Down Expand Up @@ -653,20 +653,32 @@ deleteAt :: Int -> Vector a -> Vector a
deleteAt i v = let (left, right) = splitAt (i + 1) v in take i left >< right

-- | \(O(n)\). Find the first index from the left that satisfies the predicate.
--
-- @since 0.2.1.0
findIndexL :: (a -> Bool) -> Vector a -> Maybe Int
findIndexL f = ifoldr (\i x acc -> if f x then Just i else acc) Nothing
{-# INLINE findIndexL #-}

-- | \(O(n)\). Find the first index from the right that satisfies the predicate.
--
-- @since 0.2.1.0
findIndexR :: (a -> Bool) -> Vector a -> Maybe Int
findIndexR f = ifoldl (\i acc x -> if f x then Just i else acc) Nothing
{-# INLINE findIndexR #-}

-- | \(O(n)\). Find the indices that satisfy the predicate, starting from the left.
--
-- @since 0.2.1.0
findIndicesL :: (a -> Bool) -> Vector a -> [Int]
findIndicesL f = ifoldr (\i x acc -> if f x then i : acc else acc) []
{-# INLINE findIndicesL #-}

-- | \(O(n)\). Find the indices that satisfy the predicate, starting from the right.
--
-- @since 0.2.1.0
findIndicesR :: (a -> Bool) -> Vector a -> [Int]
findIndicesR f = ifoldl (\i acc x -> if f x then i : acc else acc) []
{-# INLINE findIndicesR #-}

-- concatenation

Expand All @@ -678,12 +690,9 @@ findIndicesR f = ifoldl (\i acc x -> if f x then i : acc else acc) []
Empty >< v = v
v >< Empty = v
Root size1 sh1 tree1 >< Root size2 sh2 tree2 =
let maxShift = max sh1 sh2
upMaxShift = up maxShift
let upMaxShift = up (max sh1 sh2)
newArr = mergeTrees tree1 sh1 tree2 sh2
in if length newArr == 1
then Root (size1 + size2) maxShift (A.head newArr)
else Root (size1 + size2) upMaxShift (computeSizes upMaxShift newArr)
in normalize $ Root (size1 + size2) upMaxShift (computeSizes upMaxShift newArr)
where
mergeTrees tree1@(Leaf arr1) !_ tree2@(Leaf arr2) !_
| length arr1 == blockSize = A.from2 tree1 tree2
Expand Down Expand Up @@ -768,7 +777,10 @@ x <| Root size sh tree
-- compute the shift at which the new branch needs to be inserted (0 means there is space in the leaf)
-- the size is computed for efficient calculation of the shift in a balanced subtree
computeShift !sz !sh !min (Balanced _) =
let newShift = (log2 sz `div` blockShift) * blockShift
-- @sz - 1@ is the index of the last element
let hiShift = max ((log2 (sz - 1) `div` blockShift) * blockShift) 0 -- the shift of the root when normalizing
hi = (sz - 1) `unsafeShiftR` hiShift -- the length of the root node when normalizing minus 1
newShift = if hi < blockMask then hiShift else hiShift + blockShift
in if newShift > sh then min else newShift
computeShift _ sh min (Unbalanced arr sizes) =
let sz' = indexPrimArray sizes 0 -- the size of the first subtree
Expand Down
5 changes: 4 additions & 1 deletion src/Data/RRBVector/Strict/Internal/Array.hs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
module Data.RRBVector.Strict.Internal.Array
( Array, MutableArray
, ifoldrStep, ifoldlStep, ifoldrStep', ifoldlStep'
, empty, singleton, from2
, empty, singleton, from2, wrap
, replicate, replicateSnoc
, index, head, last
, update, adjust, adjust'
Expand Down Expand Up @@ -122,6 +122,9 @@ from2 !x !y = Array 0 2 $ runSmallArray $ do
writeSmallArray sma 1 y
pure sma

wrap :: SmallArray a -> Array a
wrap arr = Array 0 (sizeofSmallArray arr) arr

replicate :: Int -> a -> Array a
replicate n !x = Array 0 n $ runSmallArray (newSmallArray n x)

Expand Down
14 changes: 9 additions & 5 deletions src/Data/RRBVector/Strict/Internal/Buffer.hs
Original file line number Diff line number Diff line change
Expand Up @@ -14,35 +14,39 @@ module Data.RRBVector.Strict.Internal.Buffer

import Control.Monad.ST

import Data.Primitive.SmallArray
import Data.RRBVector.Strict.Internal.IntRef
import qualified Data.RRBVector.Strict.Internal.Array as A

-- | A mutable array buffer with a fixed capacity.
data Buffer s a = Buffer !(A.MutableArray s a) !(IntRef s)
data Buffer s a = Buffer !(SmallMutableArray s a) !(IntRef s)

-- | \(O(n)\). Create a new empty buffer with the given capacity.
new :: Int -> ST s (Buffer s a)
new capacity = do
buffer <- A.new capacity
buffer <- newSmallArray capacity uninitialized
offset <- newIntRef 0
pure (Buffer buffer offset)

uninitialized :: a
uninitialized = errorWithoutStackTrace "uninitialized"

-- | \(O(1)\). Push a new element onto the buffer.
-- The size of the buffer must not exceed the capacity, but this is not checked.
push :: Buffer s a -> a -> ST s ()
push (Buffer buffer offset) !x = do
idx <- readIntRef offset
A.write buffer idx x
writeSmallArray buffer idx x
writeIntRef offset (idx + 1)

-- | \(O(n)\). Freeze the content of the buffer and return it.
-- This resets the buffer so that it is empty.
get :: Buffer s a -> ST s (A.Array a)
get (Buffer buffer offset) = do
len <- readIntRef offset
result <- A.freeze buffer 0 len
result <- freezeSmallArray buffer 0 len
writeIntRef offset 0
pure result
pure (A.wrap result)

-- | \(O(1)\). Return the current size of the buffer.
size :: Buffer s a -> ST s Int
Expand Down
117 changes: 115 additions & 2 deletions src/Data/RRBVector/Strict/Internal/Debug.hs
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,19 @@ module Data.RRBVector.Strict.Internal.Debug
, pattern Empty, pattern Root
, Tree, Shift
, pattern Balanced, pattern Unbalanced, pattern Leaf
, Invariant, valid
) where

import Control.Monad.ST (runST)
import Data.Foldable (toList)
import Data.Bits (shiftL)
import Data.Foldable (foldl', toList, traverse_)

Check warning on line 18 in src/Data/RRBVector/Strict/Internal/Debug.hs

View workflow job for this annotation

GitHub Actions / build (9.10)

The import of ‘foldl'’ from module ‘Data.Foldable’ is redundant
import Data.List (intercalate)
import Data.Primitive.PrimArray (PrimArray, primArrayToList)
import Data.Primitive.PrimArray (PrimArray, primArrayToList, indexPrimArray, sizeofPrimArray)

import Data.RRBVector.Strict.Internal hiding (Empty, Root, Balanced, Unbalanced, Leaf)
import qualified Data.RRBVector.Strict.Internal as RRB
import Data.RRBVector.Strict.Internal.Array (Array)
import qualified Data.RRBVector.Strict.Internal.Array as A
import qualified Data.RRBVector.Strict.Internal.Buffer as Buffer

-- | \(O(n)\). Show the underlying tree of a vector.
Expand Down Expand Up @@ -85,3 +88,113 @@ pattern Leaf :: Array a -> Tree a
pattern Leaf arr <- RRB.Leaf arr

{-# COMPLETE Balanced, Unbalanced, Leaf #-}

-- | Structural invariants a vector is expected to hold.
data Invariant
= RootSizeGt0 -- Root: Size > 0
| RootShiftDiv -- Root: The shift at the root is divisible by blockShift
| RootSizeCorrect -- Root: The size at the root is correct
| RootGt1Child -- Root: The root has more than 1 child if not a Leaf
| BalShiftGt0 -- Balanced: Shift > 0
| BalNumChildren -- Balanced: The number of children is blockSize unless
-- the parent is unbalanced or the node is on the right
-- edge in which case it is in [1,blockSize]
| BalFullChildren -- Balanced: All children are full, except for the last
-- if the node is on the right edge
| UnbalShiftGt0 -- Unbalanced: Shift > 0
| UnbalParentUnbal -- Unbalanced: Parent is Unbalanced
| UnbalNumChildren -- Unbalanced: The number of children is in [1,blockSize]
| UnbalSizes -- Unbalanced: The sizes array is correct
| UnbalNotBal -- Unbalanced: The tree is not full enough to be a
-- Balanced
| LeafShift0 -- Leaf: Shift == 0
| LeafNumElems -- Leaf: The number of elements is in [1,blockSize]
deriving Show

assert :: Invariant -> Bool -> Either Invariant ()
assert i False = Left i
assert _ True = pure ()

-- | Check tree invariants. Returns @Left@ on finding a violated invariant.
valid :: Vector a -> Either Invariant ()
valid RRB.Empty = pure ()
valid (RRB.Root size sh tree) = do
assert RootSizeGt0 $ size > 0
assert RootShiftDiv $ sh `mod` blockShift == 0
assert RootSizeCorrect $ size == countElems tree
assert RootGt1Child $ case tree of
Balanced arr -> length arr > 1
Unbalanced arr _ -> length arr > 1
Leaf _ -> True
validTree Unbal sh tree

data NodeDesc
= Bal -- parent is Balanced
| BalRightEdge -- parent is Balanced and this node is on the right edge
| Unbal -- parent is Unbalanced

validTree :: NodeDesc -> Shift -> Tree a -> Either Invariant ()
validTree desc sh (RRB.Balanced arr) = do
assert BalShiftGt0 $ sh > 0
assert BalNumChildren $ case desc of
Bal -> n == blockSize
BalRightEdge -> n >= 1 && n <= blockSize
Unbal -> n >= 1 && n <= blockSize
assert BalFullChildren $
all (\t -> countElems t == 1 `shiftL` sh) expectedFullChildren
traverse_ (validTree Bal (down sh)) arrInit
validTree descLast (down sh) (A.last arr)
where
n = length arr
arrInit = A.take arr (n-1)
expectedFullChildren = case desc of
Bal -> arr
BalRightEdge -> arrInit
Unbal -> arrInit
descLast = case desc of
Bal -> Bal
BalRightEdge -> BalRightEdge
Unbal -> BalRightEdge
validTree desc sh (RRB.Unbalanced arr sizes) = do
assert UnbalShiftGt0 $ sh > 0
case desc of
Bal -> assert UnbalParentUnbal False
BalRightEdge -> assert UnbalParentUnbal False
Unbal -> assert UnbalNumChildren $ n >= 1 && n <= blockSize
assert UnbalSizes $ n == sizeofPrimArray sizes
assert UnbalSizes $
all (\i -> countElems (A.index arr i) == getSize sizes i) [0 .. n-1]
assert UnbalNotBal $ not (couldBeBalanced sh arr sizes)
traverse_ (validTree Unbal (down sh)) arr
where
n = length arr
validTree desc sh (RRB.Leaf arr) = do
assert LeafShift0 $ sh == 0
assert LeafNumElems $ case desc of
Bal -> n == blockSize
BalRightEdge -> n >= 1 && n <= blockSize
Unbal -> n >= 1 && n <= blockSize
where
n = length arr

-- | Check whether an Unbalanced node could be Balanced.
couldBeBalanced :: Shift -> A.Array (Tree a) -> PrimArray Int -> Bool
couldBeBalanced sh arr sizes =
all (\i -> getSize sizes i == 1 `shiftL` sh) [0 .. n-2] &&
(case A.last arr of
Balanced _ -> True
Unbalanced arr' sizes' -> couldBeBalanced (down sh) arr' sizes'
Leaf _ -> True)
where
n = length arr

getSize :: PrimArray Int -> Int -> Int
getSize sizes 0 = indexPrimArray sizes 0
getSize sizes i = indexPrimArray sizes i - indexPrimArray sizes (i-1)

countElems :: Tree a -> Int
countElems (RRB.Balanced arr) =
foldl' (\acc tree -> acc + countElems tree) 0 arr
countElems (RRB.Unbalanced arr _) =
foldl' (\acc tree -> acc + countElems tree) 0 arr
countElems (RRB.Leaf arr) = length arr
Loading

0 comments on commit e2f238d

Please sign in to comment.