-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
23 changed files
with
913 additions
and
169 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,3 @@ | ||
lib | ||
build | ||
subprojects/* | ||
!subprojects/*.wrap |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
#ifndef _PAREAS_PROFILER_PROFILER_HPP | ||
#define _PAREAS_PROFILER_PROFILER_HPP | ||
|
||
#include <iosfwd> | ||
#include <chrono> | ||
#include <vector> | ||
#include <functional> | ||
|
||
namespace pareas {
    /// Records named, nested timing measurements.
    ///
    /// Only `measure` and `null_callback` are defined in this header; the
    /// constructor, `set_sync_callback`, `begin`, `end` and `dump` are
    /// declared here and implemented elsewhere.
    struct Profiler {
        /// Clock used for all time points and durations stored by the profiler.
        using Clock = std::chrono::high_resolution_clock;

        /// Type of the callable stored in `sync_callback`.
        /// NOTE(review): presumably invoked to synchronize outstanding work
        /// before a timestamp is taken - confirm against the implementation.
        using SyncCallback = std::function<void()>;

        /// A single recorded measurement.
        struct HistoryEntry {
            unsigned level;          ///< Level associated with the measurement.
            const char* name;        ///< Label of the measurement (not owned by the entry).
            Clock::duration elapsed; ///< Duration recorded for the measurement.
        };

        unsigned max_level;
        unsigned level;

        SyncCallback sync_callback;
        std::vector<Clock::time_point> starts;
        std::vector<HistoryEntry> history;

        Profiler(unsigned max_level);

        /// Replaces the stored callback; defaults to the no-op `null_callback`.
        void set_sync_callback(SyncCallback sync_callback = null_callback);

        /// Opens a measurement; pair every call with a later `end`.
        void begin();

        /// Closes the measurement opened by the matching `begin`, labelled `name`.
        void end(const char* name);

        /// Writes the profiler's report to `os`.
        void dump(std::ostream& os);

        /// Runs `f` between a `begin()`/`end(name)` pair.
        ///
        /// Any value returned by `f` is discarded. If `f` throws, `end` is
        /// not called.
        template <typename F>
        void measure(const char* name, F f) {
            begin();
            f();
            end(name);
        }

        /// Callback that does nothing.
        static void null_callback() {}
    };
}
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
-- | Irregular segmented operations, like scans and reductions. | ||
|
||
-- | Segmented inclusive scan.  ``op`` must be an associative binary
-- operator with neutral element ``ne``.  The ``flags`` array splits
-- ``as`` into segments: a `true` flag starts a new segment and a
-- `false` flag continues the current one.  Each segment is scanned
-- independently.  Elements before the first `true` flag are scanned
-- as one initial segment.
let segmented_scan [n] 't (op: t -> t -> t) (ne: t)
                          (flags: [n]bool) (as: [n]t): [n]t =
  -- Lift `op` to (flag, value) pairs: when the right operand starts a
  -- segment, the accumulated left value is discarded.
  let lifted (f1, v1) (f2, v2) =
    (f1 || f2, if f2 then v2 else v1 `op` v2)
  let (_, scanned) = unzip (scan lifted (false, ne) (zip flags as))
  in scanned
|
||
-- | Segmented reduction.  Given a binary associative operator ``op``
-- with neutral element ``ne``, computes the reduction of the segments
-- of ``as`` specified by the ``flags`` array, where `true` starts a
-- segment and `false` continues a segment.  One value is returned per
-- segment.
--
-- As in `segmented_scan`, elements before the first `true` flag form
-- one initial segment, so a non-empty input always yields at least
-- one value.
let segmented_reduce [n] 't (op: t -> t -> t) (ne: t)
                            (flags: [n]bool) (as: [n]t) =
  -- Compute segmented scan.  Then we just have to fish out the end of
  -- each segment.
  let as' = segmented_scan op ne flags as
  -- Find the segment ends.  An element ends a segment when its
  -- successor starts one, or when it is the last element.  The last
  -- element must be marked explicitly: `rotate 1 flags` wraps around
  -- and would mark it only when `flags[0]` is set, dropping the final
  -- segment otherwise.
  let segment_ends = map2 (\i next_starts -> i == n-1 || next_starts)
                          (iota n) (rotate 1 flags)
  -- Find the offset for each segment end.
  let segment_end_offsets = segment_ends |> map i64.bool |> scan (+) 0
  let num_segments = if n > 0 then last segment_end_offsets else 0
  -- Make room for the final result.  The specific value we write here
  -- does not matter; they will all be overwritten by the segment
  -- ends.
  let scratch = replicate num_segments ne
  -- Compute where to write each element of as'.  Only segment ends
  -- are written; index -1 is out of bounds and ignored by scatter.
  let index i f = if f then i-1 else -1
  in scatter scratch (map2 index segment_end_offsets segment_ends) as'
|
||
-- | Replicated iota.  Takes an array of repetition counts and returns
-- an array in which index `i` (counting from 0) occurs `reps[i]`
-- times, in increasing order of index.  For example,
-- `replicated_iota [2,3,1]` is `[0,0,1,1,1,2]`.
let replicated_iota [n] (reps:[n]i64) : []i64 =
  -- Inclusive prefix sums: where each index's run ends.
  let ends = scan (+) 0 reps
  -- Shift right by one to get where each index's run starts.
  let starts = map2 (\i prev_end -> if i == 0 then 0 else prev_end)
                    (iota n) (rotate (-1) ends)
  let total = reduce (+) 0 reps
  -- Write every index at the start of its run.  Indices with an empty
  -- run share a start position with a later index; `i64.max` keeps the
  -- larger one.  Start positions outside the result are ignored.
  let marks = reduce_by_index (replicate total 0) i64.max 0 starts (iota n)
  -- A nonzero mark begins a new run; the segmented sum then copies it
  -- across the zeros that follow.
  let run_starts = map (>0) marks
  in segmented_scan (+) 0 run_starts marks
|
||
-- | Segmented iota.  Given a flags array, returns for every position
-- its zero-based offset within its segment; counting restarts at each
-- `true` flag.  As an example, `segmented_iota
-- [false,false,false,true,false,false,false]` returns the array
-- `[0,1,2,0,1,2,3]`.
let segmented_iota [n] (flags:[n]bool) : [n]i64 =
  -- Count elements per segment (1-based), then shift to 0-based.
  replicate n 1i64
  |> segmented_scan (+) 0 flags
  |> map (\count -> count - 1)
|
||
-- | Generic expansion.  Every source element `x` of `arr` contributes
-- `sz x` target elements; the `j`th of them (counting from 0) is
-- `get x j`.  The target elements of all source elements are
-- concatenated in source order.  As an example, the expression
-- `expand (\x->x) (*) [2,3,1]` returns the array `[0,2,0,3,6,0]`.
let expand 'a 'b (sz: a -> i64) (get: a -> i64 -> b) (arr:[]a) : []b =
  -- Source index of every target element.
  let src = replicated_iota (map sz arr)
  -- A target element starts a segment when its source index differs
  -- from that of its (cyclic) predecessor.
  let starts = map2 (!=) src (rotate (-1) src)
  -- Position of every target element within its segment.
  let offsets = segmented_iota starts
  in map2 (\s k -> get arr[s] k) src offsets
|
||
-- | Expansion fused with a segmented reduction.  Equivalent to
-- expanding `arr` with `sz` and `get` (as `expand` does) and then
-- reducing each expanded segment with `op` and `ne`.  The flags
-- vector computed during the expansion is reused for the reduction,
-- so callers do not have to build one themselves after a separate
-- call to `expand`.  Source elements that expand to nothing produce
-- no output value.
let expand_reduce 'a 'b (sz: a -> i64) (get: a -> i64 -> b)
                        (op: b -> b -> b) (ne:b) (arr:[]a) : []b =
  -- Source index of every target element.
  let src = replicated_iota (map sz arr)
  -- Segment starts: the source index changes w.r.t. the cyclic
  -- predecessor.
  let starts = map2 (!=) src (rotate (-1) src)
  let vals = map2 (\s k -> get arr[s] k) src (segmented_iota starts)
  in segmented_reduce op ne starts vals
|
||
-- | Expansion followed by an ''outer segmented reduce'': element `i`
-- of the result is the reduction of the expansion of element `i` of
-- the source array.  A source element that expands to nothing yields
-- `ne`.
let expand_outer_reduce 'a 'b [n] (sz: a -> i64) (get: a -> i64 -> b)
                                  (op: b -> b -> b) (ne: b)
                                  (arr: [n]a) : [n]b =
  -- Pad empty expansions to a single `ne` so that every source element
  -- owns a segment.
  let padded_sz x =
    let s = sz x
    in if s != 0 then s else 1
  let padded_get x i = if sz x != 0 then get x i else ne
  -- NOTE(review): the size coercion relies on expand_reduce returning
  -- exactly one value per source element.
  in expand_reduce padded_sz padded_get op ne arr :> [n]b
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
-- | ignore | ||
|
||
import "segmented" | ||
|
||
-- ==
-- entry: test_segmented_scan
-- input { [true,false,false,true,false,false,true,false,false,false]
--         [1i64,2i64,3i64,4i64,5i64,6i64,7i64,8i64,9i64,10i64] }
-- output { [1i64,3i64,6i64,4i64,9i64,15i64,7i64,15i64,24i64,34i64] }
-- input { [true] [1i64] }
-- output { [1i64] }
-- input { empty([0]bool) empty([0]i64) }
-- output { empty([0]i64) }

-- Segmented prefix sums of `as` under `flags`.
entry test_segmented_scan (flags: []bool) (as: []i64) =
  as |> segmented_scan (+) 0 flags
|
||
-- ==
-- entry: test_segmented_reduce
-- input { [true,false,false,true,false,false,true,false,false,false]
--         [1i64,2i64,3i64,4i64,5i64,6i64,7i64,8i64,9i64,10i64] }
-- output { [6i64,15i64,34i64] }
-- input { [true] [1i64] }
-- output { [1i64] }

-- Per-segment sums of `as` under `flags`.
entry test_segmented_reduce (flags: []bool) (as: []i64) =
  as |> segmented_reduce (+) 0 flags
|
||
-- ==
-- entry: test_replicated_iota
-- input { [2i64,3i64,1i64] } output { [0i64,0i64,1i64,1i64,1i64,2i64] }
-- input { [3i64] } output { [0i64,0i64,0i64] }
-- input { [2i64,0i64,1i64] } output { [0i64,0i64,2i64] }
-- input { empty([0]i64) } output { empty([0]i64) }
-- input { [0i64] } output { empty([0]i64) }
-- input { [0i64,0i64] } output { empty([0]i64) }

-- Thin wrapper exposing `replicated_iota` to the test runner.
entry test_replicated_iota (repl:[]i64) : []i64 =
  repl |> replicated_iota
|
||
-- ==
-- entry: test_segmented_iota
-- input { [false,false,false,true,false,false,false] }
-- output { [0i64,1i64,2i64,0i64,1i64,2i64,3i64] }
-- input { [false] } output { [0i64] }
-- input { [true] } output { [0i64] }
-- input { empty([0]bool) } output { empty([0]i64) }

-- Thin wrapper exposing `segmented_iota` to the test runner.
entry test_segmented_iota (flags:[]bool) : []i64 =
  flags |> segmented_iota
|
||
-- ==
-- entry: test_expand
-- input { [2i64,3i64,1i64] }
-- output { [0i64,2i64,0i64,3i64,6i64,0i64] }

-- Each element x expands to x*0, x*1, ..., x*(x-1).
entry test_expand (arr:[]i64) : []i64 =
  expand id (*) arr
|
||
-- ==
-- entry: test_expand_reduce
-- input { [2i64,0i64,3i64,1i64] }
-- output { [2i64,9i64,0i64] }

-- Sums the expansion x*0, ..., x*(x-1) of every element with a
-- non-empty expansion.
entry test_expand_reduce (arr:[]i64) : []i64 =
  expand_reduce id (*) (+) 0 arr
|
||
-- ==
-- entry: test_expand_outer_reduce
-- input { [2i64,0i64,3i64,1i64] }
-- output { [2i64,0i64,9i64,0i64] }

-- Sums the expansion x*0, ..., x*(x-1) of every element; empty
-- expansions contribute 0.
entry test_expand_outer_reduce (arr:[]i64) : []i64 =
  expand_outer_reduce id (*) (+) 0 arr
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
* | ||
!*.fut | ||
!.gitignore |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
-- | Parallel bubble sort. | ||
-- | ||
-- This may be useful if you have almost-sorted data that you want to | ||
-- make fully-sorted in parallel. Obviously *very* slow for | ||
-- non-sorted data. | ||
|
||
-- | Parallel bubble sort.  Runs with *O(n^2)* work and *O(n^2)* depth.
--
-- The array is sorted by alternating two phases.  One phase compares
-- the pairs `(0,1), (2,3), ...` and the other the pairs
-- `(1,2), (3,4), ...`.  Two elements are exchanged only when they are
-- strictly out of order under `<=`, so elements that compare equal are
-- never moved past each other.  Sorting stops once both phases have
-- run back-to-back without exchanging anything.
let bubble_sort [n] 't ((<=): t -> t -> bool) (xs: [n]t): [n]t =
  -- New value for position `i` in a phase with direction `b`, paired
  -- with a flag telling whether the position changed.
  let f b xs i =
    -- Even positions look in direction `b`, odd positions in the
    -- opposite direction, so both members of a pair pick each other.
    let dir = if i%2 == 0 then b else -b
    let j = i + dir
    -- Both members of a pair must evaluate the *same* predicate (the
    -- lower-indexed element is not `<=` the higher-indexed one);
    -- otherwise one member could take its partner's value while the
    -- partner keeps its own, duplicating one element and losing the
    -- other.
    let misordered =
      j >= 0 && j < n &&
      (if dir == 1 then !(xs[i] <= xs[j]) else !(xs[j] <= xs[i]))
    in if misordered then (true, xs[j]) else (false, xs[i])
  let phase xs b =
    let (changed, xs') = tabulate n (f b xs) |> unzip
    in (xs', or changed)
  -- `quiet` counts consecutive phases without an exchange.  A single
  -- quiet phase is not enough: the pairs of the other phase may still
  -- be out of order (e.g. [1,3,2,4] is quiet in the first phase).
  in (loop (xs, b, quiet) = (xs, 1, 0i32) while quiet < 2 do
        let (xs', changed) = phase xs b
        in (xs', -b, if changed then 0 else quiet + 1)).0
|
||
-- | Like `bubble_sort`@term, but the elements are ordered by the key
-- that `key` extracts from them, compared using `<=`.
let bubble_sort_by_key [n] 't 'k (key: t -> k) ((<=): k -> k -> bool) (xs: [n]t): [n]t =
  -- Sort (key, original index) pairs by key only, then gather the
  -- elements through the sorted indices.
  let keyed = zip (map key xs) (iota n)
  let sorted = bubble_sort (\(a, _) (b, _) -> a <= b) keyed
  in map (\(_, i) -> xs[i]) sorted
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
-- | ignore | ||
|
||
import "bubble_sort" | ||
|
||
-- ==
-- entry: sort_i32
-- input { empty([0]i32) }
-- output { empty([0]i32) }
-- input { [5,4,3,2,1] }
-- output { [1,2,3,4,5] }
-- input { [5,4,3,3,2,1] }
-- output { [1,2,3,3,4,5] }

-- Ascending sort of signed 32-bit integers.
entry sort_i32 (xs: []i32) = xs |> bubble_sort (i32.<=)
|
||
-- ==
-- entry: sort_u16
-- input { [5u16,4u16,3u16,2u16,1u16] }
-- output { [1u16,2u16,3u16,4u16,5u16] }

-- Ascending sort of unsigned 16-bit integers.
entry sort_u16 (xs: []u16) = xs |> bubble_sort (u16.<=)
|
||
-- ==
-- entry: sort_f32
-- input { [5f32,4f32,3f32,2f32,1f32] }
-- output { [1f32,2f32,3f32,4f32,5f32] }

-- Ascending sort of 32-bit floats.
entry sort_f32 (xs: []f32) = xs |> bubble_sort (f32.<=)
|
||
-- ==
-- entry: sort_perm_i32
-- input { [5,4,3,2,1,0,-1,-2] }
-- output { [7, 6, 5, 4, 3, 2, 1, 0] }

-- Returns the permutation (as i32 indices) that sorts `xs`.
entry sort_perm_i32 [n] (xs: [n]i32) =
  zip xs (iota n)
  |> bubble_sort_by_key (\(x, _) -> x) (<=)
  |> map (\(_, i) -> i32.i64 i)
|
||
-- ==
-- entry: sort_perm_f32
-- input { [5f32,4f32,3f32,2f32,1f32,0f32,-1f32,-2f32] }
-- output { [7, 6, 5, 4, 3, 2, 1, 0] }

-- Returns the permutation (as i32 indices) that sorts `xs`.
entry sort_perm_f32 [n] (xs: [n]f32) =
  zip xs (iota n)
  |> bubble_sort_by_key (\(x, _) -> x) (<=)
  |> map (\(_, i) -> i32.i64 i)
Oops, something went wrong.