Skip to content

Commit

Permalink
Merge branch 'frontend'
Browse files Browse the repository at this point in the history
  • Loading branch information
Snektron committed Jul 10, 2021
2 parents bf0ed45 + a319492 commit 365e0ae
Show file tree
Hide file tree
Showing 23 changed files with 913 additions and 169 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
lib
build
subprojects/*
!subprojects/*.wrap
4 changes: 2 additions & 2 deletions include/pareas/compiler/frontend.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include "futhark_generated.h"

#include "pareas/compiler/ast.hpp"
#include "pareas/compiler/profiler.hpp"
#include "pareas/profiler/profiler.hpp"

#include <chrono>
#include <stdexcept>
Expand Down Expand Up @@ -33,7 +33,7 @@ namespace frontend {
std::runtime_error(error_name(e)) {}
};

DeviceAst compile(futhark_context* ctx, const std::string& input, Profiler& p);
DeviceAst compile(futhark_context* ctx, const std::string& input, pareas::Profiler& p);
}

#endif
47 changes: 0 additions & 47 deletions include/pareas/compiler/profiler.hpp

This file was deleted.

48 changes: 48 additions & 0 deletions include/pareas/profiler/profiler.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#ifndef _PAREAS_PROFILER_PROFILER_HPP
#define _PAREAS_PROFILER_PROFILER_HPP

#include <iosfwd>
#include <chrono>
#include <vector>
#include <functional>

namespace pareas {
// Timing recorder built around begin()/end(name) pairs.
// NOTE(review): only measure() and null_callback() are defined in this header; everything
// said below about the other member functions is an assumption to confirm against their
// definitions.
struct Profiler {
// Type of the callback stored in `sync_callback`: no arguments, no result.
using SyncCallback = std::function<void()>;

// Clock that supplies the time points in `starts` and the durations in `history`.
using Clock = std::chrono::high_resolution_clock;

// One recorded measurement.
struct HistoryEntry {
// presumably the nesting depth at which the measurement was taken -- TODO confirm
unsigned level;
// Label of the measurement. Stored as a raw pointer, so the string is not copied here and
// must outlive the entry.
const char* name;
// Duration associated with the measurement.
Clock::duration elapsed;
};

// presumably the deepest nesting level that still gets recorded -- TODO confirm
unsigned max_level;
// presumably the current nesting depth -- TODO confirm
unsigned level;

// Callback installed through set_sync_callback().
// NOTE(review): the name suggests it is invoked to synchronize outstanding work before a
// timestamp is taken; verify in the implementation.
SyncCallback sync_callback;
// Time points; presumably one per begin() that has not yet been matched by end() -- TODO confirm
std::vector<Clock::time_point> starts;
// Collected HistoryEntry records; presumably what dump() reports -- TODO confirm
std::vector<HistoryEntry> history;

// Not declared `explicit`, so an `unsigned` converts implicitly to a Profiler.
Profiler(unsigned max_level);

// Replaces the stored callback; calling it without an argument passes null_callback.
void set_sync_callback(SyncCallback sync_callback = null_callback);

// Open and close a measurement; `name` labels the measurement being closed.
void begin();
void end(const char* name);

// Writes profiler output to `os` (format defined by the implementation).
void dump(std::ostream& os);

// Convenience wrapper: runs `f` between begin() and end(name).
// `f` is taken by value and invoked with no arguments; its result, if any, is discarded.
// end(name) is not reached if `f` throws.
template <typename F>
void measure(const char* name, F f) {
this->begin();
f();
this->end(name);
}

// Callback that does nothing; the default argument of set_sync_callback().
static void null_callback() {}
};
}

#endif
103 changes: 103 additions & 0 deletions lib/github.com/diku-dk/segmented/segmented.fut
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
-- | Irregular segmented operations, like scans and reductions.

-- | Segmented inclusive scan. ``op`` must be associative with neutral
-- element ``ne``. A `true` in ``flags`` opens a new segment at that
-- position and a `false` extends the current one; the result holds,
-- for every position, the ``op``-prefix of ``as`` within its segment.
let segmented_scan [n] 't (op: t -> t -> t) (ne: t)
                          (flags: [n]bool) (as: [n]t): [n]t =
  -- Operator lifted to (flag, value) pairs: a flagged right operand
  -- restarts the accumulation, otherwise the two values are combined.
  let step (lflag, lval) (rflag, rval) =
    (lflag || rflag, if rflag then rval else lval `op` rval)
  let (_, scanned) = unzip (scan step (false, ne) (zip flags as))
  in scanned

-- | Segmented reduction. Given a binary associative operator ``op``
-- with neutral element ``ne``, computes the reduction of the segments
-- of ``as`` specified by the ``flags`` array, where `true` starts a
-- segment and `false` continues a segment. One value is returned per
-- segment.
--
-- As in `segmented_scan`, the first element always belongs to the
-- first segment, whether or not ``flags[0]`` is set.
let segmented_reduce [n] 't (op: t -> t -> t) (ne: t)
                            (flags: [n]bool) (as: [n]t) =
  -- Compute segmented scan. Then we just have to fish out the end of
  -- each segment.
  let as' = segmented_scan op ne flags as
  -- Find the segment ends: a position ends a segment when its
  -- successor starts one, and the last position always ends one.
  -- (Using only `rotate 1 flags` would make the last position depend
  -- on `flags[0]` and drop the final segment when that flag is unset.)
  let segment_ends =
    map2 (\i next_starts -> i == n-1 || next_starts) (iota n) (rotate 1 flags)
  -- Find the offset for each segment end.
  let segment_end_offsets = segment_ends |> map i64.bool |> scan (+) 0
  let num_segments = if n > 0 then last segment_end_offsets else 0
  -- Make room for the final result. The specific value we write here
  -- does not matter; they will all be overwritten by the segment
  -- ends.
  let scratch = replicate num_segments ne
  -- Compute where to write each element of as'. Only segment ends
  -- are written; index -1 is out of bounds and ignored by scatter.
  let index i f = if f then i-1 else -1
  in scatter scratch (map2 index segment_end_offsets segment_ends) as'

-- | Replicated iota. Takes an array of repetition counts and returns
-- an array in which each index (counting from 0) occurs as many times
-- as its count says. For example, replicated_iota [2,3,1] yields
-- [0,0,1,1,1,2].
let replicated_iota [n] (reps:[n]i64) : []i64 =
  -- Inclusive prefix sums: where each index's run ends.
  let run_ends = scan (+) 0 reps
  -- Shifted by one position (with 0 in front): where each run starts.
  let run_starts = map2 (\i prev_end -> if i==0 then 0 else prev_end)
                        (iota n) (rotate (-1) run_ends)
  let total = reduce (+) 0 reps
  -- Drop every index at the start of its run; when several runs start
  -- at the same place the largest index is kept.
  let marks = reduce_by_index (replicate total 0) i64.max 0 run_starts (iota n)
  let flags = map (>0) marks
  -- Carry each mark forward across the remainder of its run.
  in segmented_scan (+) 0 flags marks

-- | Segmented iota. Given a flags array, returns an array of index
-- sequences that restart at 0 wherever a flag is set. As an example,
-- segmented_iota [false,false,false,true,false,false,false] returns
-- the array [0,1,2,0,1,2,3].
let segmented_iota [n] (flags:[n]bool) : [n]i64 =
  -- Scanning a row of ones gives 1-based positions within each
  -- segment; subtracting one makes them 0-based.
  replicate n 1i64
  |> segmented_scan (+) 0 flags
  |> map (\pos -> pos - 1)

-- | Generic expansion. Every element of the source array is expanded
-- into a run of target elements: ``sz`` says how many target elements
-- a source element produces, and ``get`` computes one of them from
-- the source element and its position within the run. As an example,
-- expand (\x->x) (*) [2,3,1] returns the array [0,2,0,3,6,0].
let expand 'a 'b (sz: a -> i64) (get: a -> i64 -> b) (arr:[]a) : []b =
  -- For every target slot, the index of the source element it came from.
  let owners = replicated_iota (map sz arr)
  -- A run begins wherever the owner differs from the previous slot's.
  let run_starts = map2 (!=) owners (rotate (-1) owners)
  -- Position of every target slot inside its own run.
  let offsets = segmented_iota run_starts
  in map2 (\src k -> get arr[src] k) owners offsets

-- | Expansion combined with a segmented reduction. The result is the
-- same as expanding ``arr`` with ``sz`` and ``get`` and then applying
-- a segmented reduction with ``op`` and ``ne`` under a flags vector
-- that marks where each expanded segment begins. The flags vector
-- built during the expansion is reused for the reduction, so it does
-- not have to be computed a second time.
let expand_reduce 'a 'b (sz: a -> i64) (get: a -> i64 -> b)
                        (op: b -> b -> b) (ne:b) (arr:[]a) : []b =
  -- Source index owning each expanded slot.
  let owners = replicated_iota (map sz arr)
  -- Set wherever the owner differs from that of the preceding slot.
  let flags = map2 (!=) owners (rotate (-1) owners)
  -- Offset of each expanded slot within its segment.
  let offsets = segmented_iota flags
  let expanded = map2 (\src k -> get arr[src] k) owners offsets
  in segmented_reduce op ne flags expanded

-- | Expansion followed by an ''outer segmented reduce'': element `i`
-- of the result corresponds to expanding `arr[i]` and reducing what it
-- expanded to. A source element of size zero is expanded to the single
-- value ``ne``.
let expand_outer_reduce 'a 'b [n] (sz: a -> i64) (get: a -> i64 -> b)
                                  (op: b -> b -> b) (ne: b)
                                  (arr: [n]a) : [n]b =
  -- Give size-zero elements one slot so they still own a segment...
  let padded_sz x =
    let s = sz x
    in if s != 0 then s else 1
  -- ...and put the neutral element in that slot.
  let padded_get x i = if sz x != 0 then get x i else ne
  in expand_reduce padded_sz padded_get op ne arr :> [n]b
74 changes: 74 additions & 0 deletions lib/github.com/diku-dk/segmented/segmented_tests.fut
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
-- | ignore

import "segmented"

-- ==
-- entry: test_segmented_scan
-- input { [true,false,false,true,false,false,true,false,false,false]
-- [1i64,2i64,3i64,4i64,5i64,6i64,7i64,8i64,9i64,10i64] }
-- output { [1i64,3i64,6i64,4i64,9i64,15i64,7i64,15i64,24i64,34i64] }
-- input { [true] [1i64] }
-- output { [1i64] }
-- input { empty([0]bool) empty([0]i64) }
-- output { empty([0]i64) }

-- Test driver: runs `segmented_scan` with `(+)` and neutral element 0
-- on the given flags and values.
entry test_segmented_scan (flags: []bool) (as: []i64) =
segmented_scan (+) 0 flags as

-- ==
-- entry: test_segmented_reduce
-- input { [true,false,false,true,false,false,true,false,false,false]
-- [1i64,2i64,3i64,4i64,5i64,6i64,7i64,8i64,9i64,10i64] }
-- output { [6i64,15i64,34i64] }
-- input { [true] [1i64] }
-- output { [1i64] }

-- Test driver: runs `segmented_reduce` with `(+)` and neutral element 0
-- on the given flags and values.
entry test_segmented_reduce (flags: []bool) (as: []i64) =
segmented_reduce (+) 0 flags as

-- ==
-- entry: test_replicated_iota
-- input { [2i64,3i64,1i64] } output { [0i64,0i64,1i64,1i64,1i64,2i64] }
-- input { [3i64] } output { [0i64,0i64,0i64] }
-- input { [2i64,0i64,1i64] } output { [0i64,0i64,2i64] }
-- input { empty([0]i64) } output { empty([0]i64) }
-- input { [0i64] } output { empty([0]i64) }
-- input { [0i64,0i64] } output { empty([0]i64) }

-- Test driver: passes the repetition counts straight to `replicated_iota`.
entry test_replicated_iota (repl:[]i64) : []i64 =
replicated_iota repl

-- ==
-- entry: test_segmented_iota
-- input { [false,false,false,true,false,false,false] }
-- output { [0i64,1i64,2i64,0i64,1i64,2i64,3i64] }
-- input { [false] } output { [0i64] }
-- input { [true] } output { [0i64] }
-- input { empty([0]bool) } output { empty([0]i64) }

-- Test driver: passes the flags straight to `segmented_iota`.
entry test_segmented_iota (flags:[]bool) : []i64 =
segmented_iota flags

-- ==
-- entry: test_expand
-- input { [2i64,3i64,1i64] }
-- output { [0i64,2i64,0i64,3i64,6i64,0i64] }

-- Test driver: expands each element `x` into `x` values, the `i`th of
-- which is `x*i`.
entry test_expand (arr:[]i64) : []i64 =
expand (\ x -> x) (\x i -> x*i) arr

-- ==
-- entry: test_expand_reduce
-- input { [2i64,0i64,3i64,1i64] }
-- output { [2i64,9i64,0i64] }

-- Test driver: same size and element functions as `test_expand`
-- (size `x`, element `x*i`), reduced with `(+)` and neutral element 0
-- through `expand_reduce`.
entry test_expand_reduce (arr:[]i64) : []i64 =
expand_reduce (\ x -> x) (\x i -> x*i) (+) 0 arr

-- ==
-- entry: test_expand_outer_reduce
-- input { [2i64,0i64,3i64,1i64] }
-- output { [2i64,0i64,9i64,0i64] }

-- Test driver: same size and element functions as `test_expand`
-- (size `x`, element `x*i`), reduced with `(+)` and neutral element 0
-- through `expand_outer_reduce`.
entry test_expand_outer_reduce (arr:[]i64) : []i64 =
expand_outer_reduce (\ x -> x) (\x i -> x*i) (+) 0 arr
3 changes: 3 additions & 0 deletions lib/github.com/diku-dk/sorts/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
*
!*.fut
!.gitignore
25 changes: 25 additions & 0 deletions lib/github.com/diku-dk/sorts/bubble_sort.fut
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
-- | Parallel bubble sort.
--
-- This may be useful if you have almost-sorted data that you want to
-- make fully-sorted in parallel. Obviously *very* slow for
-- non-sorted data.

-- | Parallel bubble sort. Runs with *O(n^2)* work and *O(n^2)* depth.
--
-- The array is sorted in phases. In each phase every element is
-- paired with one neighbour (the pairing alternates between phases)
-- and the two members of a pair trade places when they are out of
-- order according to ``<=``. Elements that compare as equal are never
-- exchanged, so the sort is stable.
--
-- Sorting stops once two consecutive phases (one per pairing) have
-- made no exchange. NOTE(review): this assumes ``<=`` is a total
-- preorder; with a relation where neither `x <= y` nor `y <= x` holds
-- (e.g. floating-point NaN) termination is not guaranteed.
let bubble_sort [n] 't ((<=): t -> t -> bool) (xs: [n]t): [n]t =
  -- New value for position `i` in the phase selected by `b` (1 or -1),
  -- together with a flag telling whether the position changed.
  let f b xs i =
    -- `dir` is 1 when the partner is the right neighbour, -1 when it
    -- is the left neighbour.
    let dir = if i%2 == 0 then b else -b
    let j = i + dir
    -- Both members of a pair must reach the same verdict, otherwise
    -- one of them would be duplicated and the other lost. Hence both
    -- evaluate "left member is not <= right member".
    let misplaced mine theirs = if dir == 1 then ! (mine <= theirs)
                                else ! (theirs <= mine)
    -- `&&` short-circuits, so `xs[j]` is only read when `j` is in bounds.
    in if j >= 0 && j < n && (xs[i] `misplaced` xs[j])
       then (true, xs[j]) else (false, xs[i])
  -- Run one phase; report the new array and whether anything moved.
  let iter xs b =
    let (changed, xs) = tabulate n (f b xs) |> unzip
    in (xs, or changed)
  -- `idle` counts consecutive phases without an exchange. A single
  -- quiet phase is not enough, because it has inspected only one of
  -- the two pairings.
  in (loop (xs, b, idle) = (xs, 1, 0i32) while idle < 2 do
        let (xs', changed) = iter xs b
        in (xs', -b, if changed then 0 else idle + 1)).0

-- | Like `bubble_sort`@term, but the order is determined by comparing
-- the keys that ``key`` extracts from the elements.
let bubble_sort_by_key [n] 't 'k (key: t -> k) ((<=): k -> k -> bool) (xs: [n]t): [n]t =
  -- Sort (key, original index) pairs on the key alone, then use the
  -- surviving indices to pick the elements in sorted order.
  let tagged = zip (map key xs) (iota n)
  let ordered = bubble_sort (\(ka, _) (kb, _) -> ka <= kb) tagged
  in map (\(_, src) -> xs[src]) ordered
48 changes: 48 additions & 0 deletions lib/github.com/diku-dk/sorts/bubble_sort_tests.fut
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
-- | ignore

import "bubble_sort"

-- ==
-- entry: sort_i32
-- input { empty([0]i32) }
-- output { empty([0]i32) }
-- input { [5,4,3,2,1] }
-- output { [1,2,3,4,5] }
-- input { [5,4,3,3,2,1] }
-- output { [1,2,3,3,4,5] }

-- Test driver: `bubble_sort` on i32 values ordered by `i32.<=`.
entry sort_i32 (xs: []i32) = bubble_sort (i32.<=) xs

-- ==
-- entry: sort_u16
-- input { [5u16,4u16,3u16,2u16,1u16] }
-- output { [1u16,2u16,3u16,4u16,5u16] }

-- Test driver: `bubble_sort` on u16 values ordered by `u16.<=`.
entry sort_u16 (xs: []u16) = bubble_sort (u16.<=) xs

-- ==
-- entry: sort_f32
-- input { [5f32,4f32,3f32,2f32,1f32] }
-- output { [1f32,2f32,3f32,4f32,5f32] }

-- Test driver: `bubble_sort` on f32 values ordered by `f32.<=`.
entry sort_f32 (xs: []f32) = bubble_sort (f32.<=) xs

-- ==
-- entry: sort_perm_i32
-- input { [5,4,3,2,1,0,-1,-2] }
-- output { [7, 6, 5, 4, 3, 2, 1, 0] }

-- Test driver: pairs every element with its index, sorts the pairs
-- with `bubble_sort_by_key` using the element (component 0) as key,
-- and returns the indices (component 1) converted by `i32.i64`, i.e.
-- the permutation the sort applied.
entry sort_perm_i32 [n] (xs: [n]i32) =
zip xs (iota n)
|> bubble_sort_by_key (.0) (<=)
|> map ((.1) >-> i32.i64)

-- ==
-- entry: sort_perm_f32
-- input { [5f32,4f32,3f32,2f32,1f32,0f32,-1f32,-2f32] }
-- output { [7, 6, 5, 4, 3, 2, 1, 0] }

-- Test driver: pairs every element with its index, sorts the pairs
-- with `bubble_sort_by_key` using the element (component 0) as key,
-- and returns the indices (component 1) converted by `i32.i64`, i.e.
-- the permutation the sort applied.
entry sort_perm_f32 [n] (xs: [n]f32) =
zip xs (iota n)
|> bubble_sort_by_key (.0) (<=)
|> map ((.1) >-> i32.i64)
Loading

0 comments on commit 365e0ae

Please sign in to comment.