diff --git a/include/pareas/compiler/ast.hpp b/include/pareas/compiler/ast.hpp
index bfd9cbb..c0be825 100644
--- a/include/pareas/compiler/ast.hpp
+++ b/include/pareas/compiler/ast.hpp
@@ -32,7 +32,7 @@ struct HostAst {
     std::unique_ptr node_depths;
     std::unique_ptr child_indexes;
 
-    std::unique_ptr fn_tab;
+    std::unique_ptr fn_tab;
 
     void dump_dot(std::ostream& os) const;
 };
@@ -47,7 +47,7 @@ struct DeviceAst {
     futhark_i32_1d* node_depths;
     futhark_i32_1d* child_indexes;
 
-    futhark_i32_1d* fn_tab;
+    futhark_u32_1d* fn_tab;
 
     explicit DeviceAst(futhark_context* ctx);
diff --git a/include/pareas/compiler/backend.hpp b/include/pareas/compiler/backend.hpp
new file mode 100644
index 0000000..4ea3f9a
--- /dev/null
+++ b/include/pareas/compiler/backend.hpp
@@ -0,0 +1,14 @@
+#ifndef _PAREAS_COMPILER_BACKEND_HPP
+#define _PAREAS_COMPILER_BACKEND_HPP
+
+#include "futhark_generated.h"
+
+#include "pareas/compiler/ast.hpp"
+#include "pareas/compiler/module.hpp"
+#include "pareas/profiler/profiler.hpp"
+
+namespace backend {
+    DeviceModule compile(futhark_context* ctx, DeviceAst& ast, pareas::Profiler& p);
+}
+
+#endif
diff --git a/include/pareas/compiler/futhark_interop.hpp b/include/pareas/compiler/futhark_interop.hpp
index e371d94..aa33854 100644
--- a/include/pareas/compiler/futhark_interop.hpp
+++ b/include/pareas/compiler/futhark_interop.hpp
@@ -105,6 +105,9 @@ namespace futhark {
     using UniqueParseTable = UniqueOpaqueArray;
     using UniqueStackChangeTable = UniqueOpaqueArray;
     using UniqueTokenArray = UniqueOpaqueArray;
+    using UniqueTree = UniqueOpaqueArray;
+    using UniqueFuncInfoArray = UniqueOpaqueArray;
+    using UniqueInstrArray = UniqueOpaqueArray;
 
     template
     struct ArrayTraits;
@@ -165,6 +168,15 @@ namespace futhark {
         }
     };
 
+    template <>
+    struct ArrayTraits {
+        using Array = futhark_bool_1d;
+        constexpr static const auto new_fn = futhark_new_bool_1d;
+        constexpr static const auto free_fn = futhark_free_bool_1d;
+        constexpr static const auto shape_fn = futhark_shape_bool_1d;
+        constexpr static const auto values_fn = futhark_values_bool_1d;
+    };
+
     template <>
     struct ArrayTraits {
         using Array = futhark_u8_1d;
diff --git a/include/pareas/compiler/module.hpp b/include/pareas/compiler/module.hpp
new file mode 100644
index 0000000..80371b8
--- /dev/null
+++ b/include/pareas/compiler/module.hpp
@@ -0,0 +1,27 @@
+#ifndef _PAREAS_COMPILER_MODULE_HPP
+#define _PAREAS_COMPILER_MODULE_HPP
+
+#include "futhark_generated.h"
+
+#include
+
+struct DeviceModule {
+    futhark_context* ctx;
+
+    futhark_u32_1d* func_id;
+    futhark_u32_1d* func_start;
+    futhark_u32_1d* func_size;
+    futhark_u32_1d* instructions;
+
+    explicit DeviceModule(futhark_context* ctx);
+
+    DeviceModule(const DeviceModule&) = delete;
+    DeviceModule& operator=(const DeviceModule&) = delete;
+
+    DeviceModule(DeviceModule&& other);
+    DeviceModule& operator=(DeviceModule&& other);
+
+    ~DeviceModule();
+};
+
+#endif
diff --git a/meson.build b/meson.build
index 15b43b0..de62556 100644
--- a/meson.build
+++ b/meson.build
@@ -102,10 +102,13 @@ grammar_asm = grammar[3]
 futhark_sources = [
     'lib/github.com/diku-dk/sorts/radix_sort.fut',
     'lib/github.com/diku-dk/segmented/segmented.fut',
+    'src/compiler/main.fut',
     'src/compiler/string.fut',
     'src/compiler/util.fut',
     'src/compiler/datatypes.fut',
     'src/compiler/frontend.fut',
+    'src/compiler/backend.fut',
+    'src/compiler/bridge.fut',
     'src/compiler/lexer/lexer.fut',
     'src/compiler/parser/binary_tree.fut',
     'src/compiler/parser/bracket_matching.fut',
@@ -123,6 +126,14 @@ futhark_sources = [
'src/compiler/passes/type_resolution.fut', 'src/compiler/passes/check_return_paths.fut', 'src/compiler/passes/ids.fut', + 'src/compiler/codegen/datatypes.fut', + 'src/compiler/codegen/instr.fut', + 'src/compiler/codegen/instr_count.fut', + 'src/compiler/codegen/optimizer.fut', + 'src/compiler/codegen/postprocess.fut', + 'src/compiler/codegen/preprocess.fut', + 'src/compiler/codegen/register.fut', + 'src/compiler/codegen/tree.fut', ] futhark_compile_command = [ @@ -131,7 +142,7 @@ futhark_compile_command = [ '--futhark-backend', futhark_backend, '--output', '@OUTDIR@/futhark_generated', '--dir', '@PRIVATE_DIR@', - '--main', 'src/compiler/frontend.fut', + '--main', 'src/compiler/main.fut', ] inputs = [] @@ -157,6 +168,8 @@ sources = files( 'src/compiler/main.cpp', 'src/compiler/frontend.cpp', 'src/compiler/ast.cpp', + 'src/compiler/module.cpp', + 'src/compiler/backend.cpp', ) pareas_exe = executable( diff --git a/src/compiler/ast.cpp b/src/compiler/ast.cpp index 1829d74..cbcd335 100644 --- a/src/compiler/ast.cpp +++ b/src/compiler/ast.cpp @@ -135,7 +135,7 @@ DeviceAst::~DeviceAst() { futhark_free_i32_1d(this->ctx, this->child_indexes); if (this->fn_tab) - futhark_free_i32_1d(this->ctx, this->fn_tab); + futhark_free_u32_1d(this->ctx, this->fn_tab); } size_t DeviceAst::num_nodes() const { @@ -147,7 +147,7 @@ size_t DeviceAst::num_nodes() const { size_t DeviceAst::num_functions() const { if (!this->fn_tab) return 0; - return futhark_shape_i32_1d(this->ctx, this->fn_tab)[0]; + return futhark_shape_u32_1d(this->ctx, this->fn_tab)[0]; } HostAst DeviceAst::download() const { @@ -163,7 +163,7 @@ HostAst DeviceAst::download() const { .data_types = std::make_unique(num_nodes), .node_depths = std::make_unique(num_nodes), .child_indexes = std::make_unique(num_nodes), - .fn_tab = std::make_unique(num_functions) + .fn_tab = std::make_unique(num_functions) }; int err = futhark_values_u8_1d( @@ -184,7 +184,7 @@ HostAst DeviceAst::download() const { err |= futhark_values_i32_1d(this->ctx, this->node_depths, ast.node_depths.get()); err |= futhark_values_i32_1d(this->ctx, this->child_indexes, ast.child_indexes.get()); - err |= futhark_values_i32_1d(this->ctx, this->fn_tab, ast.fn_tab.get()); + err |= futhark_values_u32_1d(this->ctx, this->fn_tab, ast.fn_tab.get()); if (err) throw futhark::Error(this->ctx); diff --git a/src/compiler/backend.cpp b/src/compiler/backend.cpp index b4d64b1..67a1314 100644 --- a/src/compiler/backend.cpp +++ b/src/compiler/backend.cpp @@ -1,493 +1,151 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "codegen/lexer.hpp" -#include "codegen/parser.hpp" -#include "codegen/astnode.hpp" -#include "codegen/exception.hpp" -#include "codegen/depthtree.hpp" -#include "codegen/symtab.hpp" -#include "codegen/treeproperties.hpp" - -#include "profiler/profiler.hpp" - -//const size_t MAX_NODES = 32; -//const size_t MAX_VARS = 32; - -struct Options { - const char* input_path; - const char* output_path; - bool help; - bool verbose; - bool debug; - - // Options available for the multicore backend - int threads; - - // Options abailable for the OpenCL and CUDA backends - const char* device_name; - unsigned profile; -}; - -inline std::string get_error_str(futhark_context* ctx) { - auto err = futhark_context_get_error(ctx); - if (err) { - auto err_str = std::string(err); - free(err); // leak if the string constructor throws, but whatever. 
- return err_str; - } - - return "(no diagnostic)"; -} - -class FutharkError : public std::runtime_error { - public: - FutharkError(futhark_context* ctx) : std::runtime_error(get_error_str(ctx)) {} - virtual ~FutharkError() = default; -}; - -void print_usage(const char* progname) { - std::cout << - "Usage: " << progname << " [options...] \n" - "Available options:\n" - "-o --output Write the output to . (default: b.out)\n" - "-h --help Show this message and exit.\n" - "-v --verbose Enable Futhark logging.\n" - "-d --debug Enable Futhark debug logging.\n" - #if defined(FUTHARK_BACKEND_multicore) - "Available backend options:\n" - "-t --threads Set the maximum number of threads that may be used.\n" - #elif defined(FUTHARK_BACKEND_opencl) || defined(FUTHARK_BACKEND_cuda) - "Available backend options:\n" - "--device Select the device that kernels are executed on. Any\n" - " device which name contains may be used. The\n" - " special value #k may be used to select the k-th\n" - " device reported by the platform.\n" - "-p --profile Enable Futhark profiling and print report at exit.\n" - #endif - "\n" - "When and/or are '-', standard input and standard\n" - "output are used respectively.\n"; -} - -bool parse_options(Options* opts, int argc, const char* argv[]) { - *opts = { - .input_path = nullptr, - .output_path = "b.out", - .help = false, - .verbose = false, - .debug = false, - .threads = 0, - .device_name = nullptr, - .profile = 0, - }; - - const char* threads_arg = nullptr; - const char* profile_arg = nullptr; - - for (int i = 1; i < argc; ++i) { - auto arg = std::string_view(argv[i]); - - #if defined(FUTHARK_BACKEND_multicore) - if (arg == "-t" || arg == "--threads") { - if (++i >= argc) { - std::cerr << "Error: Expected argument to option " << arg << std::endl; - return false; - } - - threads_arg = argv[i]; - continue; - } - #elif defined(FUTHARK_BACKEND_opencl) || defined(FUTHARK_BACKEND_cuda) - if (arg == "--device") { - if (++i >= argc) { - std::cerr << "Error: Expected argument to option " << arg << std::endl; - return false; - } - - opts->device_name = argv[i]; - continue; - } - #endif - - if (arg == "-o" || arg == "--output") { - if (++i >= argc) { - std::cerr << "Error: Expected argument to option " << arg << std::endl; - return false; - } - opts->output_path = argv[i]; - } else if (arg == "-h" || arg == "--help") { - opts->help = true; - } else if (arg == "-v" || arg == "--verbose") { - opts->verbose = true; - } else if (arg == "-d" || arg == "--debug") { - opts->debug = true; - } else if (arg == "-p" || arg == "--profile") { - if (++i >= argc) { - std::cerr << "Error: Expected argument to option " << arg << std::endl;; - return false; - } - - profile_arg = argv[i]; - } else if (!opts->input_path) { - opts->input_path = argv[i]; - } else { - std::cerr << "Error: Unknown option '" << arg << "'" << std::endl; - return false; - } - } - - if (opts->help) - return true; - - if (!opts->input_path) { - std::cerr << "Error: Missing required argument " << std::endl; - return false; - } else if (!opts->input_path[0]) { - std::cerr << "Error: may not be empty" << std::endl; - return false; - } - - if (!opts->output_path[0]) { - std::cerr << "Error: may not be empty" << std::endl; - return false; - } - - if (threads_arg) { - const auto* end = threads_arg + std::strlen(threads_arg); - auto [p, ec] = std::from_chars(threads_arg, end, opts->threads); - if (ec != std::errc() || p != end || opts->threads < 1) { - std::cerr << "Error: Invalid value '" << threads_arg << "' for option --threads" << 
std::endl; - return false; - } - } - - if (profile_arg) { - const auto* end = profile_arg + std::strlen(profile_arg); - auto [p, ec] = std::from_chars(profile_arg, end, opts->profile); - if (ec != std::errc() || p != end) { - std::cerr << "Error: Invalid value " << profile_arg << " for option --profile" << std::endl; - return false; - } - } - - return true; -} - -template -struct Deleter { - void operator()(T* t) const { - deleter(t); - } -}; - -template -using UniqueCPtr = std::unique_ptr>; - -template -struct Free { - void operator()(T* ptr) const { - free(static_cast(ptr)); - } -}; - -template -using MallocPtr = std::unique_ptr>; - -template -class UniqueFPtr { - private: - futhark_context* ctx; - T* data; - public: - UniqueFPtr(futhark_context* ctx) : ctx(ctx), data(nullptr) {} - UniqueFPtr(futhark_context* ctx, T* data) : ctx(ctx), data(data) {} - UniqueFPtr(const UniqueFPtr&) = delete; - UniqueFPtr(UniqueFPtr&& o) { - std::swap(this->ctx, o.ctx); - std::swap(this->data, o.data); - } - - UniqueFPtr& operator=(const UniqueFPtr&) = delete; - UniqueFPtr& operator=(UniqueFPtr&& o) { - std::swap(this->ctx, o.ctx); - std::swap(this->data, o.data); - } - - ~UniqueFPtr() { - if(this->data != nullptr) - deleter(this->ctx, this->data); - } - - T* get() { - return this->data; - } - - T** operator&() { - return &this->data; - } - - void release() { - deleter(this->ctx, this->data); - this->data = nullptr; - } -}; - -int main(int argc, const char* argv[]) { - Options opts; - if (!parse_options(&opts, argc, argv)) { - std::cerr << "See '" << argv[0] << " --help' for usage" << std::endl; - return EXIT_FAILURE; - } else if (opts.help) { - print_usage(argv[0]); - return EXIT_SUCCESS; - } - - auto config = UniqueCPtr(futhark_context_config_new()); - futhark_context_config_set_logging(config.get(), opts.verbose); - futhark_context_config_set_debugging(config.get(), opts.debug); - - #if defined(FUTHARK_BACKEND_multicore) - futhark_context_config_set_num_threads(config.get(), opts.threads); - #elif defined(FUTHARK_BACKEND_opencl) || defined(FUTHARK_BACKEND_cuda) - if (opts.device_name) { - futhark_context_config_set_device(config.get(), opts.device_name); - } - - futhark_context_config_set_profiling(config.get(), opts.profile); - #endif - - auto p = pareas::Profiler(opts.profile); - - try { - std::ifstream input(opts.input_path); - if(!input) { - std::cerr << "Failed to open file " << opts.input_path << std::endl; - return EXIT_FAILURE; - } - p.begin(); - - //Stage 0, CPU setup - p.begin(); - Lexer lexer(input); - SymbolTable symtab; - Parser parser(lexer, symtab); - - std::unique_ptr node(parser.parse()); - node->resolveType(); - - DepthTree depth_tree(node.get()); - p.end("Setup"); - - TreeProperties props(node.get()); - std::cout << "Number of nodes: " << props.getNodeCount() << std::endl; - std::cout << "Tree width: " << props.getWidth() << std::endl; - std::cout << "Tree height: " << props.getDepth() << std::endl; - std::cout << "Num functions: " << props.getFunctions() << std::endl; - std::cout << "Max function length: " << props.getMaxFuncLen() << std::endl; - - //return 0; - - auto context = UniqueCPtr(futhark_context_new(config.get())); - - p.set_sync_callback([ctx = context.get()] { - if(futhark_context_sync(ctx)) - throw FutharkError(ctx); - }); - - //Start of GPU - p.begin(); - - //Stage 0, uploading data - p.begin(); - auto stage0_node_types = UniqueFPtr(context.get(), - futhark_new_u8_1d(context.get(), depth_tree.getNodeTypes(), depth_tree.maxNodes())); - auto stage0_data_types = 
UniqueFPtr(context.get(), - futhark_new_u8_1d(context.get(), depth_tree.getResultingTypes(), depth_tree.maxNodes())); - auto stage0_parents = UniqueFPtr(context.get(), - futhark_new_i32_1d(context.get(), depth_tree.getParents(), depth_tree.maxNodes())); - auto stage0_depth = UniqueFPtr(context.get(), - futhark_new_i32_1d(context.get(), depth_tree.getDepth(), depth_tree.maxNodes())); - auto stage0_child_idx = UniqueFPtr(context.get(), - futhark_new_i32_1d(context.get(), depth_tree.getChildren(), depth_tree.maxNodes())); - auto stage0_node_data = UniqueFPtr(context.get(), - futhark_new_u32_1d(context.get(), depth_tree.getNodeData(), depth_tree.maxNodes())); - auto stage0_symb_data_types = UniqueFPtr(context.get(), - futhark_new_u8_1d(context.get(), symtab.getDataTypes(), symtab.maxVars())); - auto stage0_symb_offsets = UniqueFPtr(context.get(), - futhark_new_u32_1d(context.get(), symtab.getOffsets(), symtab.maxVars())); - auto stage0_function_symbols = UniqueFPtr(context.get(), - futhark_new_u32_1d(context.get(), symtab.getFuncVarCount(), symtab.numFuncs())); - - - UniqueFPtr stage0_tree(context.get()); - UniqueFPtr stage0_symtab(context.get()); - p.measure("Create", [&] { - int err = futhark_entry_make_tree(context.get(), - &stage0_tree, - depth_tree.maxDepth(), - stage0_node_types.get(), - stage0_data_types.get(), - stage0_parents.get(), - stage0_depth.get(), - stage0_child_idx.get(), - stage0_node_data.get()); - if(err) - throw FutharkError(context.get()); - - err = futhark_entry_make_symtab(context.get(), - &stage0_symtab, - stage0_symb_data_types.get(), - stage0_symb_offsets.get()); +#include "pareas/compiler/backend.hpp" +#include "pareas/compiler/futhark_interop.hpp" + +namespace backend { + DeviceModule compile(futhark_context* ctx, DeviceAst& ast, pareas::Profiler& p) { + auto tree = futhark::UniqueTree(ctx); + p.measure("translate ast", [&] { + int err = futhark_entry_backend_convert_tree( + ctx, + &tree, + ast.node_types, + ast.parents, + ast.node_data, + ast.data_types, + ast.node_depths, + ast.child_indexes + ); if(err) - throw FutharkError(context.get()); + throw futhark::Error(ctx); }); - p.end("Upload"); + // Stage 1, preprocessing + p.measure("preprocessing", [&] { + auto old_tree = std::move(tree); - //Stage 1, preprocessing - UniqueFPtr stage1_tree(context.get()); - - p.measure("Preprocessing", [&] { - int err = futhark_entry_stage_preprocess(context.get(), - &stage1_tree, - stage0_tree.get()); + int err = futhark_entry_backend_preprocess(ctx, &tree, old_tree); if(err) - throw FutharkError(context.get()); + throw futhark::Error(ctx); }); - //Stage 2, instruction count - UniqueFPtr stage2_instr_counts(context.get()); - UniqueFPtr stage2_functab(context.get()); - - p.measure("Instruction count", [&] { - UniqueFPtr stage2_sub_func_id(context.get()); - UniqueFPtr stage2_sub_func_start(context.get()); - UniqueFPtr stage2_sub_func_size(context.get()); - - int err = futhark_entry_stage_instr_count(context.get(), - &stage2_instr_counts, - stage1_tree.get()); - if(err) - throw FutharkError(context.get()); - - err = futhark_entry_stage_instr_count_make_function_table(context.get(), - &stage2_sub_func_id, - &stage2_sub_func_start, - &stage2_sub_func_size, - stage1_tree.get(), - stage2_instr_counts.get()); + // Stage 2, instruction count + auto instr_counts = futhark::UniqueArray(ctx); + auto functab = futhark::UniqueFuncInfoArray(ctx); + p.measure("instruction count", [&] { + auto sub_func_id = futhark::UniqueArray(ctx); + auto sub_func_start = futhark::UniqueArray(ctx); + auto 
sub_func_size = futhark::UniqueArray(ctx); + + int err = futhark_entry_backend_instr_count( + ctx, + &instr_counts, + tree + ); if(err) - throw FutharkError(context.get()); - - err = futhark_entry_stage_compact_functab(context.get(), - &stage2_functab, - stage2_sub_func_id.get(), - stage2_sub_func_start.get(), - stage2_sub_func_size.get()); + throw futhark::Error(ctx); + + err = futhark_entry_backend_instr_count_make_function_table( + ctx, + &sub_func_id, + &sub_func_start, + &sub_func_size, + tree, + instr_counts + ); if(err) - throw FutharkError(context.get()); - }); - - //Stage 3, instruction gen - UniqueFPtr stage3_instr(context.get()); + throw futhark::Error(ctx); - p.measure("Instruction gen", [&] { - int err = futhark_entry_stage_instr_gen(context.get(), - &stage3_instr, - stage1_tree.get(), - stage0_symtab.get(), - stage2_instr_counts.get(), - stage2_functab.get()); - if(err) - throw FutharkError(context.get()); - }); + err = futhark_entry_backend_compact_functab( + ctx, + &functab, + sub_func_id, + sub_func_start, + sub_func_size + ); - //Stage 4, optimizer - UniqueFPtr stage4_instr(context.get()); - UniqueFPtr stage4_functab(context.get()); - UniqueFPtr stage4_optimize(context.get()); - p.measure("Optimize", [&] { - int err = futhark_entry_stage_optimize(context.get(), - &stage4_instr, - &stage4_functab, - &stage4_optimize, - stage3_instr.get(), - stage2_functab.get()); if(err) - throw FutharkError(context.get()); + throw futhark::Error(ctx); }); - //Stage 5-6, regalloc + instr remove - UniqueFPtr stage5_instr(context.get()); - UniqueFPtr stage5_functab(context.get()); - p.measure("Regalloc,Instr remove", [&] { - int err = futhark_entry_stage_regalloc(context.get(), - &stage5_instr, - &stage5_functab, - stage4_instr.get(), - stage4_functab.get(), - stage0_function_symbols.get(), - stage4_optimize.get()); + // Stage 3, instruction gen + auto instr = futhark::UniqueInstrArray(ctx); + p.measure("instruction gen", [&] { + int err = futhark_entry_backend_instr_gen( + ctx, + &instr, + tree, + instr_counts, + functab + ); if(err) - throw FutharkError(context.get()); + throw futhark::Error(ctx); }); - //Stage 7, jump fix - UniqueFPtr stage7_instr(context.get()); - UniqueFPtr result_func_id(context.get()); - UniqueFPtr result_func_start(context.get()); - UniqueFPtr result_func_size(context.get()); - p.measure("Jump Fix", [&] { - int err = futhark_entry_stage_fix_jumps(context.get(), - &stage7_instr, - &result_func_id, - &result_func_start, - &result_func_size, - stage5_instr.get(), - stage5_functab.get()); + // Stage 4, optimizer + auto optimize = futhark::UniqueArray(ctx); + p.measure("optimize", [&] { + auto old_instr = std::move(instr); + auto old_functab = std::move(functab); + + int err = futhark_entry_backend_optimize( + ctx, + &instr, + &functab, + &optimize, + old_instr, + old_functab + ); if(err) - throw FutharkError(context.get()); + throw futhark::Error(ctx); }); - //Stage 8, postprocess - UniqueFPtr result_instr(context.get()); - p.measure("Postprocess", [&] { - int err = futhark_entry_stage_postprocess(context.get(), - &result_instr, - stage7_instr.get()); + // Stage 5-6, regalloc + instr remove + p.measure("regalloc/instr remove", [&] { + auto old_instr = std::move(instr); + auto old_functab = std::move(functab); + + int err = futhark_entry_backend_regalloc( + ctx, + &instr, + &functab, + old_instr, + old_functab, + ast.fn_tab, + optimize + ); if(err) - throw FutharkError(context.get()); + throw futhark::Error(ctx); }); - //End of GPU - p.end("GPU"); - - //End of everything 
- p.end("Global"); - - if (opts.profile) { - p.dump(std::cout); - // auto report = MallocPtr(futhark_context_report(context.get())); - // std::cout << "Profile report:\n" << report << std::endl; - } - } - catch(const ParseException& e) { - std::cerr << "Parse error: " << e.what() << std::endl; - return EXIT_FAILURE; + // Stage 7, jump fix + auto mod = DeviceModule(ctx); + // p.measure("Jump Fix", [&] { + // auto old_instr = std::move(instr); + + // int err = futhark_entry_backend_fix_jumps( + // ctx, + // &instr, + // &mod.func_id, + // &mod.func_start, + // &mod.func_size, + // instr, + // functab + // ); + // if(err) + // throw futhark::Error(ctx); + // }); + + // // Stage 8, postprocess + // p.measure("postprocess", [&] { + // int err = futhark_entry_backend_postprocess( + // ctx, + // &mod.instructions, + // instr + // ); + // if(err) + // throw futhark::Error(ctx); + // }); + + return mod; } - catch(const FutharkError& e) { - std::cerr << "Futhark error: " << e.what() << std::endl; - return EXIT_FAILURE; - } - - return EXIT_SUCCESS; } diff --git a/src/compiler/backend.fut b/src/compiler/backend.fut index 08d0710..d614b8c 100644 --- a/src/compiler/backend.fut +++ b/src/compiler/backend.fut @@ -1,5 +1,5 @@ import "codegen/tree" -import "datatypes" +import "codegen/datatypes" import "codegen/instr" import "codegen/instr_count" import "codegen/symtab" @@ -7,18 +7,11 @@ import "codegen/register" import "codegen/preprocess" import "codegen/optimizer" import "codegen/postprocess" --- import "bridge" - ---Frontend bridge entry --- entry make_from_frontend [n] --- (node_types: [n]front_node_type) --- (node_res_types: [n]front_data_type) --- (node_parents: [n]front_node_idx_type) --- (node_depth: [n]front_depth_type) --- (node_child_idx : [n]front_child_idx_type) --- (node_data: [n]front_node_data_type) --- (max_depth: front_depth_type) : Tree[n] = --- backend_convert node_types node_res_types node_parents node_depth node_child_idx node_data max_depth + +type Tree [n] = Tree [n] +type Symtab [n] = Symtab [n] +type FuncInfo = FuncInfo +type Instr = Instr let make_variable (data_type: u8) (offset: u32) : Variable = { @@ -75,14 +68,14 @@ let split_instr (instr: Instr) = (instr.instr, instr.rd, instr.rs1, instr.rs2, instr.jt) --Stage 3: instruction gen -entry stage_instr_gen [n] [m] [k] (tree: Tree[n]) (symtab: Symtab[m]) (instr_offset: [n]u32) (func_tab: [k]FuncInfo) : []Instr = +entry stage_instr_gen [n] [k] (tree: Tree[n]) (instr_offset: [n]u32) (func_tab: [k]FuncInfo) : []Instr = let func_start = map (.start) func_tab let func_size = map (.size) func_tab let max_instrs = if n == 0 then 0 else i64.u32 instr_offset[n-1] let instr_offset_i64 = map i64.u32 instr_offset let func_ends = iota k |> map (\i -> func_start[i] + func_size[i]) in - compile_tree tree symtab instr_offset_i64 max_instrs func_start func_ends + compile_tree tree instr_offset_i64 max_instrs func_start func_ends let make_instr (instr: u32) (rd: i64) (rs1: i64) (rs2: i64) (jt: u32) = { diff --git a/src/compiler/backend2.cpp b/src/compiler/backend2.cpp new file mode 100644 index 0000000..f6742b1 --- /dev/null +++ b/src/compiler/backend2.cpp @@ -0,0 +1,494 @@ +#include "futhark_generated.h" + +#include "codegen/lexer.hpp" +#include "codegen/parser.hpp" +#include "codegen/astnode.hpp" +#include "codegen/exception.hpp" +#include "codegen/depthtree.hpp" +#include "codegen/symtab.hpp" +#include "codegen/treeproperties.hpp" + +#include "profiler/profiler.hpp" + +#include +#include +#include +#include +#include +#include +#include 
+#include +#include + +//const size_t MAX_NODES = 32; +//const size_t MAX_VARS = 32; + +struct Options { + const char* input_path; + const char* output_path; + bool help; + bool verbose; + bool debug; + + // Options available for the multicore backend + int threads; + + // Options abailable for the OpenCL and CUDA backends + const char* device_name; + unsigned profile; +}; + +inline std::string get_error_str(futhark_context* ctx) { + auto err = futhark_context_get_error(ctx); + if (err) { + auto err_str = std::string(err); + free(err); // leak if the string constructor throws, but whatever. + return err_str; + } + + return "(no diagnostic)"; +} + +class FutharkError : public std::runtime_error { + public: + FutharkError(futhark_context* ctx) : std::runtime_error(get_error_str(ctx)) {} + virtual ~FutharkError() = default; +}; + +void print_usage(const char* progname) { + std::cout << + "Usage: " << progname << " [options...] \n" + "Available options:\n" + "-o --output Write the output to . (default: b.out)\n" + "-h --help Show this message and exit.\n" + "-v --verbose Enable Futhark logging.\n" + "-d --debug Enable Futhark debug logging.\n" + #if defined(FUTHARK_BACKEND_multicore) + "Available backend options:\n" + "-t --threads Set the maximum number of threads that may be used.\n" + #elif defined(FUTHARK_BACKEND_opencl) || defined(FUTHARK_BACKEND_cuda) + "Available backend options:\n" + "--device Select the device that kernels are executed on. Any\n" + " device which name contains may be used. The\n" + " special value #k may be used to select the k-th\n" + " device reported by the platform.\n" + "-p --profile Enable Futhark profiling and print report at exit.\n" + #endif + "\n" + "When and/or are '-', standard input and standard\n" + "output are used respectively.\n"; +} + +bool parse_options(Options* opts, int argc, const char* argv[]) { + *opts = { + .input_path = nullptr, + .output_path = "b.out", + .help = false, + .verbose = false, + .debug = false, + .threads = 0, + .device_name = nullptr, + .profile = 0, + }; + + const char* threads_arg = nullptr; + const char* profile_arg = nullptr; + + for (int i = 1; i < argc; ++i) { + auto arg = std::string_view(argv[i]); + + #if defined(FUTHARK_BACKEND_multicore) + if (arg == "-t" || arg == "--threads") { + if (++i >= argc) { + std::cerr << "Error: Expected argument to option " << arg << std::endl; + return false; + } + + threads_arg = argv[i]; + continue; + } + #elif defined(FUTHARK_BACKEND_opencl) || defined(FUTHARK_BACKEND_cuda) + if (arg == "--device") { + if (++i >= argc) { + std::cerr << "Error: Expected argument to option " << arg << std::endl; + return false; + } + + opts->device_name = argv[i]; + continue; + } + #endif + + if (arg == "-o" || arg == "--output") { + if (++i >= argc) { + std::cerr << "Error: Expected argument to option " << arg << std::endl; + return false; + } + opts->output_path = argv[i]; + } else if (arg == "-h" || arg == "--help") { + opts->help = true; + } else if (arg == "-v" || arg == "--verbose") { + opts->verbose = true; + } else if (arg == "-d" || arg == "--debug") { + opts->debug = true; + } else if (arg == "-p" || arg == "--profile") { + if (++i >= argc) { + std::cerr << "Error: Expected argument to option " << arg << std::endl;; + return false; + } + + profile_arg = argv[i]; + } else if (!opts->input_path) { + opts->input_path = argv[i]; + } else { + std::cerr << "Error: Unknown option '" << arg << "'" << std::endl; + return false; + } + } + + if (opts->help) + return true; + + if (!opts->input_path) { + 
std::cerr << "Error: Missing required argument " << std::endl; + return false; + } else if (!opts->input_path[0]) { + std::cerr << "Error: may not be empty" << std::endl; + return false; + } + + if (!opts->output_path[0]) { + std::cerr << "Error: may not be empty" << std::endl; + return false; + } + + if (threads_arg) { + const auto* end = threads_arg + std::strlen(threads_arg); + auto [p, ec] = std::from_chars(threads_arg, end, opts->threads); + if (ec != std::errc() || p != end || opts->threads < 1) { + std::cerr << "Error: Invalid value '" << threads_arg << "' for option --threads" << std::endl; + return false; + } + } + + if (profile_arg) { + const auto* end = profile_arg + std::strlen(profile_arg); + auto [p, ec] = std::from_chars(profile_arg, end, opts->profile); + if (ec != std::errc() || p != end) { + std::cerr << "Error: Invalid value " << profile_arg << " for option --profile" << std::endl; + return false; + } + } + + return true; +} + +template +struct Deleter { + void operator()(T* t) const { + deleter(t); + } +}; + +template +using UniqueCPtr = std::unique_ptr>; + +template +struct Free { + void operator()(T* ptr) const { + free(static_cast(ptr)); + } +}; + +template +using MallocPtr = std::unique_ptr>; + +template +class UniqueFPtr { + private: + futhark_context* ctx; + T* data; + public: + UniqueFPtr(futhark_context* ctx) : ctx(ctx), data(nullptr) {} + UniqueFPtr(futhark_context* ctx, T* data) : ctx(ctx), data(data) {} + UniqueFPtr(const UniqueFPtr&) = delete; + UniqueFPtr(UniqueFPtr&& o) { + std::swap(this->ctx, o.ctx); + std::swap(this->data, o.data); + } + + UniqueFPtr& operator=(const UniqueFPtr&) = delete; + UniqueFPtr& operator=(UniqueFPtr&& o) { + std::swap(this->ctx, o.ctx); + std::swap(this->data, o.data); + } + + ~UniqueFPtr() { + if(this->data != nullptr) + deleter(this->ctx, this->data); + } + + T* get() { + return this->data; + } + + T** operator&() { + return &this->data; + } + + void release() { + deleter(this->ctx, this->data); + this->data = nullptr; + } +}; + +int main2(int argc, const char* argv[]) { + Options opts; + if (!parse_options(&opts, argc, argv)) { + std::cerr << "See '" << argv[0] << " --help' for usage" << std::endl; + return EXIT_FAILURE; + } else if (opts.help) { + print_usage(argv[0]); + return EXIT_SUCCESS; + } + + auto config = UniqueCPtr(futhark_context_config_new()); + futhark_context_config_set_logging(config.get(), opts.verbose); + futhark_context_config_set_debugging(config.get(), opts.debug); + + #if defined(FUTHARK_BACKEND_multicore) + futhark_context_config_set_num_threads(config.get(), opts.threads); + #elif defined(FUTHARK_BACKEND_opencl) || defined(FUTHARK_BACKEND_cuda) + if (opts.device_name) { + futhark_context_config_set_device(config.get(), opts.device_name); + } + + futhark_context_config_set_profiling(config.get(), opts.profile); + #endif + + auto p = pareas::Profiler(opts.profile); + + try { + std::ifstream input(opts.input_path); + if(!input) { + std::cerr << "Failed to open file " << opts.input_path << std::endl; + return EXIT_FAILURE; + } + p.begin(); + + //Stage 0, CPU setup + p.begin(); + Lexer lexer(input); + SymbolTable symtab; + Parser parser(lexer, symtab); + + std::unique_ptr node(parser.parse()); + node->resolveType(); + + DepthTree depth_tree(node.get()); + p.end("Setup"); + + TreeProperties props(node.get()); + std::cout << "Number of nodes: " << props.getNodeCount() << std::endl; + std::cout << "Tree width: " << props.getWidth() << std::endl; + std::cout << "Tree height: " << props.getDepth() << std::endl; + 
std::cout << "Num functions: " << props.getFunctions() << std::endl; + std::cout << "Max function length: " << props.getMaxFuncLen() << std::endl; + + //return 0; + + auto context = UniqueCPtr(futhark_context_new(config.get())); + + p.set_sync_callback([ctx = context.get()] { + if(futhark_context_sync(ctx)) + throw FutharkError(ctx); + }); + + //Start of GPU + p.begin(); + + //Stage 0, uploading data + p.begin(); + auto stage0_node_types = UniqueFPtr(context.get(), + futhark_new_u8_1d(context.get(), depth_tree.getNodeTypes(), depth_tree.maxNodes())); + auto stage0_data_types = UniqueFPtr(context.get(), + futhark_new_u8_1d(context.get(), depth_tree.getResultingTypes(), depth_tree.maxNodes())); + auto stage0_parents = UniqueFPtr(context.get(), + futhark_new_i32_1d(context.get(), depth_tree.getParents(), depth_tree.maxNodes())); + auto stage0_depth = UniqueFPtr(context.get(), + futhark_new_i32_1d(context.get(), depth_tree.getDepth(), depth_tree.maxNodes())); + auto stage0_child_idx = UniqueFPtr(context.get(), + futhark_new_i32_1d(context.get(), depth_tree.getChildren(), depth_tree.maxNodes())); + auto stage0_node_data = UniqueFPtr(context.get(), + futhark_new_u32_1d(context.get(), depth_tree.getNodeData(), depth_tree.maxNodes())); + auto stage0_symb_data_types = UniqueFPtr(context.get(), + futhark_new_u8_1d(context.get(), symtab.getDataTypes(), symtab.maxVars())); + auto stage0_symb_offsets = UniqueFPtr(context.get(), + futhark_new_u32_1d(context.get(), symtab.getOffsets(), symtab.maxVars())); + auto stage0_function_symbols = UniqueFPtr(context.get(), + futhark_new_u32_1d(context.get(), symtab.getFuncVarCount(), symtab.numFuncs())); + + + UniqueFPtr stage0_tree(context.get()); + UniqueFPtr stage0_symtab(context.get()); + p.measure("Create", [&] { + int err = futhark_entry_make_tree(context.get(), + &stage0_tree, + depth_tree.maxDepth(), + stage0_node_types.get(), + stage0_data_types.get(), + stage0_parents.get(), + stage0_depth.get(), + stage0_child_idx.get(), + stage0_node_data.get()); + if(err) + throw FutharkError(context.get()); + + err = futhark_entry_make_symtab(context.get(), + &stage0_symtab, + stage0_symb_data_types.get(), + stage0_symb_offsets.get()); + if(err) + throw FutharkError(context.get()); + }); + + p.end("Upload"); + + //Stage 1, preprocessing + UniqueFPtr stage1_tree(context.get()); + + p.measure("Preprocessing", [&] { + int err = futhark_entry_stage_preprocess(context.get(), + &stage1_tree, + stage0_tree.get()); + if(err) + throw FutharkError(context.get()); + }); + + //Stage 2, instruction count + UniqueFPtr stage2_instr_counts(context.get()); + UniqueFPtr stage2_functab(context.get()); + + p.measure("Instruction count", [&] { + UniqueFPtr stage2_sub_func_id(context.get()); + UniqueFPtr stage2_sub_func_start(context.get()); + UniqueFPtr stage2_sub_func_size(context.get()); + + int err = futhark_entry_stage_instr_count(context.get(), + &stage2_instr_counts, + stage1_tree.get()); + if(err) + throw FutharkError(context.get()); + + err = futhark_entry_stage_instr_count_make_function_table(context.get(), + &stage2_sub_func_id, + &stage2_sub_func_start, + &stage2_sub_func_size, + stage1_tree.get(), + stage2_instr_counts.get()); + if(err) + throw FutharkError(context.get()); + + err = futhark_entry_stage_compact_functab(context.get(), + &stage2_functab, + stage2_sub_func_id.get(), + stage2_sub_func_start.get(), + stage2_sub_func_size.get()); + if(err) + throw FutharkError(context.get()); + }); + + //Stage 3, instruction gen + UniqueFPtr stage3_instr(context.get()); + + 
p.measure("Instruction gen", [&] { + int err = futhark_entry_stage_instr_gen(context.get(), + &stage3_instr, + stage1_tree.get(), + stage0_symtab.get(), + stage2_instr_counts.get(), + stage2_functab.get()); + if(err) + throw FutharkError(context.get()); + }); + + //Stage 4, optimizer + UniqueFPtr stage4_instr(context.get()); + UniqueFPtr stage4_functab(context.get()); + UniqueFPtr stage4_optimize(context.get()); + p.measure("Optimize", [&] { + int err = futhark_entry_stage_optimize(context.get(), + &stage4_instr, + &stage4_functab, + &stage4_optimize, + stage3_instr.get(), + stage2_functab.get()); + if(err) + throw FutharkError(context.get()); + }); + + //Stage 5-6, regalloc + instr remove + UniqueFPtr stage5_instr(context.get()); + UniqueFPtr stage5_functab(context.get()); + p.measure("Regalloc,Instr remove", [&] { + int err = futhark_entry_stage_regalloc(context.get(), + &stage5_instr, + &stage5_functab, + stage4_instr.get(), + stage4_functab.get(), + stage0_function_symbols.get(), + stage4_optimize.get()); + if(err) + throw FutharkError(context.get()); + }); + + //Stage 7, jump fix + UniqueFPtr stage7_instr(context.get()); + UniqueFPtr result_func_id(context.get()); + UniqueFPtr result_func_start(context.get()); + UniqueFPtr result_func_size(context.get()); + p.measure("Jump Fix", [&] { + int err = futhark_entry_stage_fix_jumps(context.get(), + &stage7_instr, + &result_func_id, + &result_func_start, + &result_func_size, + stage5_instr.get(), + stage5_functab.get()); + if(err) + throw FutharkError(context.get()); + }); + + //Stage 8, postprocess + UniqueFPtr result_instr(context.get()); + p.measure("Postprocess", [&] { + int err = futhark_entry_stage_postprocess(context.get(), + &result_instr, + stage7_instr.get()); + if(err) + throw FutharkError(context.get()); + }); + + //End of GPU + p.end("GPU"); + + //End of everything + p.end("Global"); + + if (opts.profile) { + p.dump(std::cout); + // auto report = MallocPtr(futhark_context_report(context.get())); + // std::cout << "Profile report:\n" << report << std::endl; + } + } + catch(const ParseException& e) { + std::cerr << "Parse error: " << e.what() << std::endl; + return EXIT_FAILURE; + } + catch(const FutharkError& e) { + std::cerr << "Futhark error: " << e.what() << std::endl; + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} diff --git a/src/compiler/bridge.fut b/src/compiler/bridge.fut index 44a9e5a..5618088 100644 --- a/src/compiler/bridge.fut +++ b/src/compiler/bridge.fut @@ -1,17 +1,66 @@ -import "tree" +import "codegen/tree" +import "codegen/datatypes" +import "frontend" import "datatypes" +import "passes/util" -type front_node_type = i32 -type front_data_type = i32 +module production = g.production +local open g + +type front_node_type = production.t +type front_data_type = data_type type front_node_idx_type = i32 type front_depth_type = i32 type front_child_idx_type = i32 type front_node_data_type = u32 -let NODE_TYPE_LOOKUP : []NodeType = [ - --TODO: fill in table to map node types - 0 -] +-- Note: Keep in sync with pareas.g +let NODE_TYPE_LOOKUP = mk_production_array node_type_invalid [ + (production_fn_decl_list, node_type_statement_list), + (production_fn_decl, node_type_func_decl), + (production_stat_while, node_type_while_stat), + (production_stat_if, node_type_if_stat), + (production_stat_expr, node_type_expr_stat), + (production_stat_return, node_type_return_stat), + (production_while_dummy, node_type_while_dummy), + (production_stat_if_else, node_type_if_else_stat), + (production_stat_list, 
node_type_statement_list), + (production_assign, node_type_assign_expr), + (production_logical_or, node_type_lor_expr), + (production_logical_and, node_type_land_expr), + (production_rela_eq, node_type_eq_expr), + (production_rela_neq, node_type_neq_expr), + (production_rela_gt, node_type_great_expr), + (production_rela_gte, node_type_greateq_expr), + (production_rela_lt, node_type_less_expr), + (production_rela_lte, node_type_lesseq_expr), + (production_bitwise_and, node_type_bitand_expr), + (production_bitwise_or, node_type_bitor_expr), + (production_bitwise_xor, node_type_bitxor_expr), + (production_shift_lr, node_type_rshift_expr), + (production_shift_ar, node_type_urshift_expr), + (production_shift_ll, node_type_lshift_expr), + (production_sum_add, node_type_add_expr), + (production_sum_sub, node_type_sub_expr), + (production_prod_mul, node_type_mul_expr), + (production_prod_div, node_type_div_expr), + (production_prod_mod, node_type_mod_expr), + (production_atom_unary_neg, node_type_neg_expr), + (production_atom_unary_bitflip, node_type_bitnot_expr), + (production_atom_unary_not, node_type_lnot_expr), + (production_atom_cast, node_type_cast_expr), + (production_atom_decl, node_type_decl_expr), + (production_atom_name, node_type_id_expr), + (production_atom_int, node_type_lit_expr), + (production_atom_float, node_type_lit_expr), + (production_atom_fn_call, node_type_func_call_expr), + (production_atom_unary_deref, node_type_deref_expr), + (production_atom_decl_explicit, node_type_decl_expr), + (production_arg_list, node_type_func_call_arg_list), + (production_arg, node_type_func_call_arg), + (production_param_list, node_type_func_arg_list), + (production_param, node_type_func_arg) + ] let DATA_TYPE_LOOKUP : []DataType = [ 0, --Invalid @@ -23,17 +72,14 @@ let DATA_TYPE_LOOKUP : []DataType = [ ] let convert_node_type (node_type: front_node_type) = - NODE_TYPE_LOOKUP[node_type] + NODE_TYPE_LOOKUP[i32.u8 node_type] let convert_data_type (data_type: front_data_type) = - DATA_TYPE_LOOKUP[data_type] + DATA_TYPE_LOOKUP[i32.u8 data_type] let convert_node_idx (idx: front_node_idx_type) = idx -let convert_depth (depth: front_depth_type) = - depth - let convert_child_idx (child_idx: front_child_idx_type) = child_idx @@ -46,12 +92,12 @@ let backend_convert_node ( parent: front_node_idx_type, depth: front_depth_type, child_idx: front_child_idx_type, - data: front_node_data_type) : Node = + data: front_node_data_type) : Node = { node_type = convert_node_type node_type, resulting_type = convert_data_type data_type, parent = convert_node_idx parent, - depth = convert_depth depth, + depth = depth, child_idx = convert_child_idx child_idx, node_data = convert_node_data data } @@ -68,21 +114,17 @@ let zip6 [n] 'a 'b 'c 'd 'e 'f in map2 (\(t0, t1, t2, t3, t4) t5 -> (t0, t1, t2, t3, t4, t5)) c1 x5 -let backend_convert [n] +let convert_ast [n] (node_types: [n]front_node_type) (node_res_types: [n]front_data_type) (node_parents: [n]front_node_idx_type) (node_depth: [n]front_depth_type) (node_child_idx : [n]front_child_idx_type) - (node_data: [n]front_node_data_type) - (max_depth: front_depth_type) : Tree[n] = - + (node_data: [n]front_node_data_type): Tree[n] = let input = zip6 node_types node_res_types node_parents node_depth node_child_idx node_data - let nodes: [n]Node = input |> map backend_convert_node - in { nodes = nodes, - max_depth = convert_depth max_depth - } \ No newline at end of file + max_depth = i32.maximum node_depth + } diff --git a/src/compiler/codegen/bridge.fut 
b/src/compiler/codegen/bridge.fut deleted file mode 100644 index b90c05b..0000000 --- a/src/compiler/codegen/bridge.fut +++ /dev/null @@ -1,88 +0,0 @@ -import "tree" -import "datatypes" - -type front_node_type = i32 -type front_data_type = i32 -type front_node_idx_type = i32 -type front_depth_type = i32 -type front_child_idx_type = i32 -type front_node_data_type = u32 - -let NODE_TYPE_LOOKUP : []NodeType = [ - --TODO: fill in table to map node types - 0 -] - -let DATA_TYPE_LOOKUP : []DataType = [ - 0, --Invalid - 1, --Void - 2, --Int - 3, --Float - 4, --Int_ref - 5 --Float_ref -] - -let convert_node_type (node_type: front_node_type) = - NODE_TYPE_LOOKUP[node_type] - -let convert_data_type (data_type: front_data_type) = - DATA_TYPE_LOOKUP[data_type] - -let convert_node_idx (idx: front_node_idx_type) = - idx - -let convert_depth (depth: front_depth_type) = - depth - -let convert_child_idx (child_idx: front_child_idx_type) = - child_idx - -let convert_node_data (node_data: front_node_data_type) = - node_data - -let backend_convert_node ( - node_type: front_node_type, - data_type: front_data_type, - parent: front_node_idx_type, - depth: front_depth_type, - child_idx: front_child_idx_type, - data: front_node_data_type) : Node = - { - node_type = convert_node_type node_type, - resulting_type = convert_data_type data_type, - parent = convert_node_idx parent, - depth = convert_depth depth, - child_idx = convert_child_idx child_idx, - node_data = convert_node_data data - } - -let zip6 [n] 'a 'b 'c 'd 'e 'f - (x0: [n]a) - (x1: [n]b) - (x2: [n]c) - (x3: [n]d) - (x4: [n]e) - (x5: [n]f) = - - let c1 = zip5 x0 x1 x2 x3 x4 - in - map2 (\(t0, t1, t2, t3, t4) t5 -> (t0, t1, t2, t3, t4, t5)) c1 x5 - -let backend_convert [n] - (node_types: [n]front_node_type) - (node_res_types: [n]front_data_type) - (node_parents: [n]front_node_idx_type) - (node_depth: [n]front_depth_type) - (node_child_idx : [n]front_child_idx_type) - (node_data: [n]front_node_data_type) - (max_depth: front_depth_type) : Tree[n] = - - let input = zip6 node_types node_res_types node_parents node_depth node_child_idx node_data - - let nodes: [n]Node = input |> - map backend_convert_node - - in { - nodes = nodes, - max_depth = convert_depth max_depth - } diff --git a/src/compiler/codegen/datatypes.fut b/src/compiler/codegen/datatypes.fut new file mode 100644 index 0000000..400baf7 --- /dev/null +++ b/src/compiler/codegen/datatypes.fut @@ -0,0 +1,8 @@ +type DataType = i32 + +let datatype_invalid : i32 = 0 +let datatype_void : i32 = 1 +let datatype_int : i32 = 2 +let datatype_float : i32 = 3 +let datatype_int_ref : i32 = 4 +let datatype_float_ref : i32 = 5 diff --git a/src/compiler/codegen/instr.fut b/src/compiler/codegen/instr.fut index ec6fde0..62155e9 100644 --- a/src/compiler/codegen/instr.fut +++ b/src/compiler/codegen/instr.fut @@ -1,8 +1,7 @@ import "tree" import "datatypes" -import "symtab" import "instr_count" -import "../lib/github.com/diku-dk/sorts/radix_sort" +import "../../../lib/github.com/diku-dk/sorts/radix_sort" type Instr = { instr: u32, @@ -1870,13 +1869,13 @@ let OPERAND_TABLE : [][][][]i8 = [ ] --Arg stack ] -let node_instr(node_type: NodeType) (data_type: DataType) (instr_offset: i64) : u32 = +let node_instr (node_type: NodeType) (data_type: DataType) (instr_offset: i64) : u32 = INSTR_TABLE[node_type, instr_offset, data_type] let has_instr (node_type: NodeType) (data_type: DataType) (instr_offset: i64) : bool = HAS_INSTR_TABLE[node_type, instr_offset, data_type] -let node_has_return(_ : NodeType) (data_type : DataType) : 
bool = +let node_has_return (_ : NodeType) (data_type : DataType) : bool = !(data_type == datatype_void || data_type == node_type_invalid) let parent_arg_idx (node: Node) : i64 = @@ -1953,14 +1952,14 @@ let signextend(x: u32) = let signed_x = i32.u32 x in u32.i32 (signed_x << 20 >> 20) -let instr_constant [max_vars] (node: Node) (instr_offset: i64) (symtab: Symtab[max_vars]) : u32 = +let instr_constant (node: Node) (instr_offset: i64) : u32 = let calc_type = INSTR_CONSTANT_TABLE[node.node_type, instr_offset] in if calc_type == 1 then node.node_data - (signextend (node.node_data & 0xFFF)) & 0xFFFFF000 else if calc_type == 2 then (node.node_data & 0xFFF) << 20 else if calc_type == 3 then - (-(4 * ((symtab_local_offset symtab node.node_data) + 2))) << 20 + (-(4 * (node.node_data + 2))) << 20 else if calc_type == 4 then (4 * node.node_data) << 20 else if calc_type == 5 then @@ -2042,7 +2041,7 @@ let get_output_register [tree_size] (tree: Tree[tree_size]) (node: Node) (instr_ else if node_has_output tree.nodes node instr_offset then register instr_no else 0 -let get_node_instr [tree_size] [max_vars] (tree: Tree[tree_size]) (node: Node) (instr_no: i64) (node_index: i64) (registers: []i64) (symtab: Symtab[max_vars]) (func_starts: []u32) (func_ends: []u32) (instr_offset: i64): (i64, i64, Instr, i64) = +let get_node_instr [tree_size] (tree: Tree[tree_size]) (node: Node) (instr_no: i64) (node_index: i64) (registers: []i64) (func_starts: []u32) (func_ends: []u32) (instr_offset: i64): (i64, i64, Instr, i64) = let node_type = node.node_type let data_type = node.resulting_type let rd = get_output_register tree node instr_no instr_offset @@ -2052,7 +2051,7 @@ let get_node_instr [tree_size] [max_vars] (tree: Tree[tree_size]) (node: Node) ( instr_loc, node_get_parent_arg_idx tree.nodes node instr_offset, { - instr = node_instr node_type data_type instr_offset | instr_constant node instr_offset symtab, + instr = node_instr node_type data_type instr_offset | instr_constant node instr_offset, rd = rd, rs1 = node_get_instr_arg node_index node registers 0 instr_no instr_offset, rs2 = node_get_instr_arg node_index node registers 1 instr_no instr_offset, @@ -2061,7 +2060,7 @@ let get_node_instr [tree_size] [max_vars] (tree: Tree[tree_size]) (node: Node) ( get_data_prop_value tree node rd instr_no ) -let compile_node [tree_size] [max_vars] (tree: Tree[tree_size]) (symtab: Symtab[max_vars]) (registers: []i64) (instr_offset: [tree_size]i64) (func_starts: []u32) (func_ends: []u32) +let compile_node [tree_size] (tree: Tree[tree_size]) (registers: []i64) (instr_offset: [tree_size]i64) (func_starts: []u32) (func_ends: []u32) (node_index: i64) = let node = tree.nodes[node_index] let node_instr = instr_offset[node_index] @@ -2069,7 +2068,7 @@ let compile_node [tree_size] [max_vars] (tree: Tree[tree_size]) (symtab: Symtab[ iota 4i64 |> map (\i -> if has_instr node.node_type node.resulting_type i then - get_node_instr tree node (node_instr+i) node_index registers symtab func_starts func_ends i + get_node_instr tree node (node_instr+i) node_index registers func_starts func_ends i else if i == 0 then (-1, node_get_parent_arg_idx tree.nodes node 0, EMPTY_INSTR, get_data_prop_value tree node 0 node_instr) else @@ -2092,7 +2091,7 @@ let check_idx_node_depth [tree_size] (tree: Tree[tree_size]) (depth: i32) (i: i6 let bit_width (x: i32): i32 = i32.num_bits - (i32.clz x) -let compile_tree [tree_size] [max_vars] [num_funcs] (tree: Tree[tree_size]) (symtab: Symtab[max_vars]) (instr_offset: [tree_size]i64) (max_instrs: i64) 
(func_starts: [num_funcs]u32) (func_ends: [num_funcs]u32) = +let compile_tree [tree_size] [num_funcs] (tree: Tree[tree_size]) (instr_offset: [tree_size]i64) (max_instrs: i64) (func_starts: [num_funcs]u32) (func_ends: [num_funcs]u32) = let idx_array = iota tree_size |> radix_sort (bit_width tree.max_depth) (\bit idx -> i32.get_bit bit tree.nodes[idx].depth) let depth_starts = iota tree_size |> filter (\i -> i == 0 || tree.nodes[idx_array[i]].depth != tree.nodes[idx_array[i-1]].depth) let initial_registers = replicate (tree_size * PARENT_IDX_PER_NODE) 0i64 @@ -2104,7 +2103,7 @@ let compile_tree [tree_size] [max_vars] [num_funcs] (tree: Tree[tree_size]) (sym let end = if j == tree.max_depth then tree_size else depth_starts[j + 1] let (idx, parent_idx, instrs, new_regs) = idx_array[start:end] |> - map (compile_node tree symtab (copy registers) instr_offset func_starts func_ends) |> + map (compile_node tree (copy registers) instr_offset func_starts func_ends) |> flatten |> unzip4 in diff --git a/src/compiler/codegen/register.fut b/src/compiler/codegen/register.fut index e18c975..db4fcdc 100644 --- a/src/compiler/codegen/register.fut +++ b/src/compiler/codegen/register.fut @@ -1,5 +1,5 @@ import "instr" -import "../lib/github.com/diku-dk/segmented/segmented" +import "../../../lib/github.com/diku-dk/segmented/segmented" type FuncInfo = { id: u32, diff --git a/src/compiler/codegen/symtab.fut b/src/compiler/codegen/symtab.fut deleted file mode 100755 index 75b3d03..0000000 --- a/src/compiler/codegen/symtab.fut +++ /dev/null @@ -1,13 +0,0 @@ -import "datatypes" - -type Variable = { - decl_type: DataType, - offset: u32 -} - -type Symtab [symtab_var_size] = { - variables: [symtab_var_size]Variable -} - -let symtab_local_offset [var_size] (symtab: Symtab[var_size]) (var_id: u32) = - symtab.variables[i64.u32 var_id].offset diff --git a/src/compiler/frontend.fut b/src/compiler/frontend.fut index 3174ef5..0705590 100644 --- a/src/compiler/frontend.fut +++ b/src/compiler/frontend.fut @@ -43,108 +43,106 @@ entry mk_parse_table [n] = mk_strtab table offsets lengths type token = (token.t, i32, i32) -entry frontend_tokenize (input: []u8) (lt: lex_table []): []token = +entry tokenize (input: []u8) (lt: lex_table []): []token = tokenize input lt -entry frontend_num_tokens [n] (_: [n]token): i32 = i32.i64 n +entry num_tokens [n] (_: [n]token): i32 = i32.i64 n -entry frontend_parse (tokens: []token) (sct: stack_change_table []) (pt: parse_table []): (bool, []production.t) = +entry parse (tokens: []token) (sct: stack_change_table []) (pt: parse_table []): (bool, []production.t) = let token_types = map (.0) tokens in if pareas_parser.check token_types sct then (true, pareas_parser.parse token_types pt) else (false, []) -entry frontend_build_parse_tree [n] (node_types: [n]production.t) (arities: arity_array): [n]i32 = +entry build_parse_tree [n] (node_types: [n]production.t) (arities: arity_array): [n]i32 = pareas_parser.build_parent_vector node_types arities -entry frontend_fix_bin_ops [n] (node_types: *[n]production.t) (parents: *[n]i32): ([]production.t, []i32) = +entry fix_bin_ops [n] (node_types: *[n]production.t) (parents: *[n]i32): ([]production.t, []i32) = let (node_types, parents) = fix_bin_ops node_types parents let (parents, old_index) = compactify parents |> unzip let node_types = gather node_types old_index in (node_types, parents) -entry frontend_fix_if_else [n] (node_types: *[n]production.t) (parents: *[n]i32): (bool, [n]production.t, [n]i32) = +entry fix_if_else [n] (node_types: *[n]production.t) 
(parents: *[n]i32): (bool, [n]production.t, [n]i32) = fix_if_else node_types parents -entry frontend_flatten_lists [n] (node_types: *[n]production.t) (parents: *[n]i32): ([n]production.t, [n]i32) = +entry flatten_lists [n] (node_types: *[n]production.t) (parents: *[n]i32): ([n]production.t, [n]i32) = flatten_lists node_types parents -entry frontend_fix_names [n] (node_types: *[n]production.t) (parents: *[n]i32): (bool, [n]production.t, [n]i32) = +entry fix_names [n] (node_types: *[n]production.t) (parents: *[n]i32): (bool, [n]production.t, [n]i32) = fix_names node_types parents -entry frontend_fix_ascriptions [n] (node_types: [n]production.t) (parents: *[n]i32): [n]i32 = +entry fix_ascriptions [n] (node_types: [n]production.t) (parents: *[n]i32): [n]i32 = fix_ascriptions node_types parents -entry frontend_fix_fn_decls [n] (node_types: [n]production.t) (parents: *[n]i32): (bool, [n]i32) = +entry fix_fn_decls [n] (node_types: [n]production.t) (parents: *[n]i32): (bool, [n]i32) = fix_fn_decls node_types parents -entry frontend_fix_args_and_params [n] (node_types: *[n]production.t) (parents: [n]i32): [n]production.t = +entry fix_args_and_params [n] (node_types: *[n]production.t) (parents: [n]i32): [n]production.t = let node_types = reinsert_arg_lists node_types let node_types = fix_param_lists node_types parents in node_types -entry frontend_fix_decls [n] (node_types: *[n]production.t) (parents: *[n]i32): (bool, [n]production.t, [n]i32) = +entry fix_decls [n] (node_types: *[n]production.t) (parents: *[n]i32): (bool, [n]production.t, [n]i32) = let node_types = fix_param_lists node_types parents let valid = check_fn_params node_types parents let (node_types, parents) = squish_decl_ascripts node_types parents in (valid, node_types, parents) -entry frontend_remove_marker_nodes [n] (node_types: [n]production.t) (parents: *[n]i32): [n]i32 = +entry remove_marker_nodes [n] (node_types: [n]production.t) (parents: *[n]i32): [n]i32 = remove_marker_nodes node_types parents -entry frontend_compute_prev_sibling [n] (node_types: *[n]production.t) (parents: *[n]i32): ([]production.t, []i32, []i32) = +entry compute_prev_sibling [n] (node_types: *[n]production.t) (parents: *[n]i32): ([]production.t, []i32, []i32) = let (parents, old_index) = compactify parents |> unzip let node_types = gather node_types old_index let depths = compute_depths parents let prev_siblings = build_sibling_vector parents depths in (node_types, parents, prev_siblings) -entry frontend_check_assignments [n] (node_types: [n]production.t) (parents: [n]i32) (prev_siblings: [n]i32): bool = +entry check_assignments [n] (node_types: [n]production.t) (parents: [n]i32) (prev_siblings: [n]i32): bool = check_assignments node_types parents prev_siblings -entry frontend_insert_derefs [n] (node_types: *[n]production.t) (parents: *[n]i32) (prev_siblings: *[n]i32) - : ([]production.t, []i32, []i32) - = +entry insert_derefs [n] (node_types: *[n]production.t) (parents: *[n]i32) (prev_siblings: *[n]i32): ([]production.t, []i32, []i32) = insert_derefs node_types parents prev_siblings |> unzip3 -entry frontend_extract_lexemes [n] (input: []u8) (tokens: []token) (node_types: [n]production.t): [n]u32 = +entry extract_lexemes [n] (input: []u8) (tokens: []token) (node_types: [n]production.t): [n]u32 = build_data_vector node_types input tokens -entry frontend_resolve_vars [n] (node_types: [n]production.t) (parents: [n]i32) (prev_siblings: [n]i32) (data: [n]u32): (bool, [n]i32) = +entry resolve_vars [n] (node_types: [n]production.t) (parents: [n]i32) 
(prev_siblings: [n]i32) (data: [n]u32): (bool, [n]i32) = let right_leafs = build_right_leaf_vector parents prev_siblings in resolve_vars node_types parents prev_siblings right_leafs data -entry frontend_resolve_fns [n] (node_types: [n]production.t) (resolution: *[n]i32) (data: [n]u32): (bool, [n]i32) = +entry resolve_fns [n] (node_types: [n]production.t) (resolution: *[n]i32) (data: [n]u32): (bool, [n]i32) = let (valid, fn_resolution) = resolve_fns node_types data -- This works because declarations and function calls are disjoint. let resolution = merge_resolutions resolution fn_resolution in (valid, resolution) -entry frontend_resolve_args [n] (node_types: [n]production.t) (parents: [n]i32) (prev_siblings: [n]i32) (resolution: *[n]i32): (bool, [n]i32) = +entry resolve_args [n] (node_types: [n]production.t) (parents: [n]i32) (prev_siblings: [n]i32) (resolution: *[n]i32): (bool, [n]i32) = let (valid, arg_resolution) = resolve_args node_types parents prev_siblings resolution -- This works because declarations, function calls, and function arg wrappers are disjoint. let resolution = merge_resolutions resolution arg_resolution in (valid, resolution) -entry frontend_resolve_data_types [n] (node_types: [n]production.t) (parents: [n]i32) (prev_siblings: [n]i32) (resolution: [n]i32): (bool, [n]data_type.t) = +entry resolve_data_types [n] (node_types: [n]production.t) (parents: [n]i32) (prev_siblings: [n]i32) (resolution: [n]i32): (bool, [n]data_type.t) = let data_types = resolve_types node_types parents prev_siblings resolution let types_valid = check_types node_types parents prev_siblings data_types in (types_valid, data_types) -entry frontend_check_return_types [n] (node_types: [n]production.t) (parents: [n]i32) (data_types: [n]data_type): bool = +entry check_return_types [n] (node_types: [n]production.t) (parents: [n]i32) (data_types: [n]data_type): bool = check_return_types node_types parents data_types -entry frontend_check_convergence [n] (node_types: [n]production.t) (parents: [n]i32) (prev_siblings: [n]i32) (data_types: [n]data_type): bool = +entry check_convergence [n] (node_types: [n]production.t) (parents: [n]i32) (prev_siblings: [n]i32) (data_types: [n]data_type): bool = check_return_paths node_types parents prev_siblings data_types -entry frontend_build_ast [n] +entry build_ast [n] (node_types: *[n]production.t) (parents: *[n]i32) (data: *[n]u32) (data_types: *[n]data_type) (prev_siblings: *[n]i32) (resolution: *[n]i32) - : ([]production.t, []i32, []u32, []data_type, []i32, []i32, []i32) + : ([]production.t, []i32, []u32, []data_type, []i32, []i32, []u32) = let (data, fn_tab) = assign_ids node_types resolution data_types data -- Compute the child index from the parent diff --git a/src/compiler/main.cpp b/src/compiler/main.cpp index 2f6a2d7..4391c09 100644 --- a/src/compiler/main.cpp +++ b/src/compiler/main.cpp @@ -4,6 +4,7 @@ #include "pareas/compiler/futhark_interop.hpp" #include "pareas/compiler/ast.hpp" #include "pareas/compiler/frontend.hpp" +#include "pareas/compiler/backend.hpp" #include "pareas/profiler/profiler.hpp" #include @@ -253,6 +254,10 @@ int main(int argc, char* argv[]) { auto ast = frontend::compile(ctx.get(), input, opts.verbose_tree, p, opts.futhark_debug_extra ? 
     auto ast = frontend::compile(ctx.get(), input, opts.verbose_tree, p, opts.futhark_debug_extra ? stderr : nullptr);
     p.end("frontend");

+    p.begin();
+    backend::compile(ctx.get(), ast, p);
+    p.end("backend");
+
     if (opts.profile > 0)
         p.dump(std::cout);
diff --git a/src/compiler/main.fut b/src/compiler/main.fut
index 5bb90c8..af46a67 100644
--- a/src/compiler/main.fut
+++ b/src/compiler/main.fut
@@ -1,8 +1,153 @@
-import "instr"
+module frontend = import "frontend"
+module backend = import "backend"
+module bridge = import "bridge"

--- let main (n : i64) : u32 =
--- let t = tree.alloc_tree n
--- let result = tree.walk_tree t
--- in result
+import "datatypes"

-let main (n: i64): i64 = n * 2
+-- frontend
+
+module g = frontend.g
+module production = g.production
+module token = g.token
+
+type~ lex_table [n] = frontend.lex_table [n]
+type~ stack_change_table [n] = frontend.stack_change_table [n]
+type~ parse_table [n] = frontend.parse_table [n]
+type~ arity_array = frontend.arity_array
+
+type token = frontend.token
+
+entry mk_lex_table [n] (is: [256]frontend.lexer.state) (mt: [n][n]frontend.lexer.state) (fs: [n]token.t): lex_table [n]
+    = frontend.mk_lex_table is mt fs
+
+entry mk_stack_change_table [n]
+    (table: [n]g.bracket.t)
+    (offsets: [g.num_tokens][g.num_tokens]i32)
+    (lengths: [g.num_tokens][g.num_tokens]i32): stack_change_table [n]
+    = frontend.mk_stack_change_table table offsets lengths
+
+entry mk_parse_table [n]
+    (table: [n]production.t)
+    (offsets: [g.num_tokens][g.num_tokens]i32)
+    (lengths: [g.num_tokens][g.num_tokens]i32): parse_table [n]
+    = frontend.mk_parse_table table offsets lengths
+
+entry frontend_tokenize (input: []u8) (lt: lex_table []): []token =
+    frontend.tokenize input lt
+
+entry frontend_num_tokens [n] (_: [n]token): i32 = i32.i64 n
+
+entry frontend_parse (tokens: []token) (sct: stack_change_table []) (pt: parse_table []): (bool, []production.t) =
+    frontend.parse tokens sct pt
+
+entry frontend_build_parse_tree [n] (node_types: [n]production.t) (arities: arity_array): [n]i32 =
+    frontend.build_parse_tree node_types arities
+
+entry frontend_fix_bin_ops [n] (node_types: *[n]production.t) (parents: *[n]i32): ([]production.t, []i32) =
+    frontend.fix_bin_ops node_types parents
+
+entry frontend_fix_if_else [n] (node_types: *[n]production.t) (parents: *[n]i32): (bool, [n]production.t, [n]i32) =
+    frontend.fix_if_else node_types parents
+
+entry frontend_flatten_lists [n] (node_types: *[n]production.t) (parents: *[n]i32): ([n]production.t, [n]i32) =
+    frontend.flatten_lists node_types parents
+
+entry frontend_fix_names [n] (node_types: *[n]production.t) (parents: *[n]i32): (bool, [n]production.t, [n]i32) =
+    frontend.fix_names node_types parents
+
+entry frontend_fix_ascriptions [n] (node_types: [n]production.t) (parents: *[n]i32): [n]i32 =
+    frontend.fix_ascriptions node_types parents
+
+entry frontend_fix_fn_decls [n] (node_types: [n]production.t) (parents: *[n]i32): (bool, [n]i32) =
+    frontend.fix_fn_decls node_types parents
+
+entry frontend_fix_args_and_params [n] (node_types: *[n]production.t) (parents: [n]i32): [n]production.t =
+    frontend.fix_args_and_params node_types parents
+
+entry frontend_fix_decls [n] (node_types: *[n]production.t) (parents: *[n]i32): (bool, [n]production.t, [n]i32) =
+    frontend.fix_decls node_types parents
+
+entry frontend_remove_marker_nodes [n] (node_types: [n]production.t) (parents: *[n]i32): [n]i32 =
+    frontend.remove_marker_nodes node_types parents
+
+entry frontend_compute_prev_sibling [n] (node_types: *[n]production.t) (parents: *[n]i32): ([]production.t, []i32, []i32) =
+    frontend.compute_prev_sibling node_types parents
+
+entry frontend_check_assignments [n] (node_types: [n]production.t) (parents: [n]i32) (prev_siblings: [n]i32): bool =
+    frontend.check_assignments node_types parents prev_siblings
+
+entry frontend_insert_derefs [n] (node_types: *[n]production.t) (parents: *[n]i32) (prev_siblings: *[n]i32): ([]production.t, []i32, []i32) =
+    frontend.insert_derefs node_types parents prev_siblings
+
+entry frontend_extract_lexemes [n] (input: []u8) (tokens: []token) (node_types: [n]production.t): [n]u32 =
+    frontend.extract_lexemes input tokens node_types
+
+entry frontend_resolve_vars [n] (node_types: [n]production.t) (parents: [n]i32) (prev_siblings: [n]i32) (data: [n]u32): (bool, [n]i32) =
+    frontend.resolve_vars node_types parents prev_siblings data
+
+entry frontend_resolve_fns [n] (node_types: [n]production.t) (resolution: *[n]i32) (data: [n]u32): (bool, [n]i32) =
+    frontend.resolve_fns node_types resolution data
+
+entry frontend_resolve_args [n] (node_types: [n]production.t) (parents: [n]i32) (prev_siblings: [n]i32) (resolution: *[n]i32): (bool, [n]i32) =
+    frontend.resolve_args node_types parents prev_siblings resolution
+
+entry frontend_resolve_data_types [n] (node_types: [n]production.t) (parents: [n]i32) (prev_siblings: [n]i32) (resolution: [n]i32): (bool, [n]data_type.t) =
+    frontend.resolve_data_types node_types parents prev_siblings resolution
+
+entry frontend_check_return_types [n] (node_types: [n]production.t) (parents: [n]i32) (data_types: [n]data_type): bool =
+    frontend.check_return_types node_types parents data_types
+
+entry frontend_check_convergence [n] (node_types: [n]production.t) (parents: [n]i32) (prev_siblings: [n]i32) (data_types: [n]data_type): bool =
+    frontend.check_convergence node_types parents prev_siblings data_types
+
+entry frontend_build_ast [n]
+    (node_types: *[n]production.t)
+    (parents: *[n]i32)
+    (data: *[n]u32)
+    (data_types: *[n]data_type)
+    (prev_siblings: *[n]i32)
+    (resolution: *[n]i32)
+    : ([]production.t, []i32, []u32, []data_type, []i32, []i32, []u32)
+    = frontend.build_ast node_types parents data data_types prev_siblings resolution
+
+-- backend
+
+type Tree [n] = backend.Tree [n]
+type FuncInfo = backend.FuncInfo
+type Instr = backend.Instr
+
+entry backend_convert_tree [n]
+    (node_types: *[n]production.t)
+    (parents: *[n]i32)
+    (data: *[n]u32)
+    (data_types: *[n]data_type)
+    (depths: *[n]i32)
+    (child_idx: *[n]i32): Tree[n]
+    = bridge.convert_ast node_types data_types parents depths child_idx data
+
+entry backend_preprocess [n] (tree: Tree[n]): (Tree[n]) =
+    backend.stage_preprocess tree
+
+entry backend_instr_count [n] (tree: Tree[n]): [n]u32 =
+    backend.stage_instr_count tree
+
+entry backend_instr_count_make_function_table [n] (tree: Tree[n]) (instr_offset: [n]u32) =
+    backend.stage_instr_count_make_function_table tree instr_offset
+
+entry backend_compact_functab [n] (func_id: [n]u32) (func_start: [n]u32) (func_size: [n]u32): [n]FuncInfo =
+    backend.stage_compact_functab func_id func_start func_size
+
+entry backend_instr_gen [n] [k] (tree: Tree[n]) (instr_offset: [n]u32) (func_tab: [k]FuncInfo): []Instr =
+    backend.stage_instr_gen tree instr_offset func_tab
+
+entry backend_optimize [n] [m] (instr_data: [n]Instr) (func_tab: [m]FuncInfo): ([n]Instr, [m]FuncInfo, [n]bool) =
+    backend.stage_optimize instr_data func_tab
+
+entry backend_regalloc [n] [m] (instrs: [n]Instr) (func_tab: [m]FuncInfo) (func_symbols: [m]u32) (optimize_away: [n]bool): ([]Instr, [m]FuncInfo) =
+    backend.stage_regalloc instrs func_tab func_symbols optimize_away
+
+entry backend_fix_jumps [n] [m] (instrs: [n]Instr) (func_tab: [m]FuncInfo): ([]Instr, [m]u32, [m]u32, [m]u32) =
+    backend.stage_fix_jumps instrs func_tab
+
+entry backend_postprocess [n] (instrs: [n]Instr) =
+    backend.stage_postprocess instrs
diff --git a/src/compiler/module.cpp b/src/compiler/module.cpp
new file mode 100644
index 0000000..beba5ce
--- /dev/null
+++ b/src/compiler/module.cpp
@@ -0,0 +1,45 @@
+#include "pareas/compiler/module.hpp"
+
+#include <utility>
+
+DeviceModule::DeviceModule(futhark_context* ctx):
+    ctx(ctx),
+    func_id(nullptr),
+    func_start(nullptr),
+    func_size(nullptr),
+    instructions(nullptr) {
+}
+
+DeviceModule::DeviceModule(DeviceModule&& other):
+    ctx(std::exchange(other.ctx, nullptr)),
+    func_id(std::exchange(other.func_id, nullptr)),
+    func_start(std::exchange(other.func_start, nullptr)),
+    func_size(std::exchange(other.func_size, nullptr)),
+    instructions(std::exchange(other.instructions, nullptr)) {
+}
+
+DeviceModule& DeviceModule::operator=(DeviceModule&& other) {
+    std::swap(this->ctx, other.ctx);
+    std::swap(this->func_id, other.func_id);
+    std::swap(this->func_start, other.func_start);
+    std::swap(this->func_size, other.func_size);
+    std::swap(this->instructions, other.instructions);
+    return *this;
+}
+
+DeviceModule::~DeviceModule() {
+    if (!this->ctx)
+        return;
+
+    if (this->func_id)
+        futhark_free_u32_1d(this->ctx, this->func_id);
+
+    if (this->func_start)
+        futhark_free_u32_1d(this->ctx, this->func_start);
+
+    if (this->func_size)
+        futhark_free_u32_1d(this->ctx, this->func_size);
+
+    if (this->instructions)
+        futhark_free_u32_1d(this->ctx, this->instructions);
+}
diff --git a/src/compiler/passes/check_return_paths.fut b/src/compiler/passes/check_return_paths.fut
index a29587d..a863543 100644
--- a/src/compiler/passes/check_return_paths.fut
+++ b/src/compiler/passes/check_return_paths.fut
@@ -60,30 +60,24 @@ let check_return_paths [n] (node_types: [n]production.t) (parents: [n]i32) (prev
         |> map (== -1)
         |> map2 (\parent first_child -> if first_child then parent else -1) parents
         |> invert
-    -- We will also need to know whether a function returns void.
-    let is_void_fn_decl =
-        map2
-            (\nty dty -> nty == production_fn_decl && dty == data_type.void)
-            node_types
-            data_types
     -- Build the boolean expression tree.
     -- First, produce the initial value and operator.
-    in map4
+    in map3
         -- Nodes which have only one child/which pass up their value are simply mapped to #or.
-        (\nty parent next_sibling is_void_fn_decl ->
+        (\nty parent next_sibling ->
            if nty == production_stat_return then #true
            else if parent == -1 then #or
            -- Only the second child of an if/else node becomes and-type node.
            else if node_types[parent] == production_stat_if_else && nty == production_stat_list && next_sibling != -1 then #and
-            -- Cannot guarantee these types returning, so return false from these
+            -- Cannot guarantee these types returning, so return false from these.
            else if node_types[parent] == production_stat_if || node_types[parent] == production_stat_while then #false
-            else if nty == production_fn_decl then
-                if is_void_fn_decl then #or else #and
+            else if nty == production_fn_decl then #true
+            -- The return type of a void function maps to true as these don't need to end every path with a return statement.
+            else if nty == production_type_void && node_types[parent] == production_fn_decl then #true
            else #or)
        node_types
        parents
        next_siblings
-        is_void_fn_decl
    -- Now add the children
    |> zip3 first_childs
diff --git a/src/compiler/passes/ids.fut b/src/compiler/passes/ids.fut
index 55d8cb5..fd8dcbf 100644
--- a/src/compiler/passes/ids.fut
+++ b/src/compiler/passes/ids.fut
@@ -12,7 +12,7 @@ import "../../../lib/github.com/diku-dk/segmented/segmented"
 -- points to.
 -- This function also computes a function table, which simply contains the amount of declarations indexed
 -- by function-ID. The required arrays are computed in this function anyway.
-let assign_ids [n] (node_types: [n]production.t) (resolution: [n]i32) (data_types: [n]data_type) (data: [n]u32): ([n]u32, []i32) =
+let assign_ids [n] (node_types: [n]production.t) (resolution: [n]i32) (data_types: [n]data_type) (data: [n]u32): ([n]u32, []u32) =
     -- Even though the tree is strictly speaking not in any order right now, there is no pass that
     -- changes the relative order of the nodes we're interested in (`fn_decl`, `atom_decl` and `param`),
     -- so we're just going to do a (segmented) scan to assign the IDs. Function IDs will simply be a
@@ -66,8 +66,6 @@ let assign_ids [n] (node_types: [n]production.t) (resolution: [n]i32) (data_type
            is
    -- Add one to get a maximum declaration instead of a count.
    (map (+1) decl_ids)
-    -- Codegen wants this as i32.
-    |> map i32.u32
    -- Insert everything back into the data vector.
    let data =
        map4
diff --git a/src/compiler/tree.fut b/src/compiler/tree.fut
deleted file mode 100644
index 38a91e0..0000000
--- a/src/compiler/tree.fut
+++ /dev/null
@@ -1,69 +0,0 @@
-import "datatypes"
-
-let TREE_SIZE : u32 = 10
-
---Node types
-type NodeType =
-    #invalid |
-    #statement_list |
-    #empty_stat |
-    #func_decl |
-    #expr_stat |
-    #if_stat |
-    #if_else_stat |
-    #else_aux |
-    #while_stat |
-    #func_call_expr |
-    #func_call_arg |
-    #add_expr |
-    #sub_expr |
-    #mul_expr |
-    #div_expr |
-    #mod_expr |
-    #bitand_expr |
-    #bitor_expr |
-    #bitxor_expr |
-    #lshift_expr |
-    #rshift_expr |
-    #urshift_expr |
-    #land_expr |
-    #lor_expr |
-    #eq_expr |
-    #neq_expr |
-    #less_expr |
-    #great_expr |
-    #lesseq_expr |
-    #greateq_expr |
-    #bitnot_expr |
-    #lnot_expr |
-    #neg_expr |
-    #lit_expr |
-    #cast_expr |
-    #assign_expr |
-    #decl_expr |
-    #id_expr
-
---Node definition
-type Node = {
-    node_type: NodeType,
-    resulting_type: DataType,
-    parent: u32,
-    depth: u32
-}
-
---Tree definition
-type Tree [tree_size] = {
-    nodes: [tree_size]Node, --Nodes of the tree
-    max_depth: u32 --Tree depth
-}
-
---Type definition for the values of the nodes
-type NodeValue = u32 --Temporary type
-
---Allocates a tree of a given size
-let alloc_tree (tree_size: i64) =
-    let default_node : Node = {node_type = #invalid, resulting_type = #invalid, parent = 0, depth = 0}
-    in {
-        nodes = replicate (tree_size) default_node,
-        max_depth = 0u32
-    }
\ No newline at end of file
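
Note on the new backend path (sketch, not part of the patch): main.cpp now calls backend::compile(ctx.get(), ast, p) and discards the result, and module.cpp adds a move-only DeviceModule whose device arrays are released in ~DeviceModule(). Assuming the value returned by backend::compile is such a DeviceModule, the minimal sketch below shows how a caller could copy the generated instruction words back to the host. It uses only the Futhark-generated C API naming already present in this patch (futhark_shape_u32_1d, futhark_values_u32_1d); the helper download_instructions is hypothetical and not repository code.

// Sketch only: copy a DeviceModule's instruction stream to the host.
// Assumes the generated futhark_shape_u32_1d / futhark_values_u32_1d entry
// points; download_instructions itself is a hypothetical helper.
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <vector>

#include "pareas/compiler/module.hpp"

std::vector<std::uint32_t> download_instructions(const DeviceModule& mod) {
    if (!mod.instructions)
        return {};

    // The shape query yields the length of the 1-D device array.
    auto n = futhark_shape_u32_1d(mod.ctx, mod.instructions)[0];

    auto instrs = std::vector<std::uint32_t>(static_cast<std::size_t>(n));
    if (futhark_values_u32_1d(mod.ctx, mod.instructions, instrs.data()))
        throw std::runtime_error("failed to copy instructions from device");

    return instrs;
}

// Usage, mirroring the call added to main.cpp:
//     auto mod = backend::compile(ctx.get(), ast, p);
//     auto instrs = download_instructions(mod);
//     // mod's remaining device arrays are freed by ~DeviceModule().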