diff --git a/include/pareas/compiler/frontend.hpp b/include/pareas/compiler/frontend.hpp index 3c71a9b..519d190 100644 --- a/include/pareas/compiler/frontend.hpp +++ b/include/pareas/compiler/frontend.hpp @@ -9,6 +9,8 @@ #include #include #include +#include +#include namespace frontend { enum class Error : uint8_t { @@ -33,7 +35,7 @@ namespace frontend { std::runtime_error(error_name(e)) {} }; - DeviceAst compile(futhark_context* ctx, const std::string& input, pareas::Profiler& p); + DeviceAst compile(futhark_context* ctx, const std::string& input, bool verbose_tree, pareas::Profiler& p, std::FILE* debug_log); } #endif diff --git a/include/pareas/compiler/futhark_interop.hpp b/include/pareas/compiler/futhark_interop.hpp index 549959d..e371d94 100644 --- a/include/pareas/compiler/futhark_interop.hpp +++ b/include/pareas/compiler/futhark_interop.hpp @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -74,6 +73,13 @@ namespace futhark { } } + void clear() { + if (this->data) { + free_fn(this->ctx, this->data); + this->data = nullptr; + } + } + Array* get() { return this->data; } @@ -124,6 +130,10 @@ namespace futhark { throw Error(this->handle.ctx); } + void clear() { + this->handle.clear(); + } + Array* get() { return this->handle.get(); } @@ -150,18 +160,6 @@ namespace futhark { throw Error(this->handle.ctx); } - std::vector download() const { - auto* shape = this->shape(); - int64_t total_size = 1; - for (size_t i = 0; i < N; ++i) { - total_size *= shape[i]; - } - - auto result = std::vector(total_size); - this->values(result.data()); - return result; - } - const int64_t* shape() const { return ArrayTraits::shape_fn(this->handle.ctx, this->handle.data); } diff --git a/include/pareas/json/futhark_interop.hpp b/include/pareas/json/futhark_interop.hpp new file mode 100644 index 0000000..2297266 --- /dev/null +++ b/include/pareas/json/futhark_interop.hpp @@ -0,0 +1,213 @@ +#ifndef _PAREAS_JSON_FUTHARK_INTEROP_HPP +#define 
_PAREAS_JSON_FUTHARK_INTEROP_HPP + +#include "json_futhark_generated.h" + +#include +#include +#include +#include +#include +#include +#include + +namespace futhark { + template + struct Deleter { + void operator()(T* t) const { + deleter(t); + } + }; + + template + using Unique = std::unique_ptr>; + + using ContextConfig = Unique; + using Context = Unique; + + inline std::string get_error_str(futhark_context* ctx) { + auto err = futhark_context_get_error(ctx); + if (err) { + auto err_str = std::string(err); + free(err); // leak if the string constructor throws, but whatever. + return err_str; + } + + return "(no diagnostic)"; + } + + struct Error: std::runtime_error { + Error(futhark_context* ctx): + std::runtime_error(get_error_str(ctx)) {} + }; + + template + struct UniqueOpaqueArray { + futhark_context* ctx; + Array* data; + + UniqueOpaqueArray(futhark_context* ctx, Array* data): + ctx(ctx), data(data) { + } + + explicit UniqueOpaqueArray(futhark_context* ctx): + ctx(ctx), data(nullptr) { + } + + UniqueOpaqueArray(UniqueOpaqueArray&& other): + ctx(other.ctx), data(std::exchange(other.data, nullptr)) { + } + + UniqueOpaqueArray& operator=(UniqueOpaqueArray&& other) { + std::swap(this->data, other.data); + std::swap(this->ctx, other.ctx); + return *this; + } + + UniqueOpaqueArray(const UniqueOpaqueArray&) = delete; + UniqueOpaqueArray& operator=(const UniqueOpaqueArray&) = delete; + + ~UniqueOpaqueArray() { + if (this->data) { + free_fn(this->ctx, this->data); + } + } + + void clear() { + if (this->data) { + free_fn(this->ctx, this->data); + this->data = nullptr; + } + } + + Array* get() { + return this->data; + } + + const Array* get() const { + return this->data; + } + + Array** operator&() { + return &this->data; + } + + operator Array*() { + return this->data; + } + + operator const Array*() const { + return this->data; + } + }; + + using UniqueLexTable = UniqueOpaqueArray; + using UniqueParseTable = UniqueOpaqueArray; + using UniqueStackChangeTable = 
UniqueOpaqueArray; + + template + struct ArrayTraits; + + template + struct UniqueArray { + using Array = typename ArrayTraits::Array; + + UniqueOpaqueArray::free_fn> handle; + + UniqueArray(futhark_context* ctx, Array* data): + handle(ctx, data) { + } + + explicit UniqueArray(futhark_context* ctx): + handle(ctx, nullptr) { + } + + template + UniqueArray(futhark_context* ctx, const T* data, Sizes... dims): + handle(ctx, ArrayTraits::new_fn(ctx, data, dims...)) { + if (!this->handle.data) + throw Error(this->handle.ctx); + } + + void clear() { + this->handle.clear(); + } + + Array* get() { + return this->handle.get(); + } + + const Array* get() const { + return this->handle.get(); + } + + Array** operator&() { + return &this->handle; + } + + operator Array*() { + return this->handle; + } + + operator const Array*() const { + return this->handle; + } + + void values(T* out) const { + int err = ArrayTraits::values_fn(this->handle.ctx, this->handle.data, out); + if (err != 0) + throw Error(this->handle.ctx); + } + + const int64_t* shape() const { + return ArrayTraits::shape_fn(this->handle.ctx, this->handle.data); + } + }; + + template <> + struct ArrayTraits { + using Array = futhark_u8_1d; + constexpr static const auto new_fn = futhark_new_u8_1d; + constexpr static const auto free_fn = futhark_free_u8_1d; + constexpr static const auto shape_fn = futhark_shape_u8_1d; + constexpr static const auto values_fn = futhark_values_u8_1d; + }; + + template <> + struct ArrayTraits { + using Array = futhark_u16_1d; + constexpr static const auto new_fn = futhark_new_u16_1d; + constexpr static const auto free_fn = futhark_free_u16_1d; + constexpr static const auto shape_fn = futhark_shape_u16_1d; + constexpr static const auto values_fn = futhark_values_u16_1d; + }; + + template <> + struct ArrayTraits { + using Array = futhark_u16_2d; + constexpr static const auto new_fn = futhark_new_u16_2d; + constexpr static const auto free_fn = futhark_free_u16_2d; + constexpr static const 
auto shape_fn = futhark_shape_u16_2d; + constexpr static const auto values_fn = futhark_values_u16_2d; + }; + + template <> + struct ArrayTraits { + using Array = futhark_i32_1d; + constexpr static const auto new_fn = futhark_new_i32_1d; + constexpr static const auto free_fn = futhark_free_i32_1d; + constexpr static const auto shape_fn = futhark_shape_i32_1d; + constexpr static const auto values_fn = futhark_values_i32_1d; + }; + + template <> + struct ArrayTraits { + using Array = futhark_i32_2d; + constexpr static const auto new_fn = futhark_new_i32_2d; + constexpr static const auto free_fn = futhark_free_i32_2d; + constexpr static const auto shape_fn = futhark_shape_i32_2d; + constexpr static const auto values_fn = futhark_values_i32_2d; + }; +} + +#endif diff --git a/meson.build b/meson.build index f13e335..15b43b0 100644 --- a/meson.build +++ b/meson.build @@ -14,6 +14,9 @@ add_project_arguments( language: ['c', 'cpp'], ) +futhark = find_program('futhark') +futhark_wrapper = find_program('src/tools/compile_futhark.py') + inc = include_directories('include') fmt_dep = subproject('fmt').get_variable('fmt_dep') @@ -52,6 +55,7 @@ lpg_sources = files( pareas_lpg_exe = executable( 'pareas-lpg', lpg_sources, + build_by_default: not meson.is_subproject(), dependencies: fmt_dep, include_directories: inc, ) @@ -64,26 +68,29 @@ pareas_prof_dep = declare_dependency( ) # Compiler -dependencies = [pareas_prof_dep, fmt_dep, dependency('threads')] +futhark_deps = [dependency('threads')] # Build futhark library +futhark_wrapper_exe = find_program('src/tools/compile_futhark.py') futhark_backend = get_option('futhark-backend') if futhark_backend == 'opencl' - dependencies += dependency('OpenCL') + futhark_deps += dependency('OpenCL') elif futhark_backend == 'cuda' - dependencies += dependency('cuda', modules: ['cuda', 'cudart', 'nvrtc']) + futhark_deps += dependency('cuda', modules: ['cuda', 'cudart', 'nvrtc']) endif +# Compiler grammar = custom_target( 'grammar', input: 
['src/compiler/lexer/pareas.lex', 'src/compiler/parser/pareas.g'], output: ['pareas_grammar.hpp', 'pareas_grammar.cpp', 'pareas_grammar.fut', 'pareas_grammar.S', 'pareas_grammar.dat'], + build_by_default: false, command: [ pareas_lpg_exe, '--lexer', '@INPUT0@', '--parser', '@INPUT1@', - '-o', 'pareas_grammar', + '-o', '@OUTDIR@/pareas_grammar', '--namespace', 'grammar', ], ) @@ -92,7 +99,7 @@ grammar_cpp = grammar[1] grammar_fut = grammar[2] grammar_asm = grammar[3] -futhark_sources = files( +futhark_sources = [ 'lib/github.com/diku-dk/sorts/radix_sort.fut', 'lib/github.com/diku-dk/segmented/segmented.fut', 'src/compiler/string.fut', @@ -116,13 +123,13 @@ futhark_sources = files( 'src/compiler/passes/type_resolution.fut', 'src/compiler/passes/check_return_paths.fut', 'src/compiler/passes/ids.fut', -) +] futhark_compile_command = [ - find_program('src/tools/compile_futhark.py'), + futhark_wrapper_exe, '--futhark', find_program('futhark'), '--futhark-backend', futhark_backend, - '--output', 'futhark_generated', + '--output', '@OUTDIR@/futhark_generated', '--dir', '@PRIVATE_DIR@', '--main', 'src/compiler/frontend.fut', ] @@ -155,8 +162,75 @@ sources = files( pareas_exe = executable( 'pareas', [grammar_hpp, grammar_cpp, grammar_asm, sources, futhark_generated], - install: true, - build_by_default: true, - dependencies: dependencies, + build_by_default: not meson.is_subproject(), + dependencies: [pareas_prof_dep, fmt_dep, futhark_deps], + include_directories: inc, +) + +# JSON test + +json_sources = [ + 'src/json/main.cpp', +] + +json_futhark_sources = [ + 'lib/github.com/diku-dk/sorts/radix_sort.fut', + 'src/compiler/string.fut', + 'src/json/main.fut', + 'src/compiler/lexer/lexer.fut', + 'src/compiler/parser/binary_tree.fut', + 'src/compiler/parser/bracket_matching.fut', + 'src/compiler/parser/parser.fut', + 'src/compiler/util.fut', +] + +json_grammar = custom_target( + 'json-grammar', + input: ['src/json/json.lex', 'src/json/json.g'], + output: ['json_grammar.hpp', 
'json_grammar.cpp', 'json_grammar.fut', 'json_grammar.S', 'json_grammar.dat'], + command: [ + pareas_lpg_exe, + '--lexer', '@INPUT0@', + '--parser', '@INPUT1@', + '-o', '@OUTDIR@/json_grammar', + '--namespace', 'json', + ], +) +json_grammar_hpp = json_grammar[0] +json_grammar_cpp = json_grammar[1] +json_grammar_fut = json_grammar[2] +json_grammar_asm = json_grammar[3] + +json_futhark_compile_command = [ + futhark_wrapper, + '--futhark', futhark, + '--futhark-backend', futhark_backend, + '--output', '@OUTDIR@/json_futhark_generated', + '--dir', '@PRIVATE_DIR@', + '--main', 'src/json/main.fut', +] + +json_inputs = [] + +foreach source : json_futhark_sources + json_futhark_compile_command += ['-f', '@INPUT@0@@'.format(json_inputs.length()), source] + json_inputs += source +endforeach + +json_futhark_compile_command += ['-f', '@INPUT@0@@'.format(json_inputs.length()), 'gen/json_grammar.fut'] +json_inputs += json_grammar_fut + +json_futhark_generated = custom_target( + 'json-futhark', + input: json_inputs, + output: ['json_futhark_generated.c', 'json_futhark_generated.h'], + command: json_futhark_compile_command, +) + +pareas_json_exe = executable( + 'pareas-json', + [json_grammar_hpp, json_grammar_cpp, json_grammar_asm, json_sources, json_futhark_generated], + build_by_default: not meson.is_subproject(), + dependencies: [pareas_prof_dep, fmt_dep, futhark_deps], include_directories: inc, ) diff --git a/meson_options.txt b/meson_options.txt index f2e7197..b1a5364 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -1 +1 @@ -option('futhark-backend', type: 'combo', choices: ['c', 'multicore', 'opencl', 'cuda'], value: 'c', description: 'Select the backend that Futhark code compiles to') +option('futhark-backend', type: 'combo', choices: ['c', 'multicore', 'opencl', 'cuda'], value: 'c', description: 'Select the backend that Futhark code compiles to', yield: true) diff --git a/src/compiler/frontend.cpp b/src/compiler/frontend.cpp index 798b30e..23f7b8c 100644 --- 
a/src/compiler/frontend.cpp +++ b/src/compiler/frontend.cpp @@ -6,6 +6,8 @@ #include #include +#include + namespace { futhark::UniqueLexTable upload_lex_table(futhark_context* ctx) { auto initial_state = futhark::UniqueArray( @@ -84,7 +86,13 @@ namespace frontend { } } - DeviceAst compile(futhark_context* ctx, const std::string& input, pareas::Profiler& p) { + DeviceAst compile(futhark_context* ctx, const std::string& input, bool verbose_tree, pareas::Profiler& p, std::FILE* debug_log) { + auto debug_log_region = [&](const char* name) { + if (debug_log) + fmt::print(debug_log, "<<<{}>>>\n", name); + }; + + debug_log_region("upload"); p.begin(); p.begin(); auto lex_table = upload_lex_table(ctx); @@ -110,13 +118,22 @@ namespace frontend { p.begin(); + debug_log_region("tokenize"); auto tokens = futhark::UniqueTokenArray(ctx); p.measure("tokenize", [&]{ int err = futhark_entry_frontend_tokenize(ctx, &tokens, input_array, lex_table); if (err) throw futhark::Error(ctx); }); + lex_table.clear(); + + if (verbose_tree) { + int32_t result; + futhark_entry_frontend_num_tokens(ctx, &result, tokens); + fmt::print(std::cerr, "Tokens: {}\n", result); + } + debug_log_region("parse"); auto node_types = futhark::UniqueArray(ctx); p.measure("parse", [&]{ bool valid = false; @@ -126,15 +143,25 @@ namespace frontend { if (!valid) throw CompileError(Error::PARSE_ERROR); }); + sct.clear(); + pt.clear(); + + if (verbose_tree) { + fmt::print(std::cerr, "Initial nodes: {}\n", node_types.shape()[0]); + } + debug_log_region("build parse tree"); auto parents = futhark::UniqueArray(ctx); p.measure("build parse tree", [&]{ int err = futhark_entry_frontend_build_parse_tree(ctx, &parents, node_types, arity_array); if (err) throw futhark::Error(ctx); }); + arity_array.clear(); + p.begin(); + debug_log_region("syntax"); p.measure("fix bin ops", [&]{ auto old_node_types = std::move(node_types); auto old_parents = std::move(parents); @@ -143,6 +170,10 @@ namespace frontend { throw 
futhark::Error(ctx); }); + if (verbose_tree) { + fmt::print(std::cerr, "Nodes after fix bin ops: {}\n", node_types.shape()[0]); + } + p.measure("fix conditionals", [&]{ auto old_node_types = std::move(node_types); auto old_parents = std::move(parents); @@ -235,6 +266,7 @@ namespace frontend { p.end("syntax"); p.begin(); + debug_log_region("sema"); p.measure("insert derefs", [&]{ auto old_node_types = std::move(node_types); auto old_parents = std::move(parents); @@ -250,6 +282,7 @@ namespace frontend { if (err) throw futhark::Error(ctx); }); + input_array.clear(); auto resolution = futhark::UniqueArray(ctx); p.measure("resolve vars", [&]{ @@ -306,7 +339,7 @@ namespace frontend { if (err) throw futhark::Error(ctx); if (!valid) - throw CompileError(Error::INVALID_RETURN); + throw CompileError(Error::MISSING_RETURN); }); auto ast = DeviceAst(ctx); @@ -335,6 +368,11 @@ namespace frontend { p.end("sema"); p.end("compile"); + if (verbose_tree) { + fmt::print(std::cerr, "Final nodes: {}\n", ast.num_nodes()); + fmt::print(std::cerr, "Functions: {}\n", ast.num_functions()); + } + return ast; } } diff --git a/src/compiler/frontend.fut b/src/compiler/frontend.fut index e5eaf0c..3174ef5 100644 --- a/src/compiler/frontend.fut +++ b/src/compiler/frontend.fut @@ -46,6 +46,8 @@ type token = (token.t, i32, i32) entry frontend_tokenize (input: []u8) (lt: lex_table []): []token = tokenize input lt +entry frontend_num_tokens [n] (_: [n]token): i32 = i32.i64 n + entry frontend_parse (tokens: []token) (sct: stack_change_table []) (pt: parse_table []): (bool, []production.t) = let token_types = map (.0) tokens in if pareas_parser.check token_types sct diff --git a/src/compiler/lexer/pareas.lex b/src/compiler/lexer/pareas.lex index ceebd8f..91929a0 100644 --- a/src/compiler/lexer/pareas.lex +++ b/src/compiler/lexer/pareas.lex @@ -43,8 +43,8 @@ colon = /:/ # For this case, we differentiate binary minus from unary minus based on the tokens # that precede it. 
# Also define a special binary whitespace token so that we can lex `a - b`. -binary_minus_whitespace = /[ \t\r\n]+/ [rparen, name, float_literal, int_literal] -binary_minus = /-/ [rparen, name, float_literal, int_literal, binary_minus_whitespace] +binary_minus_whitespace = /[ \t\r\n]+/ [rparen, rbracket, name, float_literal, int_literal] +binary_minus = /-/ [rparen, rbracket, name, float_literal, int_literal, binary_minus_whitespace] ## Parenthesis rparen = /\)/ diff --git a/src/compiler/main.cpp b/src/compiler/main.cpp index 38213e6..2f6a2d7 100644 --- a/src/compiler/main.cpp +++ b/src/compiler/main.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -27,8 +28,10 @@ struct Options { bool help; bool dump_dot; unsigned profile; + bool verbose_tree; bool futhark_verbose; bool futhark_debug; + bool futhark_debug_extra; // Options available for the multicore backend int threads; @@ -40,15 +43,18 @@ struct Options { void print_usage(char* progname) { fmt::print( - "Usage: {} [options...] \n" + "Usage: {} [options...] \n" "Available options:\n" "-o --output Write the output to . 
(default: b.out)\n" "-h --help Show this message and exit.\n" "--dump-dot Dump tree as dot graph.\n" "-p --profile Record (non-futhark) profiling information.\n" " (default: 0, =disabled)\n" + "--verbose-tree Dump some information about the tree to stderr.\n" "--futhark-verbose Enable Futhark logging.\n" "--futhark-debug Enable Futhark debug logging.\n" + "--futhark-debug-extra Futhark debug logging with extra information.\n" + " Not compatible with --futhark-debug.\n" #if defined(FUTHARK_BACKEND_multicore) "Available backend options:\n" "-t --threads Set the maximum number of threads that may be used\n" @@ -75,8 +81,10 @@ bool parse_options(Options* opts, int argc, char* argv[]) { .help = false, .dump_dot = false, .profile = 0, + .verbose_tree = false, .futhark_verbose = false, .futhark_debug = false, + .futhark_debug_extra = false, .threads = 0, .device_name = nullptr, .futhark_profile = false, @@ -130,10 +138,14 @@ bool parse_options(Options* opts, int argc, char* argv[]) { } profile_arg = argv[i]; + } else if (arg == "--verbose-tree") { + opts->verbose_tree = true; } else if (arg == "--futhark-verbose") { opts->futhark_verbose = true; } else if (arg == "--futhark-debug") { opts->futhark_debug = true; + } else if (arg == "--futhark-debug-extra") { + opts->futhark_debug_extra = true; } else if (!opts->input_path) { opts->input_path = argv[i]; } else { @@ -151,6 +163,9 @@ bool parse_options(Options* opts, int argc, char* argv[]) { } else if (!opts->input_path[0]) { fmt::print(std::cerr, "Error: may not be empty\n"); return false; + } else if (opts->futhark_debug && opts->futhark_debug_extra) { + fmt::print(std::cerr, "Error: --futhark-debug is incompatible with --futhark-debug-extra\n"); + return false; } if (!opts->output_path[0]) { @@ -213,7 +228,7 @@ int main(int argc, char* argv[]) { p.begin(); auto config = futhark::ContextConfig(futhark_context_config_new()); futhark_context_config_set_logging(config.get(), opts.futhark_verbose); - 
futhark_context_config_set_debugging(config.get(), opts.futhark_debug); + futhark_context_config_set_debugging(config.get(), opts.futhark_debug || opts.futhark_debug_extra); #if defined(FUTHARK_BACKEND_multicore) futhark_context_config_set_num_threads(config.get(), opts.threads); @@ -226,6 +241,7 @@ int main(int argc, char* argv[]) { #endif auto ctx = futhark::Context(futhark_context_new(config.get())); + futhark_context_set_logging_file(ctx.get(), stderr); p.set_sync_callback([ctx = ctx.get()]{ if (futhark_context_sync(ctx)) throw futhark::Error(ctx); @@ -234,26 +250,23 @@ int main(int argc, char* argv[]) { try { p.begin(); - auto ast = frontend::compile(ctx.get(), input, p); + auto ast = frontend::compile(ctx.get(), input, opts.verbose_tree, p, opts.futhark_debug_extra ? stderr : nullptr); p.end("frontend"); if (opts.profile > 0) p.dump(std::cout); - fmt::print(std::cerr, "{} nodes\n", ast.num_nodes()); - if (opts.dump_dot) { + p.begin(); auto host_ast = ast.download(); + p.end("ast download"); host_ast.dump_dot(std::cout); } if (opts.futhark_profile) { auto report = MallocPtr(futhark_context_report(ctx.get())); - fmt::print("Futhark profile report:\n{}", report); + fmt::print(std::cerr, "Futhark profile report:\n{}", report); } - - if (futhark_context_sync(ctx.get()) != 0) - throw futhark::Error(ctx.get()); } catch (const frontend::CompileError& err) { fmt::print(std::cerr, "Compile error: {}\n", err.what()); return EXIT_FAILURE; diff --git a/src/compiler/passes/check_return_paths.fut b/src/compiler/passes/check_return_paths.fut index 40b812d..a29587d 100644 --- a/src/compiler/passes/check_return_paths.fut +++ b/src/compiler/passes/check_return_paths.fut @@ -68,19 +68,22 @@ let check_return_paths [n] (node_types: [n]production.t) (parents: [n]i32) (prev data_types -- Build the boolean expression tree. -- First, produce the initial value and operator. - in map3 + in map4 -- Nodes which have only one child/which pass up their value are simply mapped to #or. 
- (\nty parent next_sibling -> + (\nty parent next_sibling is_void_fn_decl -> if nty == production_stat_return then #true else if parent == -1 then #or -- Only the second child of an if/else node becomes and-type node. else if node_types[parent] == production_stat_if_else && nty == production_stat_list && next_sibling != -1 then #and -- Cannot guarantee these types returning, so return false from these else if node_types[parent] == production_stat_if || node_types[parent] == production_stat_while then #false + else if nty == production_fn_decl then + if is_void_fn_decl then #or else #and else #or) node_types parents next_siblings + is_void_fn_decl -- Now add the children |> zip3 first_childs @@ -89,13 +92,8 @@ let check_return_paths [n] (node_types: [n]production.t) (parents: [n]i32) (prev |> iterate (n |> i32.i64 |> bit_width) iter - -- Fetch the result value. At this point we know that all operators must be #value. - |> map (.2) - |> map (== #true) - -- All function declaration nodes need to have 'true' unless they return void. - |> map2 (||) is_void_fn_decl - |> map2 - (||) - (map (!= production_fn_decl) node_types) - -- And finally this must hold for all nodes. - |> reduce (&&) true + -- At this point, the first node (the fn_decl_list, which is the first node since the last compactify stage) + -- holds whether the program is correct. + |> (.[0]) + |> (.2) + |> (== #true) diff --git a/src/compiler/passes/ids.fut b/src/compiler/passes/ids.fut index 96dc7bb..55d8cb5 100644 --- a/src/compiler/passes/ids.fut +++ b/src/compiler/passes/ids.fut @@ -19,6 +19,7 @@ let assign_ids [n] (node_types: [n]production.t) (resolution: [n]i32) (data_type -- normal exclusive scan, and the function declarations also acts as flag to reset the counters for -- the `param` and `decl` counters. 
let is_fn_decl = map (== production_fn_decl) node_types + let num_fn_decls = is_fn_decl |> map u32.bool |> reduce (+) 0 let fn_ids = is_fn_decl |> map u32.bool @@ -61,7 +62,7 @@ let assign_ids [n] (node_types: [n]production.t) (resolution: [n]i32) (data_type |> map2 (\fn_id is_last_decl -> if is_last_decl then i64.u32 fn_id else -1) fn_ids in scatter - (replicate n 0u32) + (replicate (i64.u32 num_fn_decls) 0u32) is -- Add one to get a maximum declaration instead of a count. (map (+1) decl_ids) diff --git a/src/compiler/passes/symbol_resolution.fut b/src/compiler/passes/symbol_resolution.fut index 436f2e0..140e85a 100644 --- a/src/compiler/passes/symbol_resolution.fut +++ b/src/compiler/passes/symbol_resolution.fut @@ -59,7 +59,7 @@ let resolve_fns [n] (node_types: [n]production.t) (data: [n]u32): (bool, [n]i32) let resolution = data |> map i32.u32 - |> map2 (\is_call name_id -> if is_call then copy fn_decl_by_name[name_id] else -1) is_fn_call + |> map2 (\is_call name_id -> if is_call then fn_decl_by_name[name_id] else -1) is_fn_call -- These must all yield something other than -1. let calls_valid = resolution diff --git a/src/compiler/passes/tokenize.fut b/src/compiler/passes/tokenize.fut index 4a3ed06..5f813ac 100644 --- a/src/compiler/passes/tokenize.fut +++ b/src/compiler/passes/tokenize.fut @@ -41,13 +41,13 @@ local let parse_float (input: []u8) ((_, offset, len): tokenref): f32 = -- for annoying string operations further in the compiler, and allows us to simply query and compare the IDs. -- For now, this implementation does a rather simply fixed-length radix sort, as names are not supposed to be -- very long. Some optimizations are done though, as names can only consist of a-zA-Z0-9_ (63 characters), --- we only need to sort on 5 instead of 8 bits per characters. +-- we only need to sort on 6 instead of 8 bits per characters. -- IDs are assigned sequentially starting from 0. 
local let link_names [n] (input: []u8) (tokens: [n]tokenref): [n]u32 = let (_, offsets, lengths) = unzip3 tokens - -- a-zA-Z0-9_ are 26 + 26 + 10 + 1 = 63 characters, so 5 bits will do. - let bits_per_char = 5 - -- Map characters allowed in a function name to its 5-bit representation. + -- a-zA-Z0-9_ are 26 + 26 + 10 + 1 = 63 characters, so 6 bits will do. + let bits_per_char = 6 + -- Map characters allowed in a function name to its 6-bit representation. let char_to_value (c: u8): u8 = if c >= 'a' && c <= 'z' then c - 'a' else if c >= 'A' && c <= 'Z' then c - 'A' + ('z' - 'a' + 1) diff --git a/src/json/json.g b/src/json/json.g new file mode 100644 index 0000000..f71c3f9 --- /dev/null +++ b/src/json/json.g @@ -0,0 +1,18 @@ +json -> value; + +value [string] -> 'string' maybe_member; +value [number] -> 'number'; +value [true] -> 'true'; +value [false] -> 'false'; +value [nul] -> 'nul'; +value [object] -> 'lbrace' maybe_values 'rbrace'; +value [array] -> 'lbracket' maybe_values 'rbracket'; + +maybe_values [values] -> value value_list; +maybe_values [no_values]-> ; + +value_list -> 'comma' value value_list; +value_list [value_list_end] -> ; + +maybe_member [member] -> 'colon' value; +maybe_member [no_member] -> ; diff --git a/src/json/json.lex b/src/json/json.lex new file mode 100644 index 0000000..03412a4 --- /dev/null +++ b/src/json/json.lex @@ -0,0 +1,14 @@ +lbrace = /{/ +rbrace = /}/ +lbracket = /\[/ +rbracket = /\]/ +true = /true/ +false = /false/ +nul = /null/ +colon = /:/ +comma = /,/ + +number = /-?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][+\-]?[0-9]+)?/ +string = /"([^"\\\x00-\x1F\x7F]|\\(["\\/bfnrt]|u[0-9a-f][0-9a-f][0-9a-f][0-9a-f]))*"/ + +whitespace = /[ \n\r\t]+/ diff --git a/src/json/main.cpp b/src/json/main.cpp new file mode 100644 index 0000000..e8dc02d --- /dev/null +++ b/src/json/main.cpp @@ -0,0 +1,439 @@ +#include "json_futhark_generated.h" +#include "json_grammar.hpp" + +#include "pareas/json/futhark_interop.hpp" +#include "pareas/profiler/profiler.hpp" + 
+#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +// This file is mostly just copied from src/compiler/main.cpp + +struct Options { + const char* input_path; + bool help; + bool futhark_verbose; + bool futhark_debug; + bool futhark_debug_extra; + bool dump_dot; + bool verbose_tree; + + // Options available for the multicore backend + int threads; + + // Options abailable for the OpenCL and CUDA backends + const char* device_name; + bool futhark_profile; +}; + +void print_usage(char* progname) { + fmt::print( + "Usage: {} [options...] \n" + "Available options:\n" + "-h --help Show this message and exit.\n" + "--futhark-verbose Enable Futhark logging.\n" + "--futhark-debug Enable Futhark debug logging.\n" + "--futhark-debug-extra Futhark debug logging with extra information.\n" + " Not compatible with --futhark-debug.\n" + "--dump-dot Dump JSON tree as dot graph. Disables profiling.\n" + "--verbose-tree Print some information about the document tree.\n" + #if defined(FUTHARK_BACKEND_multicore) + "Available backend options:\n" + "-t --threads Set the maximum number of threads that may be used\n" + " (default: amount of cores).\n" + #elif defined(FUTHARK_BACKEND_opencl) || defined(FUTHARK_BACKEND_cuda) + "Available backend options:\n" + "--device Select the device that kernels are executed on. Any\n" + " device which name contains may be used. 
The\n" + " special value #k may be used to select the k-th\n" + " device reported by the platform.\n" + "--futhark-profile Enable Futhark profiling and print report at exit.\n" + #endif + "\n" + "When is '-', standard input is used\n", + progname + ); +} + +bool parse_options(Options* opts, int argc, char* argv[]) { + *opts = { + .input_path = nullptr, + .help = false, + .futhark_verbose = false, + .futhark_debug = false, + .futhark_debug_extra = false, + .dump_dot = false, + .verbose_tree = false, + .threads = 0, + .device_name = nullptr, + .futhark_profile = false, + }; + + const char* threads_arg = nullptr; + + for (int i = 1; i < argc; ++i) { + auto arg = std::string_view(argv[i]); + + #if defined(FUTHARK_BACKEND_multicore) + if (arg == "-t" || arg == "--threads") { + if (++i >= argc) { + fmt::print(std::cerr, "Error: Expected argument to option {}\n", arg); + return false; + } + + threads_arg = argv[i]; + continue; + } + #elif defined(FUTHARK_BACKEND_opencl) || defined(FUTHARK_BACKEND_cuda) + if (arg == "-d" || arg == "--device") { + if (++i >= argc) { + fmt::print(std::cerr, "Error: Expected argument to option {}\n", arg); + return false; + } + + opts->device_name = argv[i]; + continue; + } else if (arg == "--futhark-profile") { + opts->futhark_profile = true; + continue; + } + #endif + + if (arg == "-h" || arg == "--help") { + opts->help = true; + } else if (arg == "--futhark-verbose") { + opts->futhark_verbose = true; + } else if (arg == "--futhark-debug") { + opts->futhark_debug = true; + } else if (arg == "--futhark-debug-extra") { + opts->futhark_debug_extra = true; + } else if (arg == "--dump-dot") { + opts->dump_dot = true; + } else if (arg == "--verbose-tree") { + opts->verbose_tree = true; + } else if (!opts->input_path) { + opts->input_path = argv[i]; + } else { + fmt::print(std::cerr, "Error: Unknown option {}\n", arg); + return false; + } + } + + if (opts->help) + return true; + + if (!opts->input_path) { + fmt::print(std::cerr, "Error: Missing 
required argument \n"); + return false; + } else if (!opts->input_path[0]) { + fmt::print(std::cerr, "Error: may not be empty\n"); + return false; + } else if (opts->futhark_debug && opts->futhark_debug_extra) { + fmt::print(std::cerr, "Error: --futhark-debug is incompatible with --futhark-debug-extra\n"); + return false; + } + + if (threads_arg) { + const auto* end = threads_arg + std::strlen(threads_arg); + auto [p, ec] = std::from_chars(threads_arg, end, opts->threads); + if (ec != std::errc() || p != end || opts->threads < 1) { + fmt::print(std::cerr, "Error: Invalid value '{}' for option --threads\n", threads_arg); + return false; + } + } + + return true; +} + +template +struct Free { + void operator()(T* ptr) const { + free(static_cast(ptr)); + } +}; + +template +using MallocPtr = std::unique_ptr>; + +futhark::UniqueLexTable upload_lex_table(futhark_context* ctx) { + auto initial_state = futhark::UniqueArray( + ctx, + reinterpret_cast(json::lex_table.initial_states), + json::LexTable::NUM_INITIAL_STATES + ); + + auto merge_table = futhark::UniqueArray( + ctx, + reinterpret_cast(json::lex_table.merge_table), + json::lex_table.n, + json::lex_table.n + ); + + auto final_state = futhark::UniqueArray( + ctx, + reinterpret_cast*>(json::lex_table.final_states), + json::lex_table.n + ); + + auto lex_table = futhark::UniqueLexTable(ctx); + + int err = futhark_entry_mk_lex_table( + ctx, + &lex_table, + initial_state.get(), + merge_table.get(), + final_state.get() + ); + + if (err) + throw futhark::Error(ctx); + + return lex_table; +} + +template +T upload_strtab(futhark_context* ctx, const json::StrTab& strtab, F upload_fn) { + static_assert(sizeof(U) == sizeof(uint8_t)); + + auto table = futhark::UniqueArray( + ctx, + reinterpret_cast(strtab.table), + strtab.n + ); + + auto offsets = futhark::UniqueArray(ctx, strtab.offsets, json::NUM_TOKENS, json::NUM_TOKENS); + auto lengths = futhark::UniqueArray(ctx, strtab.lengths, json::NUM_TOKENS, json::NUM_TOKENS); + + auto tab 
= T(ctx); + + int err = upload_fn(ctx, &tab, table.get(), offsets.get(), lengths.get()); + if (err != 0) + throw futhark::Error(ctx); + + return tab; +} + +struct JsonTree { + size_t num_nodes; + std::unique_ptr node_types; + std::unique_ptr parents; +}; + +void dump_dot(const JsonTree& j, std::ostream& os) { + fmt::print(os, "digraph json {{\n"); + + for (size_t i = 0; i < j.num_nodes; ++i) { + auto prod = j.node_types[i]; + auto parent = j.parents[i]; + auto* name = json::production_name(prod); + + if (parent == i) + continue; + + fmt::print(os, "node{} [label=\"{}\nindex={}\"]\n", i, name, i); + + if (parent >= 0) { + fmt::print(os, "node{} -> node{};\n", parent, i); + } else { + fmt::print(os, "start{0} [style=invis];\nstart{0} -> node{0};\n", i); + } + } + + fmt::print(os, "}}\n"); +} + +JsonTree parse(futhark_context* ctx, const std::string& input, bool verbose_tree, pareas::Profiler& p, std::FILE* debug_log) { + auto debug_log_region = [&](const char* name) { + if (debug_log) + fmt::print(debug_log, "<<<{}>>>\n", name); + }; + + debug_log_region("upload"); + p.begin(); + p.begin(); + auto lex_table = upload_lex_table(ctx); + + auto sct = upload_strtab( + ctx, + json::stack_change_table, + futhark_entry_mk_stack_change_table + ); + + auto pt = upload_strtab( + ctx, + json::parse_table, + futhark_entry_mk_parse_table + ); + + auto arity_array = futhark::UniqueArray(ctx, json::arities, json::NUM_PRODUCTIONS); + p.end("table"); + + p.begin(); + auto input_array = futhark::UniqueArray(ctx, reinterpret_cast(input.data()), input.size()); + p.end("input"); + p.end("upload"); + + p.begin(); + + debug_log_region("tokenize"); + auto tokens = futhark::UniqueArray(ctx); + p.measure("tokenize", [&]{ + int err = futhark_entry_json_lex(ctx, &tokens, input_array, lex_table); + if (err) + throw futhark::Error(ctx); + }); + input_array.clear(); + lex_table.clear(); + + if (verbose_tree) { + fmt::print(std::cerr, "Num tokens: {}\n", tokens.shape()[0]); + } + + 
debug_log_region("parse"); + auto node_types = futhark::UniqueArray(ctx); + p.measure("parse", [&]{ + bool valid = false; + int err = futhark_entry_json_parse(ctx, &valid, &node_types, tokens, sct, pt); + if (err) + throw futhark::Error(ctx); + if (!valid) + throw std::runtime_error("Parse error"); + }); + sct.clear(); + pt.clear(); + + debug_log_region("build parse tree"); + auto parents = futhark::UniqueArray(ctx); + p.measure("build parse tree", [&]{ + int err = futhark_entry_json_build_parse_tree(ctx, &parents, node_types, arity_array); + if (err) + throw futhark::Error(ctx); + }); + + if (verbose_tree) { + fmt::print(std::cerr, "Initial nodes: {}\n", node_types.shape()[0]); + } + + debug_log_region("restructure"); + p.measure("restructure", [&]{ + auto old_node_types = std::move(node_types); + auto old_parents = std::move(parents); + int err = futhark_entry_json_restructure(ctx, &node_types, &parents, old_node_types, old_parents); + if (err) + throw futhark::Error(ctx); + }); + + debug_log_region("validate"); + p.measure("validate", [&]{ + bool valid; + int err = futhark_entry_json_validate(ctx, &valid, node_types, parents); + if (err) + throw futhark::Error(ctx); + if (!valid) + throw std::runtime_error("Invalid structure"); + }); + + p.end("json"); + + size_t num_nodes = node_types.shape()[0]; + + auto ast = JsonTree { + .num_nodes = num_nodes, + .node_types = std::make_unique(num_nodes), + .parents = std::make_unique(num_nodes), + }; + + int err = futhark_values_u8_1d( + ctx, + node_types, + reinterpret_cast*>(ast.node_types.get()) + ); + + err |= futhark_values_i32_1d(ctx, parents, ast.parents.get()); + + if (err) + throw futhark::Error(ctx); + + if (verbose_tree) { + fmt::print(std::cerr, "Nodes: {}\n", num_nodes); + } + + return ast; +} + +int main(int argc, char* argv[]) { + Options opts; + if (!parse_options(&opts, argc, argv)) { + fmt::print(std::cerr, "See '{} --help' for usage\n", argv[0]); + return EXIT_FAILURE; + } else if (opts.help) { + 
print_usage(argv[0]); + return EXIT_SUCCESS; + } + + auto p = pareas::Profiler(9999); + + auto in = std::ifstream(opts.input_path, std::ios::binary); + if (!in) { + fmt::print(std::cerr, "Error: Failed to open input file '{}'\n", opts.input_path); + return EXIT_FAILURE; + } + + auto input = std::string(std::istreambuf_iterator(in), std::istreambuf_iterator()); + in.close(); + + p.begin(); + auto config = futhark::ContextConfig(futhark_context_config_new()); + + futhark_context_config_set_logging(config.get(), opts.futhark_verbose); + futhark_context_config_set_debugging(config.get(), opts.futhark_debug || opts.futhark_debug_extra); + + #if defined(FUTHARK_BACKEND_multicore) + futhark_context_config_set_num_threads(config.get(), opts.threads); + #elif defined(FUTHARK_BACKEND_opencl) || defined(FUTHARK_BACKEND_cuda) + if (opts.device_name) { + futhark_context_config_set_device(config.get(), opts.device_name); + } + + futhark_context_config_set_profiling(config.get(), opts.futhark_profile); + #endif + + auto ctx = futhark::Context(futhark_context_new(config.get())); + futhark_context_set_logging_file(ctx.get(), stderr); + p.set_sync_callback([ctx = ctx.get()]{ + if (futhark_context_sync(ctx)) + throw futhark::Error(ctx); + }); + p.end("context init"); + + try { + auto ast = parse(ctx.get(), input, opts.verbose_tree, p, opts.futhark_debug_extra ? 
stderr : nullptr); + + if (opts.dump_dot) + dump_dot(ast, std::cout); + else + p.dump(std::cout); + + if (opts.futhark_profile) { + auto report = MallocPtr(futhark_context_report(ctx.get())); + fmt::print(std::cerr, "Profile report:\n{}", report); + } + } catch (const std::runtime_error& err) { + fmt::print(std::cerr, "Error: {}\n", err.what()); + return EXIT_FAILURE; + } catch (const futhark::Error& err) { + fmt::print(std::cerr, "Futhark error: {}\n", err.what()); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} diff --git a/src/json/main.fut b/src/json/main.fut new file mode 100644 index 0000000..9fa4298 --- /dev/null +++ b/src/json/main.fut @@ -0,0 +1,147 @@ +module lexer = import "../compiler/lexer/lexer" +import "../compiler/parser/parser" +import "../compiler/util" + +module g = import "../../gen/json_grammar" +local open g + +module json_parser = parser g + +type~ lex_table [n] = lexer.lex_table [n] token.t +type~ stack_change_table [n] = json_parser.stack_change_table [n] +type~ parse_table [n] = json_parser.parse_table [n] +type~ arity_array = json_parser.arity_array + +entry mk_lex_table [n] (is: [256]lexer.state) (mt: [n][n]lexer.state) (fs: [n]token.t): lex_table [n] + = lexer.mk_lex_table is mt fs identity_state + +entry mk_stack_change_table [n] + (table: [n]bracket.t) + (offsets: [num_tokens][num_tokens]i32) + (lengths: [num_tokens][num_tokens]i32): stack_change_table [n] + = mk_strtab table offsets lengths + +entry mk_parse_table [n] + (table: [n]production.t) + (offsets: [num_tokens][num_tokens]i32) + (lengths: [num_tokens][num_tokens]i32): parse_table [n] + = mk_strtab table offsets lengths + +-- Code taken from pareas itself +-- See compiler/passes/util.fut and compiler/passes/compactify.fut for more info + +let find_unmarked_parents_log [n] (parents: [n]i32) (marks: [n]bool): [n]i32 = + iterate + (n |> i32.i64 |> bit_width) + (\links -> + map + (\link -> if link == -1 || !marks[link] then link else links[link]) + links) + parents + +let 
remove_nodes_log [n] (parents: [n]i32) (remove: [n]bool): [n]i32 = + find_unmarked_parents_log parents remove + |> map3 + (\i remove parent -> if remove then i else parent) + (iota n |> map i32.i64) + remove + +let compactify [n] (parents: [n]i32): [](i32, i32) = + -- TODO: Mark all nodes of deleted subtrees as deleted by setting their parents to themselves. + -- Make a mask specifying whether a node should be included in the new tree. + let include_mask = + iota n + |> map i32.i64 + |> zip parents + |> map (\(i, parent) -> parent != i) + let is = + include_mask + |> map i32.bool + |> scan (+) 0 + -- break up the computation of is temporarily to get the size of the new arrays. + let m = last is |> i64.i32 + -- For a node index i in the old array, this array gives the position in the new array (which should be of size m) + let new_index = + map2 (\inc i -> if inc then i else -1) include_mask is + |> map (+ -1) + -- For a node index j in the new array, this gives the position in the old array + let old_index = + scatter + (replicate m 0i32) + (new_index |> map i64.i32) + (iota n |> map i32.i64) + -- Also compute the new parents array here, since we need the `is` array for it, but dont need it anywhere else. 
+ let parents = + -- Begin with the indices into the old array + old_index + -- Gather its parent, which points to an index into the old array as well + |> gather parents + -- Find the index into the new array + |> map (\i -> if i == -1 then -1 else new_index[i]) + in zip parents old_index + +-- Json entry points + +entry json_lex (input: []u8) (lt: lex_table []): []token.t = + lexer.lex input lt + |> map (.0) + |> filter (!= token_whitespace) + +entry json_parse (tokens: []token.t) (sct: stack_change_table []) (pt: parse_table []): (bool, []production.t) = + if json_parser.check tokens sct + then (true, json_parser.parse tokens pt) + else (false, []) + +entry json_build_parse_tree [n] (node_types: [n]production.t) (arities: arity_array): []i32 = + json_parser.build_parent_vector node_types arities + +-- | Restructure the json tree: +-- - Non relevant nodes are removed. +-- - Lists are flattened. +-- - String->member pairs are squashed. +-- - Tree is compactified. +entry json_restructure [n] (node_types: [n]production.t) (parents: [n]i32): ([]production.t, []i32) = + let parents = + node_types + |> map (\nty -> nty == production_values + || nty == production_value_list + || nty == production_value_list_end + || nty == production_no_member + || nty == production_no_values) + |> remove_nodes_log parents + -- Squash string->member to member. + -- Note: member can only have a string as parent. For hypothetical lexeme extraction, the member node would + -- be matched with the string's lexeme. 
+ let parents = + let is_member = map (== production_member) node_types + let is = + map2 (\parent is_member -> if is_member then parent else -1) parents is_member + |> map i64.i32 + let strings_to_remove = scatter + (replicate n false) + is + (replicate n true) + let get_new_parent self parent is_string_to_remove is_member = + if is_string_to_remove then self + else if is_member then parents[parent] + else parent + in + map4 + get_new_parent + (iota n |> map i32.i64) + parents + strings_to_remove + is_member + -- *really* remove the old nodes + let (parents, old_index) = compactify parents |> unzip + let node_types = gather node_types old_index + in (node_types, parents) + +-- | Validate that the children of objects are members, and the parents of members are objects. +entry json_validate [n] (node_types: [n]production.t) (parents: [n]i32): bool = + map2 + (\nty parent -> (nty == production_member) == (parent != -1 && node_types[parent] == production_object)) + node_types + parents + |> reduce (&&) true +