From 5b74afbf33be17601e2cbfa13de7e242eaff369d Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Thu, 12 May 2022 10:19:10 +0200 Subject: [PATCH] [LLVM][GPU] Atomic updates support (#853) 1. Helper visitor generates atomic statements with += and -= once again. 2. LLVM visitor now knows how to lower atomic statements for CPUs (trivially) and GPUs. 3. A corresponding IR test was added. `expsyn` test on GPU enabled Co-authored-by: Ioannis Magkanaris --- .../llvm/codegen_llvm_helper_visitor.cpp | 39 ++------- src/codegen/llvm/codegen_llvm_visitor.cpp | 84 ++++++++++++++----- src/codegen/llvm/llvm_ir_builder.cpp | 22 +++++ src/codegen/llvm/llvm_ir_builder.hpp | 12 ++- test/benchmark/CMakeLists.txt | 5 +- test/unit/codegen/codegen_llvm_ir.cpp | 36 +++++++- test/unit/codegen/codegen_llvm_visitor.cpp | 6 +- 7 files changed, 139 insertions(+), 65 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp index 5800beae6b..0f398526bf 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -283,7 +283,7 @@ static void append_statements_from_block(ast::StatementVector& statements, * Create atomic statement for given expression of the form a[i] += expression * @param var Name of the variable on the LHS (it's an array), e.g. `a` * @param var_index Name of the index variable to access variable `var` e.g. `i` - * @param op_str Operators like += or -= + * @param op_str Operators like =, += or -= * @param rhs_str expression that will be added or subtracted from `var[var_index]` * @return A statement representing atomic operation using `ast::CodegenAtomicStatement` */ @@ -299,23 +299,9 @@ static std::shared_ptr create_atomic_statement( /*at=*/nullptr, /*index=*/nullptr); - // LLVM IR generation is now only supporting assignment (=) and not += or -= - // So we need to write increment operation a += b as an assignment operation - // a = a + b. - // See https://github.com/BlueBrain/nmodl/issues/851 - - std::string op(op_str); - stringutils::remove_character(op, '='); - - // make sure only + or - operator is used - if (op_str != "-" && op_str != "+") { - throw std::runtime_error("Unsupported binary operator for atomic statement"); - } - - auto* rhs = create_expression("{}[{}] {} {} "_format(var, var_index, op, rhs_str)); - return std::make_shared(lhs, - ast::BinaryOperator{ast::BOP_ASSIGN}, - rhs); + auto op = ast::BinaryOperator(ast::string_to_binaryop(op_str)); + auto rhs = create_expression(rhs_str); + return std::make_shared(lhs, op, rhs); } /** @@ -420,22 +406,7 @@ void CodegenLLVMHelperVisitor::ion_write_statements(BlockType type, index_statements.push_back(visitor::create_statement(index_statement)); // pass ion variable to write and its index - - // lhs variable - std::string lhs = "{}[{}] "_format(ion_varname, index_varname); - - // lets turn a += b into a = a + b if applicable - // note that this is done in order to facilitate existing implementation in the llvm - // backend which doesn't support += or -= operators. - std::string statement; - if (!op.compare("+=")) { - statement = "{} = {} + {}"_format(lhs, lhs, rhs); - } else if (!op.compare("-=")) { - statement = "{} = {} - {}"_format(lhs, lhs, rhs); - } else { - statement = "{} {} {}"_format(lhs, op, rhs); - } - body_statements.push_back(visitor::create_statement(statement)); + body_statements.push_back(create_atomic_statement(ion_varname, index_varname, op, rhs)); }; /// iterate over all ions and create write ion statements for given block type diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index de6c7ad914..1516f23634 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -512,31 +512,73 @@ void CodegenLLVMVisitor::visit_boolean(const ast::Boolean& node) { ir_builder.create_boolean_constant(node.get_value()); } -/** - * Currently, this functions is very similar to visiting the binary operator. However, the - * difference here is that the writes to the LHS variable must be atomic. These has a particular - * use case in synapse kernels. For simplicity, we choose not to support atomic writes at this - * stage and emit a warning. - * - * \todo support this properly. - */ void CodegenLLVMVisitor::visit_codegen_atomic_statement(const ast::CodegenAtomicStatement& node) { - if (platform.is_cpu_with_simd()) - logger->warn("Atomic operations are not supported"); + // Get the variable node that need an atomic update. + const auto& var = std::dynamic_pointer_cast(node.get_lhs()); + if (!var) + throw std::runtime_error("Error: only 'VarName' update is supported\n"); - // Support only assignment for now. + // Evaluate RHS of the update. llvm::Value* rhs = accept_and_get(node.get_rhs()); - if (node.get_atomic_op().get_value() != ast::BinaryOp::BOP_ASSIGN) - throw std::runtime_error( - "Error: only assignment is supported for CodegenAtomicStatement\n"); - const auto& var = dynamic_cast(node.get_lhs().get()); - if (!var) - throw std::runtime_error("Error: only 'VarName' assignment is supported\n"); - // Process the assignment as if it was non-atomic. - if (platform.is_cpu_with_simd()) - logger->warn("Treating write as non-atomic"); - write_to_variable(*var, rhs); + // First, check if it is an atomic write only and we can return early. + // Otherwise, extract what kind of atomic update we want to make. + ast::BinaryOp atomic_op = node.get_atomic_op().get_value(); + if (atomic_op == ast::BinaryOp::BOP_ASSIGN) { + write_to_variable(*var, rhs); + return; + } + ast::BinaryOp op = ir_builder.extract_atomic_op(atomic_op); + + // For different platforms, we handle atomic updates differently! + if (platform.is_cpu_with_simd()) { + throw std::runtime_error("Error: no atomic update support for SIMD CPUs\n"); + } else if (platform.is_gpu()) { + const auto& identifier = var->get_name(); + + // We only need to support atomic updates to instance struct members. + if (!identifier->is_codegen_instance_var()) + throw std::runtime_error("Error: atomic updates for non-instance variable\n"); + + const auto& node = std::dynamic_pointer_cast(identifier); + const auto& instance_name = node->get_instance_var()->get_node_name(); + const auto& member_node = node->get_member_var(); + const auto& member_name = member_node->get_node_name(); + + if (!instance_var_helper.is_an_instance_variable(member_name)) + throw std::runtime_error("Error: " + member_name + + " is not a member of the instance variable\n"); + + llvm::Value* instance_ptr = ir_builder.create_load(instance_name); + int member_index = instance_var_helper.get_variable_index(member_name); + llvm::Value* member_ptr = ir_builder.get_struct_member_ptr(instance_ptr, member_index); + + // Some sanity checks. + auto codegen_var_with_type = instance_var_helper.get_variable(member_name); + if (!codegen_var_with_type->get_is_pointer()) + throw std::runtime_error( + "Error: atomic updates are allowed on pointer variables only\n"); + const auto& member_var_name = std::dynamic_pointer_cast(member_node); + if (!member_var_name->get_name()->is_indexed_name()) + throw std::runtime_error("Error: " + member_name + " is not an IndexedName\n"); + const auto& member_indexed_name = std::dynamic_pointer_cast( + member_var_name->get_name()); + if (!member_indexed_name->get_length()->is_name()) + throw std::runtime_error("Error: " + member_name + " must be indexed with a variable!"); + + llvm::Value* i64_index = get_index(*member_indexed_name); + llvm::Value* instance_member = ir_builder.create_load(member_ptr); + llvm::Value* ptr = ir_builder.create_inbounds_gep(instance_member, i64_index); + + ir_builder.create_atomic_op(ptr, rhs, op); + } else { + // For non-SIMD CPUs, updates don't have to be atomic at all! + llvm::Value* lhs = accept_and_get(node.get_lhs()); + ir_builder.create_binary_op(lhs, rhs, op); + llvm::Value* result = ir_builder.pop_last_value(); + + write_to_variable(*var, result); + } } // Generating FOR loop in LLVM IR creates the following structure: diff --git a/src/codegen/llvm/llvm_ir_builder.cpp b/src/codegen/llvm/llvm_ir_builder.cpp index 82cb820049..efbd7aa050 100644 --- a/src/codegen/llvm/llvm_ir_builder.cpp +++ b/src/codegen/llvm/llvm_ir_builder.cpp @@ -293,6 +293,28 @@ void IRBuilder::create_array_alloca(const std::string& name, create_alloca(name, array_type); } +ast::BinaryOp IRBuilder::extract_atomic_op(ast::BinaryOp op) { + switch (op) { + case ast::BinaryOp::BOP_SUB_ASSIGN: + return ast::BinaryOp::BOP_SUBTRACTION; + case ast::BinaryOp::BOP_ADD_ASSIGN: + return ast::BinaryOp::BOP_ADDITION; + default: + throw std::runtime_error("Error: only atomic addition and subtraction is supported\n"); + } +} + +void IRBuilder::create_atomic_op(llvm::Value* ptr, llvm::Value* update, ast::BinaryOp op) { + if (op == ast::BinaryOp::BOP_SUBTRACTION) { + update = builder.CreateFNeg(update); + } + builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, + ptr, + update, + llvm::MaybeAlign(), + llvm::AtomicOrdering::SequentiallyConsistent); +} + void IRBuilder::create_binary_op(llvm::Value* lhs, llvm::Value* rhs, ast::BinaryOp op) { // Check that both lhs and rhs have the same types. if (lhs->getType() != rhs->getType()) diff --git a/src/codegen/llvm/llvm_ir_builder.hpp b/src/codegen/llvm/llvm_ir_builder.hpp index 1b144afcfd..67db6fcded 100644 --- a/src/codegen/llvm/llvm_ir_builder.hpp +++ b/src/codegen/llvm/llvm_ir_builder.hpp @@ -146,6 +146,9 @@ class IRBuilder { return vectorize && mask; } + /// Extracts binary operator (+ or -) from atomic update (+= or =-). + ast::BinaryOp extract_atomic_op(ast::BinaryOp op); + /// Generates LLVM IR to allocate the arguments of the function on the stack. void allocate_function_arguments(llvm::Function* function, const ast::CodegenVarWithTypeVector& nmodl_arguments); @@ -158,6 +161,9 @@ class IRBuilder { /// Generates LLVM IR for the given binary operator. void create_binary_op(llvm::Value* lhs, llvm::Value* rhs, ast::BinaryOp op); + /// Generates LLVM IR for the given atomic operator. + void create_atomic_op(llvm::Value* ptr, llvm::Value* update, ast::BinaryOp op); + /// Generates LLVM IR for the bitcast instruction. llvm::Value* create_bitcast(llvm::Value* value, llvm::Type* dst_type); @@ -304,13 +310,13 @@ class IRBuilder { /// Pops the last visited value from the value stack. llvm::Value* pop_last_value(); + /// Generates an inbounds GEP instruction for the given value and returns calculated address. + llvm::Value* create_inbounds_gep(llvm::Value* variable, llvm::Value* index); + private: /// Generates an inbounds GEP instruction for the given name and returns calculated address. llvm::Value* create_inbounds_gep(const std::string& variable_name, llvm::Value* index); - /// Generates an inbounds GEP instruction for the given value and returns calculated address. - llvm::Value* create_inbounds_gep(llvm::Value* variable, llvm::Value* index); - /// Returns a scalar constant of the provided type. template llvm::Value* get_scalar_constant(llvm::Type* type, V value); diff --git a/test/benchmark/CMakeLists.txt b/test/benchmark/CMakeLists.txt index 5529b505d2..f8f6c762f0 100644 --- a/test/benchmark/CMakeLists.txt +++ b/test/benchmark/CMakeLists.txt @@ -44,9 +44,8 @@ if(NMODL_ENABLE_PYTHON_BINDINGS) set_tests_properties( "PyJIT/${modfile_name}" PROPERTIES ENVIRONMENT PYTHONPATH=${PROJECT_BINARY_DIR}/lib:$ENV{PYTHONPATH}) - # Disable running the expsyn.mod on GPU because atomic instructions are not supported yet on GPU - # See https://github.com/BlueBrain/nmodl/issues/834 - if(NMODL_ENABLE_LLVM_CUDA AND NOT ${modfile} STREQUAL "${NMODL_PROJECT_SOURCE_DIR}/test/benchmark/kernels/expsyn.mod") + + if(NMODL_ENABLE_LLVM_CUDA) add_test(NAME "PyJIT/${modfile_name}_gpu" COMMAND ${PYTHON_EXECUTABLE} ${NMODL_PROJECT_SOURCE_DIR}/test/benchmark/benchmark.py --file ${modfile} --gpu ${extra_args}) diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index ebef71688e..3b810edbbe 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -1360,7 +1360,7 @@ SCENARIO("Vectorised derivative block", "[visitor][llvm][derivative]") { } g = (g-rhs)/0.001 mech->ion_dinadv[ion_dinadv_id] = mech->ion_dinadv[ion_dinadv_id]+(dina-mech->ina[id])/0.001 - mech->ion_ina[ion_ina_id] = mech->ion_ina[ion_ina_id]+mech->ina[id] + mech->ion_ina[ion_ina_id] += mech->ina[id] mech->vec_rhs[node_id] = mech->vec_rhs[node_id]-rhs mech->vec_d[node_id] = mech->vec_d[node_id]+g })"; @@ -1807,4 +1807,38 @@ SCENARIO("GPU kernel body IR generation", "[visitor][llvm][gpu]") { REQUIRE(!std::regex_search(module_string, m, log_old_call)); } } + + GIVEN("For current update with atomic addition ") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + USEION na READ ena WRITE ina + } + + STATE { } + + ASSIGNED { + v (mV) + ena (mV) + ina (mA/cm2) + } + + BREAKPOINT { + SOLVE states METHOD cnexp + } + + DERIVATIVE states { } + )"; + + THEN("corresponding LLVM atomic instruction is generated") { + std::string module_string = run_gpu_llvm_visitor(nmodl_text, + /*opt_level=*/0, + /*use_single_precision=*/false); + std::smatch m; + + // Check for atomic addition. + std::regex add(R"(atomicrmw fadd double\* %.*, double %.* seq_cst)"); + REQUIRE(std::regex_search(module_string, m, add)); + } + } } diff --git a/test/unit/codegen/codegen_llvm_visitor.cpp b/test/unit/codegen/codegen_llvm_visitor.cpp index af9bed5e7c..1e3504ae61 100644 --- a/test/unit/codegen/codegen_llvm_visitor.cpp +++ b/test/unit/codegen/codegen_llvm_visitor.cpp @@ -462,8 +462,8 @@ SCENARIO("Channel: Derivative and breakpoint block llvm transformations", g = (g-rhs)/0.001 mech->ion_dinadv[ion_dinadv_id] = mech->ion_dinadv[ion_dinadv_id]+(dina-mech->ina[id])/0.001 mech->ion_dikdv[ion_dikdv_id] = mech->ion_dikdv[ion_dikdv_id]+(dik-mech->ik[id])/0.001 - mech->ion_ina[ion_ina_id] = mech->ion_ina[ion_ina_id]+mech->ina[id] - mech->ion_ik[ion_ik_id] = mech->ion_ik[ion_ik_id]+mech->ik[id] + mech->ion_ina[ion_ina_id] += mech->ina[id] + mech->ion_ik[ion_ik_id] += mech->ik[id] mech->vec_rhs[node_id] = mech->vec_rhs[node_id]-rhs mech->vec_d[node_id] = mech->vec_d[node_id]+g } @@ -593,7 +593,7 @@ SCENARIO("Synapse: Derivative and breakpoint block llvm transformations", } mech->g[id] = (mech->g[id]-rhs)/0.001 mech->ion_dinadv[ion_dinadv_id] = mech->ion_dinadv[ion_dinadv_id]+(dina-mech->ina[id])/0.001*1.e2/mech->node_area[node_area_id] - mech->ion_ina[ion_ina_id] = mech->ion_ina[ion_ina_id]+mech->ina[id]*(1.e2/mech->node_area[node_area_id]) + mech->ion_ina[ion_ina_id] += mech->ina[id]*(1.e2/mech->node_area[node_area_id]) mfactor = 1.e2/mech->node_area[node_area_id] mech->g[id] = mech->g[id]*mfactor rhs = rhs*mfactor