diff --git a/keopscore/keopscore/formulas/maths/ClampInt.py b/keopscore/keopscore/formulas/maths/ClampInt.py index 0e1f9735..1618e434 100644 --- a/keopscore/keopscore/formulas/maths/ClampInt.py +++ b/keopscore/keopscore/formulas/maths/ClampInt.py @@ -27,8 +27,9 @@ class Class(ClampInt_Impl): """ string_id = "ClampInt" + print_fun = lambda x: f"ClampInt({x},{a},{b})" - ScalarOpFun = keops_clampint + ScalarOpFun = lambda x: keops_clampint(x, a, b) @staticmethod def Derivative(x): diff --git a/keopscore/keopscore/formulas/maths/DiffClampInt.py b/keopscore/keopscore/formulas/maths/DiffClampInt.py index 07055552..62346c54 100644 --- a/keopscore/keopscore/formulas/maths/DiffClampInt.py +++ b/keopscore/keopscore/formulas/maths/DiffClampInt.py @@ -29,8 +29,9 @@ def __init__(self, a, b): class Class(DiffClampInt_Impl): string_id = "DiffClampInt" + print_fun = lambda x: f"DiffClampInt({x},{a},{b})" - ScalarOpFun = keops_diffclampint + ScalarOpFun = lambda x: keops_diffclampint(x, a, b) def DiffT_fun(self, v, gradin): from keopscore.formulas import Zero diff --git a/keopscore/keopscore/formulas/maths/Divide.py b/keopscore/keopscore/formulas/maths/Divide.py index 4bc48be5..bf656bc3 100644 --- a/keopscore/keopscore/formulas/maths/Divide.py +++ b/keopscore/keopscore/formulas/maths/Divide.py @@ -3,8 +3,8 @@ from keopscore.formulas.maths.Scalprod import Scalprod from keopscore.formulas.maths.Sum import Sum from keopscore.formulas.maths.Square import Square -from keopscore.formulas.variables.Zero import Zero -from keopscore.formulas.variables.IntCst import IntCst, IntCst_Impl +from keopscore.formulas.variables.Zero import Zero_Impl +from keopscore.formulas.variables.IntCst import IntCst_Impl from keopscore.formulas.variables.RatCst import RatCst, RatCst_Impl from keopscore.utils.misc_utils import KeOps_Error from keopscore.utils.math_functions import keops_div @@ -43,9 +43,9 @@ def DiffT_fun(self, v, gradin): # N.B. The following separate function should theoretically be implemented # as a __new__ method of the previous class, but this can generate infinite recursion problems def Divide(arg0, arg1): - if isinstance(arg0, Zero): + if isinstance(arg0, Zero_Impl): return Broadcast(arg0, arg1.dim) - elif isinstance(arg1, Zero): + elif isinstance(arg1, Zero_Impl): KeOps_Error("division by zero") elif isinstance(arg1, IntCst_Impl): return RatCst(1, arg1.val) * arg0 diff --git a/keopscore/keopscore/mapreduce/cpu/CpuReduc.py b/keopscore/keopscore/mapreduce/cpu/CpuReduc.py index 19d39677..8b09815a 100644 --- a/keopscore/keopscore/mapreduce/cpu/CpuReduc.py +++ b/keopscore/keopscore/mapreduce/cpu/CpuReduc.py @@ -1,8 +1,14 @@ import keopscore from keopscore.binders.cpp.Cpu_link_compile import Cpu_link_compile +from keopscore.utils.meta_toolbox.c_function import c_templated_function from keopscore.mapreduce.cpu.CpuAssignZero import CpuAssignZero from keopscore.mapreduce.MapReduce import MapReduce -from keopscore.utils.meta_toolbox import c_include +from keopscore.utils.meta_toolbox import ( + c_include, + c_variable, + c_for, + c_return, +) import keopscore @@ -34,6 +40,11 @@ def get_code(self): table = self.varloader.direct_table(args, i, j) sum_scheme = self.sum_scheme + nx = c_variable("signed long int", "nx") + ny = c_variable("signed long int", "ny") + + out = self.out + headers = ["cmath", "stdlib.h"] if keopscore.config.config.use_OpenMP: headers.append("omp.h") @@ -41,28 +52,41 @@ def get_code(self): headers.append("iostream") self.headers += c_include(*headers) - self.code = f""" -{self.headers} -template < typename TYPE > -int CpuConv_{self.gencode_filename}(signed long int nx, signed long int ny, TYPE* out, TYPE **{arg.id}) {{ - #pragma omp parallel for - for (signed long int i = 0; i < nx; i++) {{ - {fout.declare()} - {acc.declare()} - {sum_scheme.declare_temporary_accumulator()} - {red_formula.InitializeReduction(acc)} - {sum_scheme.initialize_temporary_accumulator()} - for (signed long int j = 0; j < ny; j++) {{ - {red_formula.formula(fout,table,i,j,tagI)} - {sum_scheme.accumulate_result(acc, fout, j)} - {sum_scheme.periodic_accumulate_temporary(acc, j)} - }} - {sum_scheme.final_operation(acc)} - {red_formula.FinalizeOutput(acc, outi, i)} - }} - return 0; -}} - """ + code = self.headers + c_templated_function( + dtype_out="int", + name="CpuConv_" + self.gencode_filename, + input_vars=(nx, ny, out, arg), + body=( + c_for( + decorator="#pragma omp parallel for", + init=i.declare_assign(0), + end=i < nx, + loop=i.plus_plus, + body=( + fout.declare(), + acc.declare(), + sum_scheme.declare_temporary_accumulator(), + red_formula.InitializeReduction(acc), + sum_scheme.initialize_temporary_accumulator(), + c_for( + init=j.declare_assign(0), + end=j < ny, + loop=j.plus_plus, + body=( + red_formula.formula(fout, table, i, j, tagI), + sum_scheme.accumulate_result(acc, fout, j), + sum_scheme.periodic_accumulate_temporary(acc, j), + ), + ), + sum_scheme.final_operation(acc), + red_formula.FinalizeOutput(acc, outi, i), + ), + ), + c_return(0), + ), + ) + + self.code = str(code) self.code += f""" #include "stdarg.h" diff --git a/keopscore/keopscore/utils/Tree.py b/keopscore/keopscore/utils/Tree.py index 5cdce146..8a774d63 100644 --- a/keopscore/keopscore/utils/Tree.py +++ b/keopscore/keopscore/utils/Tree.py @@ -46,7 +46,7 @@ def nice_print(self): formula_string = formula_string.replace(var_string, v.label) if v.ind >= 0: varstrings.append(f"{v.label}={var_string}") - string = formula_string + " with " + ", ".join(varstrings) + string = formula_string + " with " + "; ".join(varstrings) return string def make_dot(self, filename=None): diff --git a/keopscore/keopscore/utils/meta_toolbox/c_code.py b/keopscore/keopscore/utils/meta_toolbox/c_code.py index 654b494f..11fe2d7d 100644 --- a/keopscore/keopscore/utils/meta_toolbox/c_code.py +++ b/keopscore/keopscore/utils/meta_toolbox/c_code.py @@ -31,3 +31,6 @@ def c_include(*headers, **kwargs): def c_define(name, value, **kwargs): return c_code(f"#define {name} {value}\n", **kwargs) + + +c_line_break = c_code("\n") diff --git a/keopscore/keopscore/utils/meta_toolbox/c_expression.py b/keopscore/keopscore/utils/meta_toolbox/c_expression.py index ed123c43..fa833da4 100644 --- a/keopscore/keopscore/utils/meta_toolbox/c_expression.py +++ b/keopscore/keopscore/utils/meta_toolbox/c_expression.py @@ -74,6 +74,10 @@ def __truediv__(self, other): python_op = lambda x, y: x / y return self.binary_op(other, python_op, "/", "division") + def equals(self, other): + python_op = lambda x, y: x == y + return self.binary_op(other, python_op, "==", "comparison", dtype="bool") + def __lt__(self, other): python_op = lambda x, y: x < y return self.binary_op(other, python_op, "<", "comparison", dtype="bool") diff --git a/keopscore/keopscore/utils/meta_toolbox/c_function.py b/keopscore/keopscore/utils/meta_toolbox/c_function.py index ce7cc7de..07050b48 100644 --- a/keopscore/keopscore/utils/meta_toolbox/c_function.py +++ b/keopscore/keopscore/utils/meta_toolbox/c_function.py @@ -48,3 +48,20 @@ class cuda_global_kernel(c_function): def __init__(self, name, input_vars=(), body=c_empty_instruction, **kwargs): super().__init__('extern "C" __global__ void', name, input_vars, body, **kwargs) + + +class c_templated_function(c_function): + + def __init__( + self, + dtype_out, + name, + input_vars=(), + body=c_empty_instruction, + typename="TYPE", + **kwargs, + ): + decorator = f"template < typename {typename} >" + super().__init__( + dtype_out, name, input_vars, body, decorator=decorator, **kwargs + ) diff --git a/keopscore/keopscore/utils/meta_toolbox/c_instruction.py b/keopscore/keopscore/utils/meta_toolbox/c_instruction.py index b69c3636..83638662 100644 --- a/keopscore/keopscore/utils/meta_toolbox/c_instruction.py +++ b/keopscore/keopscore/utils/meta_toolbox/c_instruction.py @@ -1,4 +1,5 @@ from .c_code import c_code +from .c_expression import py2c from .misc import Meta_Toolbox_Error @@ -56,5 +57,10 @@ def c_instruction_from_string(string): return c_instruction(string, set(), set()) +def c_return(c_expr): + c_expr = py2c(c_expr) + return c_instruction_from_string(f"return {c_expr}") + + def c_comment(string): return c_instruction("// " + string, set(), set()) diff --git a/pykeops/pykeops/sandbox/test_lazytensor_clamp.py b/pykeops/pykeops/sandbox/test_lazytensor_clamp.py index 2d021f3a..2e937b52 100644 --- a/pykeops/pykeops/sandbox/test_lazytensor_clamp.py +++ b/pykeops/pykeops/sandbox/test_lazytensor_clamp.py @@ -8,11 +8,11 @@ M, N, D = 1000, 1000, 3 -test_grad = True +test_grad = False device_id = "cuda:0" if torch.cuda.is_available() else "cpu" -do_warmup = True +do_warmup = False x = torch.randn(M, 1, D, requires_grad=test_grad, device=device_id) y = torch.randn(1, N, D, device=device_id)