From 9537bf31a70bc87d134f2b7248769f36f838c417 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joan=20Glaune=CC=80s?= Date: Mon, 6 May 2024 20:18:37 +0200 Subject: [PATCH 1/4] use meta toolbox for cpuReduc --- keopscore/keopscore/mapreduce/cpu/CpuReduc.py | 69 ++++++++++++------- .../utils/meta_toolbox/c_function.py | 9 +++ .../pykeops/sandbox/test_lazytensor_clamp.py | 4 +- 3 files changed, 57 insertions(+), 25 deletions(-) diff --git a/keopscore/keopscore/mapreduce/cpu/CpuReduc.py b/keopscore/keopscore/mapreduce/cpu/CpuReduc.py index 19d396777..4aa3db794 100644 --- a/keopscore/keopscore/mapreduce/cpu/CpuReduc.py +++ b/keopscore/keopscore/mapreduce/cpu/CpuReduc.py @@ -1,8 +1,14 @@ import keopscore from keopscore.binders.cpp.Cpu_link_compile import Cpu_link_compile +from keopscore.utils.meta_toolbox.c_function import templated_function from keopscore.mapreduce.cpu.CpuAssignZero import CpuAssignZero from keopscore.mapreduce.MapReduce import MapReduce -from keopscore.utils.meta_toolbox import c_include +from keopscore.utils.meta_toolbox import ( + c_include, + c_variable, + c_for, + c_instruction_from_string, +) import keopscore @@ -34,6 +40,11 @@ def get_code(self): table = self.varloader.direct_table(args, i, j) sum_scheme = self.sum_scheme + nx = c_variable("signed long int", "nx") + ny = c_variable("signed long int", "ny") + + out = self.out + headers = ["cmath", "stdlib.h"] if keopscore.config.config.use_OpenMP: headers.append("omp.h") @@ -41,28 +52,40 @@ def get_code(self): headers.append("iostream") self.headers += c_include(*headers) - self.code = f""" -{self.headers} -template < typename TYPE > -int CpuConv_{self.gencode_filename}(signed long int nx, signed long int ny, TYPE* out, TYPE **{arg.id}) {{ - #pragma omp parallel for - for (signed long int i = 0; i < nx; i++) {{ - {fout.declare()} - {acc.declare()} - {sum_scheme.declare_temporary_accumulator()} - {red_formula.InitializeReduction(acc)} - {sum_scheme.initialize_temporary_accumulator()} - for (signed long int j = 0; j < ny; j++) {{ - {red_formula.formula(fout,table,i,j,tagI)} - {sum_scheme.accumulate_result(acc, fout, j)} - {sum_scheme.periodic_accumulate_temporary(acc, j)} - }} - {sum_scheme.final_operation(acc)} - {red_formula.FinalizeOutput(acc, outi, i)} - }} - return 0; -}} - """ + code = self.headers + templated_function( + name="CpuConv_" + self.gencode_filename, + input_vars=(nx, ny, out, arg), + body=( + c_for( + decorator="#pragma omp parallel for", + init=i.declare_assign(0), + end=i < nx, + loop=i.plus_plus, + body=( + fout.declare(), + acc.declare(), + sum_scheme.declare_temporary_accumulator(), + red_formula.InitializeReduction(acc), + sum_scheme.initialize_temporary_accumulator(), + c_for( + init=j.declare_assign(0), + end=j < ny, + loop=j.plus_plus, + body=( + red_formula.formula(fout, table, i, j, tagI), + sum_scheme.accumulate_result(acc, fout, j), + sum_scheme.periodic_accumulate_temporary(acc, j), + ), + ), + sum_scheme.final_operation(acc), + red_formula.FinalizeOutput(acc, outi, i), + ), + ), + c_instruction_from_string("return 0"), + ), + ) + + self.code = str(code) self.code += f""" #include "stdarg.h" diff --git a/keopscore/keopscore/utils/meta_toolbox/c_function.py b/keopscore/keopscore/utils/meta_toolbox/c_function.py index ce7cc7de9..0e68585e9 100644 --- a/keopscore/keopscore/utils/meta_toolbox/c_function.py +++ b/keopscore/keopscore/utils/meta_toolbox/c_function.py @@ -48,3 +48,12 @@ class cuda_global_kernel(c_function): def __init__(self, name, input_vars=(), body=c_empty_instruction, **kwargs): super().__init__('extern "C" __global__ void', name, input_vars, body, **kwargs) + + +class templated_function(c_function): + + def __init__( + self, name, input_vars=(), body=c_empty_instruction, typename="TYPE", **kwargs + ): + decorator = f"template < typename {typename} >" + super().__init__("int", name, input_vars, body, decorator=decorator, **kwargs) diff --git a/pykeops/pykeops/sandbox/test_lazytensor_clamp.py b/pykeops/pykeops/sandbox/test_lazytensor_clamp.py index 2d021f3ac..2e937b528 100644 --- a/pykeops/pykeops/sandbox/test_lazytensor_clamp.py +++ b/pykeops/pykeops/sandbox/test_lazytensor_clamp.py @@ -8,11 +8,11 @@ M, N, D = 1000, 1000, 3 -test_grad = True +test_grad = False device_id = "cuda:0" if torch.cuda.is_available() else "cpu" -do_warmup = True +do_warmup = False x = torch.randn(M, 1, D, requires_grad=test_grad, device=device_id) y = torch.randn(1, N, D, device=device_id) From 5152703c78c2718325c03c7a57cc86937553ca7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joan=20Glaune=CC=80s?= Date: Mon, 6 May 2024 22:05:43 +0200 Subject: [PATCH 2/4] fixes for clamp and divide ops --- keopscore/keopscore/formulas/maths/ClampInt.py | 3 ++- keopscore/keopscore/formulas/maths/DiffClampInt.py | 3 ++- keopscore/keopscore/formulas/maths/Divide.py | 8 ++++---- keopscore/keopscore/utils/Tree.py | 2 +- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/keopscore/keopscore/formulas/maths/ClampInt.py b/keopscore/keopscore/formulas/maths/ClampInt.py index 0e1f97358..fae2fb163 100644 --- a/keopscore/keopscore/formulas/maths/ClampInt.py +++ b/keopscore/keopscore/formulas/maths/ClampInt.py @@ -27,8 +27,9 @@ class Class(ClampInt_Impl): """ string_id = "ClampInt" + print_fun = lambda x: f"ClampInt({x},{a},{b})" - ScalarOpFun = keops_clampint + ScalarOpFun = lambda x : keops_clampint(x,a,b) @staticmethod def Derivative(x): diff --git a/keopscore/keopscore/formulas/maths/DiffClampInt.py b/keopscore/keopscore/formulas/maths/DiffClampInt.py index 07055552f..e18c117d9 100644 --- a/keopscore/keopscore/formulas/maths/DiffClampInt.py +++ b/keopscore/keopscore/formulas/maths/DiffClampInt.py @@ -29,8 +29,9 @@ def __init__(self, a, b): class Class(DiffClampInt_Impl): string_id = "DiffClampInt" + print_fun = lambda x: f"DiffClampInt({x},{a},{b})" - ScalarOpFun = keops_diffclampint + ScalarOpFun = lambda x : keops_diffclampint(x,a,b) def DiffT_fun(self, v, gradin): from keopscore.formulas import Zero diff --git a/keopscore/keopscore/formulas/maths/Divide.py b/keopscore/keopscore/formulas/maths/Divide.py index 4bc48be59..bf656bc30 100644 --- a/keopscore/keopscore/formulas/maths/Divide.py +++ b/keopscore/keopscore/formulas/maths/Divide.py @@ -3,8 +3,8 @@ from keopscore.formulas.maths.Scalprod import Scalprod from keopscore.formulas.maths.Sum import Sum from keopscore.formulas.maths.Square import Square -from keopscore.formulas.variables.Zero import Zero -from keopscore.formulas.variables.IntCst import IntCst, IntCst_Impl +from keopscore.formulas.variables.Zero import Zero_Impl +from keopscore.formulas.variables.IntCst import IntCst_Impl from keopscore.formulas.variables.RatCst import RatCst, RatCst_Impl from keopscore.utils.misc_utils import KeOps_Error from keopscore.utils.math_functions import keops_div @@ -43,9 +43,9 @@ def DiffT_fun(self, v, gradin): # N.B. The following separate function should theoretically be implemented # as a __new__ method of the previous class, but this can generate infinite recursion problems def Divide(arg0, arg1): - if isinstance(arg0, Zero): + if isinstance(arg0, Zero_Impl): return Broadcast(arg0, arg1.dim) - elif isinstance(arg1, Zero): + elif isinstance(arg1, Zero_Impl): KeOps_Error("division by zero") elif isinstance(arg1, IntCst_Impl): return RatCst(1, arg1.val) * arg0 diff --git a/keopscore/keopscore/utils/Tree.py b/keopscore/keopscore/utils/Tree.py index 5cdce1469..8a774d637 100644 --- a/keopscore/keopscore/utils/Tree.py +++ b/keopscore/keopscore/utils/Tree.py @@ -46,7 +46,7 @@ def nice_print(self): formula_string = formula_string.replace(var_string, v.label) if v.ind >= 0: varstrings.append(f"{v.label}={var_string}") - string = formula_string + " with " + ", ".join(varstrings) + string = formula_string + " with " + "; ".join(varstrings) return string def make_dot(self, filename=None): From 5b188a18091ce56c733e72c6c255d1f0792b4b67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joan=20Glaune=CC=80s?= Date: Mon, 6 May 2024 22:05:54 +0200 Subject: [PATCH 3/4] lint --- keopscore/keopscore/formulas/maths/ClampInt.py | 2 +- keopscore/keopscore/formulas/maths/DiffClampInt.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/keopscore/keopscore/formulas/maths/ClampInt.py b/keopscore/keopscore/formulas/maths/ClampInt.py index fae2fb163..1618e4341 100644 --- a/keopscore/keopscore/formulas/maths/ClampInt.py +++ b/keopscore/keopscore/formulas/maths/ClampInt.py @@ -29,7 +29,7 @@ class Class(ClampInt_Impl): string_id = "ClampInt" print_fun = lambda x: f"ClampInt({x},{a},{b})" - ScalarOpFun = lambda x : keops_clampint(x,a,b) + ScalarOpFun = lambda x: keops_clampint(x, a, b) @staticmethod def Derivative(x): diff --git a/keopscore/keopscore/formulas/maths/DiffClampInt.py b/keopscore/keopscore/formulas/maths/DiffClampInt.py index e18c117d9..62346c54a 100644 --- a/keopscore/keopscore/formulas/maths/DiffClampInt.py +++ b/keopscore/keopscore/formulas/maths/DiffClampInt.py @@ -31,7 +31,7 @@ class Class(DiffClampInt_Impl): string_id = "DiffClampInt" print_fun = lambda x: f"DiffClampInt({x},{a},{b})" - ScalarOpFun = lambda x : keops_diffclampint(x,a,b) + ScalarOpFun = lambda x: keops_diffclampint(x, a, b) def DiffT_fun(self, v, gradin): from keopscore.formulas import Zero From de237c415fd997bdca9d661b9e0a8eaa494febf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joan=20Glaune=CC=80s?= Date: Mon, 6 May 2024 23:26:34 +0200 Subject: [PATCH 4/4] cosmetic --- keopscore/keopscore/mapreduce/cpu/CpuReduc.py | 9 +++++---- keopscore/keopscore/utils/meta_toolbox/c_code.py | 3 +++ .../keopscore/utils/meta_toolbox/c_expression.py | 4 ++++ .../keopscore/utils/meta_toolbox/c_function.py | 14 +++++++++++--- .../keopscore/utils/meta_toolbox/c_instruction.py | 6 ++++++ 5 files changed, 29 insertions(+), 7 deletions(-) diff --git a/keopscore/keopscore/mapreduce/cpu/CpuReduc.py b/keopscore/keopscore/mapreduce/cpu/CpuReduc.py index 4aa3db794..8b09815aa 100644 --- a/keopscore/keopscore/mapreduce/cpu/CpuReduc.py +++ b/keopscore/keopscore/mapreduce/cpu/CpuReduc.py @@ -1,13 +1,13 @@ import keopscore from keopscore.binders.cpp.Cpu_link_compile import Cpu_link_compile -from keopscore.utils.meta_toolbox.c_function import templated_function +from keopscore.utils.meta_toolbox.c_function import c_templated_function from keopscore.mapreduce.cpu.CpuAssignZero import CpuAssignZero from keopscore.mapreduce.MapReduce import MapReduce from keopscore.utils.meta_toolbox import ( c_include, c_variable, c_for, - c_instruction_from_string, + c_return, ) import keopscore @@ -52,7 +52,8 @@ def get_code(self): headers.append("iostream") self.headers += c_include(*headers) - code = self.headers + templated_function( + code = self.headers + c_templated_function( + dtype_out="int", name="CpuConv_" + self.gencode_filename, input_vars=(nx, ny, out, arg), body=( @@ -81,7 +82,7 @@ def get_code(self): red_formula.FinalizeOutput(acc, outi, i), ), ), - c_instruction_from_string("return 0"), + c_return(0), ), ) diff --git a/keopscore/keopscore/utils/meta_toolbox/c_code.py b/keopscore/keopscore/utils/meta_toolbox/c_code.py index 654b494fb..11fe2d7d4 100644 --- a/keopscore/keopscore/utils/meta_toolbox/c_code.py +++ b/keopscore/keopscore/utils/meta_toolbox/c_code.py @@ -31,3 +31,6 @@ def c_include(*headers, **kwargs): def c_define(name, value, **kwargs): return c_code(f"#define {name} {value}\n", **kwargs) + + +c_line_break = c_code("\n") diff --git a/keopscore/keopscore/utils/meta_toolbox/c_expression.py b/keopscore/keopscore/utils/meta_toolbox/c_expression.py index ed123c430..fa833da42 100644 --- a/keopscore/keopscore/utils/meta_toolbox/c_expression.py +++ b/keopscore/keopscore/utils/meta_toolbox/c_expression.py @@ -74,6 +74,10 @@ def __truediv__(self, other): python_op = lambda x, y: x / y return self.binary_op(other, python_op, "/", "division") + def equals(self, other): + python_op = lambda x, y: x == y + return self.binary_op(other, python_op, "==", "comparison", dtype="bool") + def __lt__(self, other): python_op = lambda x, y: x < y return self.binary_op(other, python_op, "<", "comparison", dtype="bool") diff --git a/keopscore/keopscore/utils/meta_toolbox/c_function.py b/keopscore/keopscore/utils/meta_toolbox/c_function.py index 0e68585e9..07050b48b 100644 --- a/keopscore/keopscore/utils/meta_toolbox/c_function.py +++ b/keopscore/keopscore/utils/meta_toolbox/c_function.py @@ -50,10 +50,18 @@ def __init__(self, name, input_vars=(), body=c_empty_instruction, **kwargs): super().__init__('extern "C" __global__ void', name, input_vars, body, **kwargs) -class templated_function(c_function): +class c_templated_function(c_function): def __init__( - self, name, input_vars=(), body=c_empty_instruction, typename="TYPE", **kwargs + self, + dtype_out, + name, + input_vars=(), + body=c_empty_instruction, + typename="TYPE", + **kwargs, ): decorator = f"template < typename {typename} >" - super().__init__("int", name, input_vars, body, decorator=decorator, **kwargs) + super().__init__( + dtype_out, name, input_vars, body, decorator=decorator, **kwargs + ) diff --git a/keopscore/keopscore/utils/meta_toolbox/c_instruction.py b/keopscore/keopscore/utils/meta_toolbox/c_instruction.py index b69c36368..836386629 100644 --- a/keopscore/keopscore/utils/meta_toolbox/c_instruction.py +++ b/keopscore/keopscore/utils/meta_toolbox/c_instruction.py @@ -1,4 +1,5 @@ from .c_code import c_code +from .c_expression import py2c from .misc import Meta_Toolbox_Error @@ -56,5 +57,10 @@ def c_instruction_from_string(string): return c_instruction(string, set(), set()) +def c_return(c_expr): + c_expr = py2c(c_expr) + return c_instruction_from_string(f"return {c_expr}") + + def c_comment(string): return c_instruction("// " + string, set(), set())