Skip to content

Commit

Permalink
Merge commit '1f729be95647c1e6cb18e5f2834203c2cc55cfa2' into new_backward
Browse files Browse the repository at this point in the history
  • Loading branch information
joanglaunes committed May 8, 2024
2 parents 543ab8f + 1f729be commit 197951f
Show file tree
Hide file tree
Showing 10 changed files with 88 additions and 32 deletions.
3 changes: 2 additions & 1 deletion keopscore/keopscore/formulas/maths/ClampInt.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@ class Class(ClampInt_Impl):
"""

string_id = "ClampInt"
print_fun = lambda x: f"ClampInt({x},{a},{b})"

ScalarOpFun = keops_clampint
ScalarOpFun = lambda x: keops_clampint(x, a, b)

@staticmethod
def Derivative(x):
Expand Down
3 changes: 2 additions & 1 deletion keopscore/keopscore/formulas/maths/DiffClampInt.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,9 @@ def __init__(self, a, b):

class Class(DiffClampInt_Impl):
string_id = "DiffClampInt"
print_fun = lambda x: f"DiffClampInt({x},{a},{b})"

ScalarOpFun = keops_diffclampint
ScalarOpFun = lambda x: keops_diffclampint(x, a, b)

def DiffT_fun(self, v, gradin):
from keopscore.formulas import Zero
Expand Down
8 changes: 4 additions & 4 deletions keopscore/keopscore/formulas/maths/Divide.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
from keopscore.formulas.maths.Scalprod import Scalprod
from keopscore.formulas.maths.Sum import Sum
from keopscore.formulas.maths.Square import Square
from keopscore.formulas.variables.Zero import Zero
from keopscore.formulas.variables.IntCst import IntCst, IntCst_Impl
from keopscore.formulas.variables.Zero import Zero_Impl
from keopscore.formulas.variables.IntCst import IntCst_Impl
from keopscore.formulas.variables.RatCst import RatCst, RatCst_Impl
from keopscore.utils.misc_utils import KeOps_Error
from keopscore.utils.math_functions import keops_div
Expand Down Expand Up @@ -43,9 +43,9 @@ def DiffT_fun(self, v, gradin):
# N.B. The following separate function should theoretically be implemented
# as a __new__ method of the previous class, but this can generate infinite recursion problems
def Divide(arg0, arg1):
if isinstance(arg0, Zero):
if isinstance(arg0, Zero_Impl):
return Broadcast(arg0, arg1.dim)
elif isinstance(arg1, Zero):
elif isinstance(arg1, Zero_Impl):
KeOps_Error("division by zero")
elif isinstance(arg1, IntCst_Impl):
return RatCst(1, arg1.val) * arg0
Expand Down
70 changes: 47 additions & 23 deletions keopscore/keopscore/mapreduce/cpu/CpuReduc.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
import keopscore
from keopscore.binders.cpp.Cpu_link_compile import Cpu_link_compile
from keopscore.utils.meta_toolbox.c_function import c_templated_function
from keopscore.mapreduce.cpu.CpuAssignZero import CpuAssignZero
from keopscore.mapreduce.MapReduce import MapReduce
from keopscore.utils.meta_toolbox import c_include
from keopscore.utils.meta_toolbox import (
c_include,
c_variable,
c_for,
c_return,
)
import keopscore


Expand Down Expand Up @@ -34,35 +40,53 @@ def get_code(self):
table = self.varloader.direct_table(args, i, j)
sum_scheme = self.sum_scheme

nx = c_variable("signed long int", "nx")
ny = c_variable("signed long int", "ny")

out = self.out

headers = ["cmath", "stdlib.h"]
if keopscore.config.config.use_OpenMP:
headers.append("omp.h")
if keopscore.debug_ops_at_exec:
headers.append("iostream")
self.headers += c_include(*headers)

self.code = f"""
{self.headers}
template < typename TYPE >
int CpuConv_{self.gencode_filename}(signed long int nx, signed long int ny, TYPE* out, TYPE **{arg.id}) {{
#pragma omp parallel for
for (signed long int i = 0; i < nx; i++) {{
{fout.declare()}
{acc.declare()}
{sum_scheme.declare_temporary_accumulator()}
{red_formula.InitializeReduction(acc)}
{sum_scheme.initialize_temporary_accumulator()}
for (signed long int j = 0; j < ny; j++) {{
{red_formula.formula(fout,table,i,j,tagI)}
{sum_scheme.accumulate_result(acc, fout, j)}
{sum_scheme.periodic_accumulate_temporary(acc, j)}
}}
{sum_scheme.final_operation(acc)}
{red_formula.FinalizeOutput(acc, outi, i)}
}}
return 0;
}}
"""
code = self.headers + c_templated_function(
dtype_out="int",
name="CpuConv_" + self.gencode_filename,
input_vars=(nx, ny, out, arg),
body=(
c_for(
decorator="#pragma omp parallel for",
init=i.declare_assign(0),
end=i < nx,
loop=i.plus_plus,
body=(
fout.declare(),
acc.declare(),
sum_scheme.declare_temporary_accumulator(),
red_formula.InitializeReduction(acc),
sum_scheme.initialize_temporary_accumulator(),
c_for(
init=j.declare_assign(0),
end=j < ny,
loop=j.plus_plus,
body=(
red_formula.formula(fout, table, i, j, tagI),
sum_scheme.accumulate_result(acc, fout, j),
sum_scheme.periodic_accumulate_temporary(acc, j),
),
),
sum_scheme.final_operation(acc),
red_formula.FinalizeOutput(acc, outi, i),
),
),
c_return(0),
),
)

self.code = str(code)

self.code += f"""
#include "stdarg.h"
Expand Down
2 changes: 1 addition & 1 deletion keopscore/keopscore/utils/Tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def nice_print(self):
formula_string = formula_string.replace(var_string, v.label)
if v.ind >= 0:
varstrings.append(f"{v.label}={var_string}")
string = formula_string + " with " + ", ".join(varstrings)
string = formula_string + " with " + "; ".join(varstrings)
return string

def make_dot(self, filename=None):
Expand Down
3 changes: 3 additions & 0 deletions keopscore/keopscore/utils/meta_toolbox/c_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,6 @@ def c_include(*headers, **kwargs):

def c_define(name, value, **kwargs):
    """Return a ``c_code`` object for the line ``#define <name> <value>``.

    The generated text ends with a newline. Any extra keyword arguments are
    forwarded unchanged to ``c_code``.
    """
    directive = f"#define {name} {value}\n"
    return c_code(directive, **kwargs)


# Shared c_code object built from a single newline character.
c_line_break = c_code("\n")
4 changes: 4 additions & 0 deletions keopscore/keopscore/utils/meta_toolbox/c_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ def __truediv__(self, other):
python_op = lambda x, y: x / y
return self.binary_op(other, python_op, "/", "division")

def equals(self, other):
    """Build the ``==`` comparison between this expression and ``other``.

    Delegates to ``self.binary_op``, passing Python equality as the
    evaluation rule, ``"=="`` as the operator string, ``"comparison"`` as the
    operation name and ``"bool"`` as the result dtype.
    """

    def python_op(x, y):
        return x == y

    return self.binary_op(other, python_op, "==", "comparison", dtype="bool")

def __lt__(self, other):
    """Build the ``<`` comparison between this expression and ``other``.

    Delegates to ``self.binary_op``, passing Python ``<`` as the evaluation
    rule, ``"<"`` as the operator string, ``"comparison"`` as the operation
    name and ``"bool"`` as the result dtype.
    """

    def python_op(x, y):
        return x < y

    return self.binary_op(other, python_op, "<", "comparison", dtype="bool")
Expand Down
17 changes: 17 additions & 0 deletions keopscore/keopscore/utils/meta_toolbox/c_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,20 @@ class cuda_global_kernel(c_function):

def __init__(self, name, input_vars=(), body=c_empty_instruction, **kwargs):
    # Every kernel built through this class uses the same fixed
    # linkage/qualifier/return-type prefix; only the remaining arguments vary.
    kernel_prefix = 'extern "C" __global__ void'
    super().__init__(kernel_prefix, name, input_vars, body, **kwargs)


class c_templated_function(c_function):
    """A ``c_function`` created with a ``template < typename ... >`` decorator.

    The template parameter name defaults to ``TYPE`` and can be overridden
    through ``typename``. All other arguments are handed to ``c_function``
    unchanged.
    """

    def __init__(
        self,
        dtype_out,
        name,
        input_vars=(),
        body=c_empty_instruction,
        typename="TYPE",
        **kwargs,
    ):
        super().__init__(
            dtype_out,
            name,
            input_vars,
            body,
            # Passed as an explicit keyword, so a caller-supplied ``decorator``
            # in kwargs still raises a duplicate-keyword TypeError.
            decorator="template < typename {} >".format(typename),
            **kwargs,
        )
6 changes: 6 additions & 0 deletions keopscore/keopscore/utils/meta_toolbox/c_instruction.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .c_code import c_code
from .c_expression import py2c
from .misc import Meta_Toolbox_Error


Expand Down Expand Up @@ -56,5 +57,10 @@ def c_instruction_from_string(string):
return c_instruction(string, set(), set())


def c_return(c_expr):
    """Build the instruction ``return <c_expr>``.

    ``c_expr`` is first passed through ``py2c``; the converted value is then
    interpolated into the instruction text given to
    ``c_instruction_from_string``.
    """
    return c_instruction_from_string(f"return {py2c(c_expr)}")


def c_comment(string):
    """Return a ``c_instruction`` whose text is ``string`` prefixed by ``// ``.

    The instruction is created with two empty sets as its remaining arguments.
    """
    comment_text = "// " + string
    return c_instruction(comment_text, set(), set())
4 changes: 2 additions & 2 deletions pykeops/pykeops/sandbox/test_lazytensor_clamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@

M, N, D = 1000, 1000, 3

test_grad = True
test_grad = False

device_id = "cuda:0" if torch.cuda.is_available() else "cpu"

do_warmup = True
do_warmup = False

x = torch.randn(M, 1, D, requires_grad=test_grad, device=device_id)
y = torch.randn(1, N, D, device=device_id)
Expand Down

0 comments on commit 197951f

Please sign in to comment.