diff --git a/keopscore/keopscore/formulas/factorization/Factorize.py b/keopscore/keopscore/formulas/factorization/Factorize.py index 0bd37a08..e690000f 100644 --- a/keopscore/keopscore/formulas/factorization/Factorize.py +++ b/keopscore/keopscore/formulas/factorization/Factorize.py @@ -86,23 +86,23 @@ def DiffT(self, v, gradin): f, g = self.children (aliasvar,) = self.params f = f.replace(aliasvar, g) - return Factorize(f.DiffT(v, gradin), g) + return Factorize_(f.DiffT(v, gradin), g) # parameters for testing the operation (optional) enable_test = False # enable testing for this operation -def Factorize_(formula, g, v): +def Factorize(formula, g, v): if isinstance(formula, Factorize_Impl): if set.intersection(set(formula.defined_temp_Vars), set(g.Vars_)): f_inner, g_inner = formula.children v_inner = formula.aliasvar - return Factorize_Impl(Factorize_(f_inner, g, v), g_inner, v_inner) + return Factorize_Impl(Factorize(f_inner, g, v), g_inner, v_inner) res = Factorize_Impl(formula, g, v) return res -def Factorize(formula, g): +def Factorize_(formula, g): if type(g) in (Var, Zero, IntCst_Impl): return formula # we get a new negative index (negative because it must not refer to an actual input tensor index) @@ -112,14 +112,14 @@ def Factorize(formula, g): v = Var(newind, g.dim, 3) newformula, cnt = formula.replace_and_count(g, v) if cnt > 1: - return Factorize_(newformula, g, v) + return Factorize(newformula, g, v) else: return formula def AutoFactorize(formula): def RecSearch(formula, g): - newformula = Factorize(formula, g) + newformula = Factorize_(formula, g) if newformula != formula: return newformula for child in g.children: diff --git a/keopscore/keopscore/formulas/factorization/__init__.py b/keopscore/keopscore/formulas/factorization/__init__.py index 970407e1..deea7471 100644 --- a/keopscore/keopscore/formulas/factorization/__init__.py +++ b/keopscore/keopscore/formulas/factorization/__init__.py @@ -1 +1 @@ -from .Factorize import Factorize, AutoFactorize +from .Factorize import Factorize_, AutoFactorize diff --git a/keopscore/keopscore/formulas/maths/Concat.py b/keopscore/keopscore/formulas/maths/Concat.py index d51988d4..0c293144 100644 --- a/keopscore/keopscore/formulas/maths/Concat.py +++ b/keopscore/keopscore/formulas/maths/Concat.py @@ -1,14 +1,14 @@ from keopscore.formulas.Operation import Operation from keopscore.formulas.maths.Extract import Extract from keopscore.utils.misc_utils import KeOps_Error - +from keopscore.formulas.variables.Zero import Zero ############################ ###### Concat ##### ############################ -class Concat(Operation): +class Concat_Impl(Operation): string_id = "Concat" print_spec = ("[", "]"), "brackets", 9 linearity_type = "all" @@ -36,3 +36,9 @@ def DiffT(self, v, gradin): nargs = 2 # number of arguments test_argdims = [5, 3] # dimensions of arguments for testing torch_op = None + +def Concat(arg0, arg1): + if isinstance(arg0, Zero) and isinstance(arg1, Zero): + return Zero(arg0.dim+arg1.dim) + else: + return Concat_Impl(arg0, arg1) \ No newline at end of file diff --git a/keopscore/keopscore/formulas/variables/Var.py b/keopscore/keopscore/formulas/variables/Var.py index fe01b9a0..484b46b8 100644 --- a/keopscore/keopscore/formulas/variables/Var.py +++ b/keopscore/keopscore/formulas/variables/Var.py @@ -1,3 +1,4 @@ +from keopscore.formulas.maths.Concat import Concat from keopscore.utils.meta_toolbox import c_empty_instruction from keopscore.formulas.Operation import Operation @@ -62,7 +63,17 @@ def Op(self, out, table): def DiffT(self, v, gradin): from keopscore.formulas.variables.Zero import Zero - return gradin if v == self else Zero(v.dim) + if isinstance(v, Var): + return gradin if v == self else Zero(v.dim) + elif isinstance(v, list): + if len(v)==1: + return self.DiffT(v[0]) + else: + grads = [self.DiffT(u, gradin) for u in v] + res = grads[0] + for k in range(len(v)-1): + res = Concat(res, grads[k+1]) + return res def chunked_version(self, dimchk): return Var(self.ind, dimchk, self.cat) diff --git a/keopscore/keopscore/sandbox/test_grad.py b/keopscore/keopscore/sandbox/test_grad.py new file mode 100644 index 00000000..10d00ce4 --- /dev/null +++ b/keopscore/keopscore/sandbox/test_grad.py @@ -0,0 +1,32 @@ +# Testing grad engine + +from keopscore.formulas import * +from keopscore.utils.TestFormula import TestFormula +D, Dv = 3, 2 +x = Var(0, D, 0, "x") +y = Var(1, D, 1, "y") +b = Var(2, Dv, 1, "b") + +f = Exp(-SqDist(x, y)) * b +print("f =", f) + +u = Var(3, Dv, 0, "u") + +g = Concat(Grad(f,x,u),Grad(f,b,u)) +print("g=",g) + +TestFormula(g, randseed=1) + +g = AutoFactorize(g) +print("g=",g) + +TestFormula(g, randseed=1) + +g = Grad(f,[x,b],u) +print("g=",g) + +TestFormula(g, randseed=1) + +g = AutoFactorize(g) +print("g=",g) +TestFormula(g, randseed=1) \ No newline at end of file diff --git a/keopscore/keopscore/utils/TestFormula.py b/keopscore/keopscore/utils/TestFormula.py index 293a886b..f807d783 100644 --- a/keopscore/keopscore/utils/TestFormula.py +++ b/keopscore/keopscore/utils/TestFormula.py @@ -1,13 +1,12 @@ -import time +from time import time import torch import numpy as np from torch.autograd import grad from pykeops.torch import Genred from keopscore.formulas import * -import types -def TestFormula(formula, tol=1e-4, dtype="float32", test_grad=False, randseed=None): +def TestFormula(formula, dtype="float32", test_grad=False, randseed=None): # N.B. dtype can be 'float32', 'float64' or 'float16' if isinstance(formula, str): @@ -28,8 +27,8 @@ def TestFormula(formula, tol=1e-4, dtype="float32", test_grad=False, randseed=No ##################################################################### # Declare random inputs: - M = 3000 - N = 5000 + M = 300000 + N = 500000 # Choose the storage place for our data : CPU (host) or GPU (device) memory. device = torch.device("cuda" if torch.cuda.is_available() else "cpu") @@ -58,6 +57,11 @@ def TestFormula(formula, tol=1e-4, dtype="float32", test_grad=False, randseed=No c = my_routine(*args) print("ok, no error") + start = time() + for k in range(10): + c = my_routine(*args) + end = time() + print("average time over 10 calls:", (end-start)/10) print("5 first values :", *c.flatten()[:5].tolist()) #################################################################### @@ -72,6 +76,12 @@ def TestFormula(formula, tol=1e-4, dtype="float32", test_grad=False, randseed=No g = grad(c, args, e) print("ok, no error") + start = time() + for k in range(10): + g = grad(c, args, e) + end = time() + print("average time over 10 calls:", (end-start)/10) + for k in range(nargs): app_str = f"number {k}" if len(args) > 1 else "" print( diff --git a/keopscore/keopscore/utils/Tree.py b/keopscore/keopscore/utils/Tree.py index 528da6ac..beaf9319 100644 --- a/keopscore/keopscore/utils/Tree.py +++ b/keopscore/keopscore/utils/Tree.py @@ -68,10 +68,10 @@ def collect(self, attr, res=[]): def nice_print(self): import os - formula_string = self.__repr__() + formula_string = self.__str__() varstrings = [] for v in self.Vars_: - var_string = v.__repr__() + var_string = v.__str__() formula_string = formula_string.replace(var_string, v.label) if v.ind >= 0: varstrings.append(f"{v.label}={var_string}") @@ -111,10 +111,12 @@ def recursive_fun(formula, rootindex, maxindex): print(f"Saved formula graph to file {filename}.") def __str__(self): - return self.nice_print() # self.recursive_str() + return self.recursive_str() # self.nice_print() # def __repr__(self): - return self.recursive_str() + inner_strings = [child.__repr__() for child in self.children] + inner_strings += [param.__repr__() for param in self.params] + return self.string_id + "(" + ",".join(inner_strings) + ")" # custom __eq__ method def __eq__(self, other): diff --git a/pykeops/pykeops/sandbox/test_lazytensor_gaussian.py b/pykeops/pykeops/sandbox/test_lazytensor_gaussian.py index efe38a0f..ba6dbd16 100644 --- a/pykeops/pykeops/sandbox/test_lazytensor_gaussian.py +++ b/pykeops/pykeops/sandbox/test_lazytensor_gaussian.py @@ -10,7 +10,7 @@ dtype = torch.float32 -test_grad = False +test_grad = True test_grad2 = False device_id = "cuda:0" if torch.cuda.is_available() else "cpu" do_warmup = True @@ -29,7 +29,7 @@ def fun(x, y, b, backend): Dxy = ((x - y) ** 2).sum(dim=2) Kxy = (-Dxy).exp() if backend == "keops": - out = LazyTensor.__matmul__(Kxy, b, sum_scheme="direct_acc") + out = LazyTensor.__matmul__(Kxy, b) else: out = Kxy @ b if device_id != "cpu":