diff --git a/keopscore/keopscore/formulas/factorization/Factorize.py b/keopscore/keopscore/formulas/factorization/Factorize.py
index 0bd37a08..e690000f 100644
--- a/keopscore/keopscore/formulas/factorization/Factorize.py
+++ b/keopscore/keopscore/formulas/factorization/Factorize.py
@@ -86,23 +86,23 @@ def DiffT(self, v, gradin):
         f, g = self.children
         (aliasvar,) = self.params
         f = f.replace(aliasvar, g)
-        return Factorize(f.DiffT(v, gradin), g)
+        return Factorize_(f.DiffT(v, gradin), g)
 
     # parameters for testing the operation (optional)
     enable_test = False  # enable testing for this operation
 
 
-def Factorize_(formula, g, v):
+def Factorize(formula, g, v):
     if isinstance(formula, Factorize_Impl):
         if set.intersection(set(formula.defined_temp_Vars), set(g.Vars_)):
             f_inner, g_inner = formula.children
             v_inner = formula.aliasvar
-            return Factorize_Impl(Factorize_(f_inner, g, v), g_inner, v_inner)
+            return Factorize_Impl(Factorize(f_inner, g, v), g_inner, v_inner)
     res = Factorize_Impl(formula, g, v)
     return res
 
 
-def Factorize(formula, g):
+def Factorize_(formula, g):
     if type(g) in (Var, Zero, IntCst_Impl):
         return formula
     # we get a new negative index (negative because it must not refer to an actual input tensor index)
@@ -112,14 +112,14 @@ def Factorize(formula, g):
     v = Var(newind, g.dim, 3)
     newformula, cnt = formula.replace_and_count(g, v)
     if cnt > 1:
-        return Factorize_(newformula, g, v)
+        return Factorize(newformula, g, v)
     else:
         return formula
 
 
 def AutoFactorize(formula):
     def RecSearch(formula, g):
-        newformula = Factorize(formula, g)
+        newformula = Factorize_(formula, g)
         if newformula != formula:
             return newformula
         for child in g.children:
diff --git a/keopscore/keopscore/formulas/factorization/__init__.py b/keopscore/keopscore/formulas/factorization/__init__.py
index 970407e1..deea7471 100644
--- a/keopscore/keopscore/formulas/factorization/__init__.py
+++ b/keopscore/keopscore/formulas/factorization/__init__.py
@@ -1 +1 @@
-from .Factorize import Factorize, AutoFactorize
+from .Factorize import Factorize_, AutoFactorize
diff --git a/keopscore/keopscore/formulas/maths/Concat.py b/keopscore/keopscore/formulas/maths/Concat.py
index d51988d4..0c293144 100644
--- a/keopscore/keopscore/formulas/maths/Concat.py
+++ b/keopscore/keopscore/formulas/maths/Concat.py
@@ -1,14 +1,14 @@
 from keopscore.formulas.Operation import Operation
 from keopscore.formulas.maths.Extract import Extract
 from keopscore.utils.misc_utils import KeOps_Error
-
+from keopscore.formulas.variables.Zero import Zero
 
 ############################
 ######    Concat       #####
 ############################
 
 
-class Concat(Operation):
+class Concat_Impl(Operation):
     string_id = "Concat"
     print_spec = ("[", "]"), "brackets", 9
     linearity_type = "all"
@@ -36,3 +36,9 @@ def DiffT(self, v, gradin):
     nargs = 2  # number of arguments
     test_argdims = [5, 3]  # dimensions of arguments for testing
     torch_op = None
+
+def Concat(arg0, arg1):
+    if isinstance(arg0, Zero) and isinstance(arg1, Zero):
+        return Zero(arg0.dim+arg1.dim)
+    else:
+        return Concat_Impl(arg0, arg1)
\ No newline at end of file
diff --git a/keopscore/keopscore/formulas/variables/Var.py b/keopscore/keopscore/formulas/variables/Var.py
index fe01b9a0..484b46b8 100644
--- a/keopscore/keopscore/formulas/variables/Var.py
+++ b/keopscore/keopscore/formulas/variables/Var.py
@@ -1,3 +1,4 @@
+from keopscore.formulas.maths.Concat import Concat
 from keopscore.utils.meta_toolbox import c_empty_instruction
 from keopscore.formulas.Operation import Operation
 
@@ -62,7 +63,17 @@ def Op(self, out, table):
     def DiffT(self, v, gradin):
         from keopscore.formulas.variables.Zero import Zero
 
-        return gradin if v == self else Zero(v.dim)
+        if isinstance(v, Var):
+            return gradin if v == self else Zero(v.dim)
+        elif isinstance(v, list):
+            if len(v)==1:
+                return self.DiffT(v[0])
+            else:
+                grads = [self.DiffT(u, gradin) for u in v]
+                res = grads[0]
+                for k in range(len(v)-1):
+                    res = Concat(res, grads[k+1])
+                return res
 
     def chunked_version(self, dimchk):
         return Var(self.ind, dimchk, self.cat)
diff --git a/keopscore/keopscore/sandbox/test_grad.py b/keopscore/keopscore/sandbox/test_grad.py
new file mode 100644
index 00000000..10d00ce4
--- /dev/null
+++ b/keopscore/keopscore/sandbox/test_grad.py
@@ -0,0 +1,32 @@
+# Testing grad engine
+
+from keopscore.formulas import *
+from keopscore.utils.TestFormula import TestFormula
+D, Dv = 3, 2
+x = Var(0, D, 0, "x")
+y = Var(1, D, 1, "y")
+b = Var(2, Dv, 1, "b")
+
+f = Exp(-SqDist(x, y)) * b
+print("f =", f)
+
+u = Var(3, Dv, 0, "u")
+
+g = Concat(Grad(f,x,u),Grad(f,b,u))
+print("g=",g)
+
+TestFormula(g, randseed=1)
+
+g = AutoFactorize(g)
+print("g=",g)
+
+TestFormula(g, randseed=1)
+
+g = Grad(f,[x,b],u)
+print("g=",g)
+
+TestFormula(g, randseed=1)
+
+g = AutoFactorize(g)
+print("g=",g)
+TestFormula(g, randseed=1)
\ No newline at end of file
diff --git a/keopscore/keopscore/utils/TestFormula.py b/keopscore/keopscore/utils/TestFormula.py
index 293a886b..f807d783 100644
--- a/keopscore/keopscore/utils/TestFormula.py
+++ b/keopscore/keopscore/utils/TestFormula.py
@@ -1,13 +1,12 @@
-import time
+from time import time
 import torch
 import numpy as np
 from torch.autograd import grad
 from pykeops.torch import Genred
 from keopscore.formulas import *
-import types
 
 
-def TestFormula(formula, tol=1e-4, dtype="float32", test_grad=False, randseed=None):
+def TestFormula(formula, dtype="float32", test_grad=False, randseed=None):
     # N.B. dtype can be 'float32', 'float64' or 'float16'
 
     if isinstance(formula, str):
@@ -28,8 +27,8 @@ def TestFormula(formula, tol=1e-4, dtype="float32", test_grad=False, randseed=No
     #####################################################################
     # Declare random inputs:
 
-    M = 3000
-    N = 5000
+    M = 300000
+    N = 500000
 
     # Choose the storage place for our data : CPU (host) or GPU (device) memory.
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -58,6 +57,11 @@ def TestFormula(formula, tol=1e-4, dtype="float32", test_grad=False, randseed=No
     c = my_routine(*args)
 
     print("ok, no error")
+    start = time()
+    for k in range(10):
+        c = my_routine(*args)
+    end = time()
+    print("average time over 10 calls:", (end-start)/10)
     print("5 first values :", *c.flatten()[:5].tolist())
 
     ####################################################################
@@ -72,6 +76,12 @@ def TestFormula(formula, tol=1e-4, dtype="float32", test_grad=False, randseed=No
         g = grad(c, args, e)
 
         print("ok, no error")
+        start = time()
+        for k in range(10):
+            g = grad(c, args, e)
+        end = time()
+        print("average time over 10 calls:", (end-start)/10)
+
         for k in range(nargs):
             app_str = f"number {k}" if len(args) > 1 else ""
             print(
diff --git a/keopscore/keopscore/utils/Tree.py b/keopscore/keopscore/utils/Tree.py
index 528da6ac..beaf9319 100644
--- a/keopscore/keopscore/utils/Tree.py
+++ b/keopscore/keopscore/utils/Tree.py
@@ -68,10 +68,10 @@ def collect(self, attr, res=[]):
     def nice_print(self):
         import os
 
-        formula_string = self.__repr__()
+        formula_string = self.__str__()
         varstrings = []
         for v in self.Vars_:
-            var_string = v.__repr__()
+            var_string = v.__str__()
             formula_string = formula_string.replace(var_string, v.label)
             if v.ind >= 0:
                 varstrings.append(f"{v.label}={var_string}")
@@ -111,10 +111,12 @@ def recursive_fun(formula, rootindex, maxindex):
         print(f"Saved formula graph to file {filename}.")
 
     def __str__(self):
-        return self.nice_print()  # self.recursive_str()
+        return self.recursive_str() # self.nice_print()  # 
 
     def __repr__(self):
-        return self.recursive_str()
+        inner_strings = [child.__repr__() for child in self.children]
+        inner_strings += [param.__repr__() for param in self.params]
+        return self.string_id + "(" + ",".join(inner_strings) + ")"
 
     # custom __eq__ method
     def __eq__(self, other):
diff --git a/pykeops/pykeops/sandbox/test_lazytensor_gaussian.py b/pykeops/pykeops/sandbox/test_lazytensor_gaussian.py
index efe38a0f..ba6dbd16 100644
--- a/pykeops/pykeops/sandbox/test_lazytensor_gaussian.py
+++ b/pykeops/pykeops/sandbox/test_lazytensor_gaussian.py
@@ -10,7 +10,7 @@
 
 dtype = torch.float32
 
-test_grad = False
+test_grad = True
 test_grad2 = False
 device_id = "cuda:0" if torch.cuda.is_available() else "cpu"
 do_warmup = True
@@ -29,7 +29,7 @@ def fun(x, y, b, backend):
     Dxy = ((x - y) ** 2).sum(dim=2)
     Kxy = (-Dxy).exp()
     if backend == "keops":
-        out = LazyTensor.__matmul__(Kxy, b, sum_scheme="direct_acc")
+        out = LazyTensor.__matmul__(Kxy, b)
     else:
         out = Kxy @ b
     if device_id != "cpu":