automatic differentiation 1) reverse accumulation
lehner committed Oct 30, 2023
1 parent cabcfaa commit afe74d2
Showing 11 changed files with 362 additions and 0 deletions.
50 changes: 50 additions & 0 deletions lib/cgpt/lib/foundation/unary.h
@@ -188,3 +188,53 @@ template<class obj> Lattice<obj> cgpt_mod(const Lattice<obj> &rhs_i,RealD y){
});
return ret_i;
}


static accelerator_inline ComplexD _cgpt_relu(const ComplexD & z, double y) { return (z.real() > 0) ? z : y*z; };
static accelerator_inline ComplexF _cgpt_relu(const ComplexF & z, double y) { return (z.real() > 0) ? z : ((float)y)*z; };
template <class scalar>
struct cgpt_relu_functor {
double y;
accelerator cgpt_relu_functor(double _y) : y(_y){};
accelerator scalar operator()(const scalar &a) const { return _cgpt_relu(a, y); }
};
template <class S, class V>
accelerator_inline Grid_simd<S, V> cgpt_relu(const Grid_simd<S, V> &r, double y) {
return SimdApply(cgpt_relu_functor<S>(y), r);
}
BINARY_RSCALAR(cgpt_relu,RealD);
template<class obj> Lattice<obj> cgpt_relu(const Lattice<obj> &rhs_i,RealD y){
Lattice<obj> ret_i(rhs_i.Grid());
autoView( rhs, rhs_i, AcceleratorRead);
autoView( ret, ret_i, AcceleratorWrite);
ret.Checkerboard() = rhs.Checkerboard();
accelerator_for(ss,rhs.size(),1,{
ret[ss]=cgpt_relu(rhs[ss],y);
});
return ret_i;
}


static accelerator_inline ComplexD _cgpt_drelu(const ComplexD & z, double y) { return (z.real() > 0) ? 1 : y; };
static accelerator_inline ComplexF _cgpt_drelu(const ComplexF & z, double y) { return (z.real() > 0) ? 1 : ((float)y); };
template <class scalar>
struct cgpt_drelu_functor {
double y;
accelerator cgpt_drelu_functor(double _y) : y(_y){};
accelerator scalar operator()(const scalar &a) const { return _cgpt_drelu(a, y); }
};
template <class S, class V>
accelerator_inline Grid_simd<S, V> cgpt_drelu(const Grid_simd<S, V> &r, double y) {
return SimdApply(cgpt_drelu_functor<S>(y), r);
}
BINARY_RSCALAR(cgpt_drelu,RealD);
template<class obj> Lattice<obj> cgpt_drelu(const Lattice<obj> &rhs_i,RealD y){
Lattice<obj> ret_i(rhs_i.Grid());
autoView( rhs, rhs_i, AcceleratorRead);
autoView( ret, ret_i, AcceleratorWrite);
ret.Checkerboard() = rhs.Checkerboard();
accelerator_for(ss,rhs.size(),1,{
ret[ss]=cgpt_drelu(rhs[ss],y);
});
return ret_i;
}
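
The pair above implements a leaky ReLU for complex numbers: the branch tests the real part, the slope y scales the full complex value in the inactive region, and _cgpt_drelu supplies the matching pointwise derivative for the backward pass. A minimal scalar Python sketch of the same semantics, purely illustrative (`a` plays the role of the C++ parameter y):

def relu(z, a=0.0):
    # z if Re(z) > 0, else a*z: the slope scales the whole complex number
    return z if z.real > 0 else a * z

def drelu(z, a=0.0):
    # pointwise derivative of relu: 1 in the active region, a otherwise
    return 1.0 if z.real > 0 else a

relu(-1.0 + 2.0j, a=0.1)   # -> (-0.1+0.2j)
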
4 changes: 4 additions & 0 deletions lib/cgpt/lib/lattice/unary.h
@@ -30,6 +30,10 @@ void cgpt_unary_from(Lattice<T>& dst, const Lattice<T>& src, PyObject* params) {
dst = cgpt_sqrt(src);
} else if (op == "pow") {
dst = cgpt_pow(src, get_float(params,"exponent"));
} else if (op == "relu") {
dst = cgpt_relu(src, get_float(params,"a"));
} else if (op == "drelu") {
dst = cgpt_drelu(src, get_float(params,"a"));
} else if (op == "exp") {
dst = exp(src);
} else if (op == "log") {
1 change: 1 addition & 0 deletions lib/gpt/__init__.py
@@ -25,6 +25,7 @@
import gpt.qcd
import gpt.qis
import gpt.ml
import gpt.ad
import gpt.jobs
import socket
import cgpt
21 changes: 21 additions & 0 deletions lib/gpt/ad/__init__.py
@@ -0,0 +1,21 @@
#
# GPT - Grid Python Toolkit
# Copyright (C) 2023 Christoph Lehner ([email protected], https://github.com/lehner/gpt)
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
import gpt.ad.reverse

# import gpt.ad.forward
20 changes: 20 additions & 0 deletions lib/gpt/ad/reverse/__init__.py
@@ -0,0 +1,20 @@
#
# GPT - Grid Python Toolkit
# Copyright (C) 2023 Christoph Lehner ([email protected], https://github.com/lehner/gpt)
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
from gpt.ad.reverse.node import node
from gpt.ad.reverse.transform import inner_product, relu, norm2
115 changes: 115 additions & 0 deletions lib/gpt/ad/reverse/node.py
@@ -0,0 +1,115 @@
#
# GPT - Grid Python Toolkit
# Copyright (C) 2023 Christoph Lehner ([email protected], https://github.com/lehner/gpt)
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
import gpt as g


def traverse(nodes, visited, n):
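    # post-order depth-first search: children are appended before their
    # parents, so `nodes` ends up in topological order (the forward pass
    # runs it as-is, the backward pass in reverse)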
if id(n) not in visited:
visited.add(id(n))
for c in n._children:
traverse(nodes, visited, c)
nodes.append(n)


class node:
def __init__(self, _forward, _backward=lambda z: None, _children=(), with_gradient=True):
if not callable(_forward):
self._forward = lambda: _forward
else:
self._forward = _forward
self._backward = _backward
self._children = _children
self.with_gradient = with_gradient
self.value = None
self.gradient = None

def zero_gradient(self):
self.gradient = g(0 * self.value)

def __mul__(x, y):
if not isinstance(x, node):
x = node(x, with_gradient=False)

if not isinstance(y, node):
y = node(y, with_gradient=False)

def _forward():
return x.value * y.value

        # must not capture z here; capturing it would create a reference cycle
def _backward(z):
if x.with_gradient:
x.gradient += z.gradient * g.adj(y.value)
if y.with_gradient:
y.gradient += g.adj(x.value) * z.gradient

return node(_forward, _backward, (x, y))

def __rmul__(x, y):
return node.__mul__(y, x)

def __add__(x, y):
def _forward():
return x.value + y.value

        # must not capture z here; capturing it would create a reference cycle
def _backward(z):
if x.with_gradient:
x.gradient += z.gradient
if y.with_gradient:
y.gradient += z.gradient

return node(_forward, _backward, (x, y))

def __sub__(x, y):
def _forward():
return x.value - y.value

        # must not capture z here; capturing it would create a reference cycle
def _backward(z):
if x.with_gradient:
x.gradient += z.gradient
if y.with_gradient:
y.gradient -= z.gradient

return node(_forward, _backward, (x, y))

def forward(self, nodes, eager=True):
for n in nodes:
n.value = n._forward()
if eager:
n.value = g(n.value)

def backward(self, nodes):
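        # zero all gradients, seed the root with 1 (the root value is
        # expected to be scalar, e.g. the output of norm2), then invoke
        # the recorded _backward callbacks in reverse topological order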
for n in nodes:
n.zero_gradient()
self.gradient = 1
for n in reversed(nodes):
n._backward(n)

def __call__(self, with_gradients=True):
visited = set([])
nodes = []
traverse(nodes, visited, self)
self.forward(nodes)
if with_gradients:
self.backward(nodes)
nodes = None
visited = None
return self.value
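
A node records a lazy _forward closure and a per-operation _backward callback; calling the root node runs the forward sweep and then accumulates gradients in reverse. A minimal usage sketch (the grid/rng/lattice calls follow standard gpt usage and are assumptions of this example, not part of the commit):

import gpt as g
from gpt.ad.reverse import node, norm2

grid = g.grid([4, 4, 4, 4], g.double)
rng = g.random("example")

x = node(rng.cnormal(g.vcolor(grid)))  # leaf that tracks its gradient
cost = norm2(x)                        # builds the graph; nothing is evaluated yet

value = cost()  # forward sweep, then reverse accumulation
# x.gradient now holds the derivative of norm2 with respect to x
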
51 changes: 51 additions & 0 deletions lib/gpt/ad/reverse/transform.py
@@ -0,0 +1,51 @@
#
# GPT - Grid Python Toolkit
# Copyright (C) 2023 Christoph Lehner ([email protected], https://github.com/lehner/gpt)
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
import gpt as g
from gpt.ad.reverse import node


def inner_product(x, y):
def _forward():
return g.inner_product(x.value, y.value)

    # must not capture z here; capturing it would create a reference cycle
def _backward(z):
if x.with_gradient:
x.gradient += y.value * g.adj(z.gradient)
if y.with_gradient:
y.gradient += x.value * g.adj(z.gradient)

return node(_forward, _backward, (x, y))


def norm2(x):
return inner_product(x, x)


def relu(x, a=0.0):
def _forward():
return g.component.relu(a)(x.value)

    # must not capture z here; capturing it would create a reference cycle
def _backward(z):
if x.with_gradient:
active = g.component.drelu(a)(x.value)
x.gradient += g.component.multiply(active, z.gradient)

return node(_forward, _backward, (x,))
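
As a sketch of how these primitives compose: the fragment below scores a leaky-ReLU layer against a target and pulls gradients back to the weight and bias. The grid, rng, and mcolor/vcolor lattice types follow standard gpt usage and are assumptions of this example, not part of the commit:

import gpt as g
from gpt.ad.reverse import node, relu, norm2

grid = g.grid([4, 4, 4, 4], g.double)
rng = g.random("demo")

W = node(rng.cnormal(g.mcolor(grid)))                       # weight field
b = node(rng.cnormal(g.vcolor(grid)))                       # bias field
x = node(rng.cnormal(g.vcolor(grid)), with_gradient=False)  # frozen input
t = node(rng.cnormal(g.vcolor(grid)), with_gradient=False)  # frozen target

cost = norm2(relu(W * x + b, a=0.01) - t)  # leaky slope a = 0.01
cost()                                     # forward + reverse accumulation
# W.gradient and b.gradient now hold the accumulated derivatives
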
8 changes: 8 additions & 0 deletions lib/gpt/core/component.py
@@ -69,6 +69,14 @@ def pow(exponent):
return _simple_map("pow", {"exponent": exponent})


def relu(a=0.0):
return _simple_map("relu", {"a": a})


def drelu(a=0.0):
return _simple_map("drelu", {"a": a})


def mod(n):
return _simple_map("mod", {"n": n})

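
The new maps are exposed through the same component interface as pow and mod; a small usage sketch (grid and rng setup assumed as in standard gpt):

import gpt as g

grid = g.grid([4, 4, 4, 4], g.double)
z = g.random("c").cnormal(g.complex(grid))

r = g.component.relu(0.01)(z)   # leaky ReLU with slope a = 0.01
d = g.component.drelu(0.01)(z)  # its pointwise derivative
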
9 changes: 9 additions & 0 deletions lib/gpt/core/object_type/container.py
@@ -118,6 +118,9 @@ def __init__(self, ndim):
self.__name__: (lambda: ot_singlet, (0, 0)),
}

def compose(self, a, b):
return a + b


###
# Matrices and vectors of spin
@@ -190,6 +193,9 @@ def __init__(self, ndim):
self.otab = {self.__name__: (lambda: ot_matrix_spin(ndim), [])}
self.itab = {self.__name__: (lambda: ot_singlet, (0, 0))}

def compose(self, a, b):
return a + b


###
# Matrices and vectors of both spin and color
@@ -262,6 +268,9 @@ def __init__(self, spin_ndim, color_ndim):
"ot_singlet": (lambda: self, None),
}

def compose(self, a, b):
return a + b

def distribute(self, mat, dst, src, zero_lhs):
src, dst = gpt.util.to_list(src), gpt.util.to_list(dst)
if src[0].otype.__name__ == self.ot_matrix:
[Diffs for the remaining 2 changed files were not rendered.]