automatic differentiation 1) reverse accumulation
lehner committed Oct 30, 2023
1 parent cabcfaa commit afe74d2
Showing 11 changed files with 362 additions and 0 deletions.
50 changes: 50 additions & 0 deletions lib/cgpt/lib/foundation/unary.h
@@ -188,3 +188,53 @@ template<class obj> Lattice<obj> cgpt_mod(const Lattice<obj> &rhs_i,RealD y){
});
return ret_i;
}


static accelerator_inline ComplexD _cgpt_relu(const ComplexD & z, double y) { return (z.real() > 0) ? z : y*z; };
static accelerator_inline ComplexF _cgpt_relu(const ComplexF & z, double y) { return (z.real() > 0) ? z : ((float)y)*z; };
template <class scalar>
struct cgpt_relu_functor {
double y;
accelerator cgpt_relu_functor(double _y) : y(_y){};
accelerator scalar operator()(const scalar &a) const { return _cgpt_relu(a, y); }
};
template <class S, class V>
accelerator_inline Grid_simd<S, V> cgpt_relu(const Grid_simd<S, V> &r, double y) {
return SimdApply(cgpt_relu_functor<S>(y), r);
}
BINARY_RSCALAR(cgpt_relu,RealD);
template<class obj> Lattice<obj> cgpt_relu(const Lattice<obj> &rhs_i,RealD y){
Lattice<obj> ret_i(rhs_i.Grid());
autoView( rhs, rhs_i, AcceleratorRead);
autoView( ret, ret_i, AcceleratorWrite);
ret.Checkerboard() = rhs.Checkerboard();
accelerator_for(ss,rhs.size(),1,{
ret[ss]=cgpt_relu(rhs[ss],y);
});
return ret_i;
}


static accelerator_inline ComplexD _cgpt_drelu(const ComplexD & z, double y) { return (z.real() > 0) ? 1 : y; };
static accelerator_inline ComplexF _cgpt_drelu(const ComplexF & z, double y) { return (z.real() > 0) ? 1 : ((float)y); };
template <class scalar>
struct cgpt_drelu_functor {
double y;
accelerator cgpt_drelu_functor(double _y) : y(_y){};
accelerator scalar operator()(const scalar &a) const { return _cgpt_drelu(a, y); }
};
template <class S, class V>
accelerator_inline Grid_simd<S, V> cgpt_drelu(const Grid_simd<S, V> &r, double y) {
return SimdApply(cgpt_drelu_functor<S>(y), r);
}
BINARY_RSCALAR(cgpt_drelu,RealD);
template<class obj> Lattice<obj> cgpt_drelu(const Lattice<obj> &rhs_i,RealD y){
Lattice<obj> ret_i(rhs_i.Grid());
autoView( rhs, rhs_i, AcceleratorRead);
autoView( ret, ret_i, AcceleratorWrite);
ret.Checkerboard() = rhs.Checkerboard();
accelerator_for(ss,rhs.size(),1,{
ret[ss]=cgpt_drelu(rhs[ss],y);
});
return ret_i;
}
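
The pair above implements a leaky ReLU for complex numbers: the branch tests the real part, the slope y scales the full complex value in the inactive region, and _cgpt_drelu supplies the matching pointwise derivative for the backward pass. A minimal scalar Python sketch of the same semantics, purely illustrative (`a` plays the role of the C++ parameter y):

def relu(z, a=0.0):
    # z if Re(z) > 0, else a*z: the slope scales the whole complex number
    return z if z.real > 0 else a * z

def drelu(z, a=0.0):
    # pointwise derivative of relu: 1 in the active region, a otherwise
    return 1.0 if z.real > 0 else a

relu(-1.0 + 2.0j, a=0.1)   # -> (-0.1+0.2j)
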
4 changes: 4 additions & 0 deletions lib/cgpt/lib/lattice/unary.h
@@ -30,6 +30,10 @@ void cgpt_unary_from(Lattice<T>& dst, const Lattice<T>& src, PyObject* params) {
dst = cgpt_sqrt(src);
} else if (op == "pow") {
dst = cgpt_pow(src, get_float(params,"exponent"));
} else if (op == "relu") {
dst = cgpt_relu(src, get_float(params,"a"));
} else if (op == "drelu") {
dst = cgpt_drelu(src, get_float(params,"a"));
} else if (op == "exp") {
dst = exp(src);
} else if (op == "log") {
1 change: 1 addition & 0 deletions lib/gpt/__init__.py
@@ -25,6 +25,7 @@
import gpt.qcd
import gpt.qis
import gpt.ml
import gpt.ad
import gpt.jobs
import socket
import cgpt
21 changes: 21 additions & 0 deletions lib/gpt/ad/__init__.py
@@ -0,0 +1,21 @@
#
# GPT - Grid Python Toolkit
# Copyright (C) 2023 Christoph Lehner ([email protected], https://github.com/lehner/gpt)
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
import gpt.ad.reverse

# import gpt.ad.forward
20 changes: 20 additions & 0 deletions lib/gpt/ad/reverse/__init__.py
@@ -0,0 +1,20 @@
#
# GPT - Grid Python Toolkit
# Copyright (C) 2023 Christoph Lehner ([email protected], https://github.com/lehner/gpt)
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
from gpt.ad.reverse.node import node
from gpt.ad.reverse.transform import inner_product, relu, norm2
115 changes: 115 additions & 0 deletions lib/gpt/ad/reverse/node.py
@@ -0,0 +1,115 @@
#
# GPT - Grid Python Toolkit
# Copyright (C) 2023 Christoph Lehner ([email protected], https://github.com/lehner/gpt)
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
import gpt as g


def traverse(nodes, visited, n):
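    # post-order depth-first search: children are appended before their
    # parents, so `nodes` ends up in topological order (the forward pass
    # runs it as-is, the backward pass in reverse)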
if id(n) not in visited:
visited.add(id(n))
for c in n._children:
traverse(nodes, visited, c)
nodes.append(n)


class node:
def __init__(self, _forward, _backward=lambda z: None, _children=(), with_gradient=True):
if not callable(_forward):
self._forward = lambda: _forward
else:
self._forward = _forward
self._backward = _backward
self._children = _children
self.with_gradient = with_gradient
self.value = None
self.gradient = None

def zero_gradient(self):
self.gradient = g(0 * self.value)

def __mul__(x, y):
if not isinstance(x, node):
x = node(x, with_gradient=False)

if not isinstance(y, node):
y = node(y, with_gradient=False)

def _forward():
return x.value * y.value

        # must not capture z here; capturing it would create a reference cycle
def _backward(z):
if x.with_gradient:
x.gradient += z.gradient * g.adj(y.value)
if y.with_gradient:
y.gradient += g.adj(x.value) * z.gradient

return node(_forward, _backward, (x, y))

def __rmul__(x, y):
return node.__mul__(y, x)

def __add__(x, y):
def _forward():
return x.value + y.value

        # must not capture z here; capturing it would create a reference cycle
def _backward(z):
if x.with_gradient:
x.gradient += z.gradient
if y.with_gradient:
y.gradient += z.gradient

return node(_forward, _backward, (x, y))

def __sub__(x, y):
def _forward():
return x.value - y.value

        # must not capture z here; capturing it would create a reference cycle
def _backward(z):
if x.with_gradient:
x.gradient += z.gradient
if y.with_gradient:
y.gradient -= z.gradient

return node(_forward, _backward, (x, y))

def forward(self, nodes, eager=True):
for n in nodes:
n.value = n._forward()
if eager:
n.value = g(n.value)

def backward(self, nodes):
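        # zero all gradients, seed the root with 1 (the root value is
        # expected to be scalar, e.g. the output of norm2), then invoke
        # the recorded _backward callbacks in reverse topological order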
for n in nodes:
n.zero_gradient()
self.gradient = 1
for n in reversed(nodes):
n._backward(n)

def __call__(self, with_gradients=True):
visited = set([])
nodes = []
traverse(nodes, visited, self)
self.forward(nodes)
if with_gradients:
self.backward(nodes)
nodes = None
visited = None
return self.value
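
A node records a lazy _forward closure and a per-operation _backward callback; calling the root node runs the forward sweep and then accumulates gradients in reverse. A minimal usage sketch (the grid/rng/lattice calls follow standard gpt usage and are assumptions of this example, not part of the commit):

import gpt as g
from gpt.ad.reverse import node, norm2

grid = g.grid([4, 4, 4, 4], g.double)
rng = g.random("example")

x = node(rng.cnormal(g.vcolor(grid)))  # leaf that tracks its gradient
cost = norm2(x)                        # builds the graph; nothing is evaluated yet

value = cost()  # forward sweep, then reverse accumulation
# x.gradient now holds the derivative of norm2 with respect to x
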
51 changes: 51 additions & 0 deletions lib/gpt/ad/reverse/transform.py
@@ -0,0 +1,51 @@
#
# GPT - Grid Python Toolkit
# Copyright (C) 2023 Christoph Lehner ([email protected], https://github.com/lehner/gpt)
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
import gpt as g
from gpt.ad.reverse import node


def inner_product(x, y):
def _forward():
return g.inner_product(x.value, y.value)

    # must not capture z here; capturing it would create a reference cycle
def _backward(z):
if x.with_gradient:
x.gradient += y.value * g.adj(z.gradient)
if y.with_gradient:
y.gradient += x.value * g.adj(z.gradient)

return node(_forward, _backward, (x, y))


def norm2(x):
return inner_product(x, x)


def relu(x, a=0.0):
def _forward():
return g.component.relu(a)(x.value)

    # must not capture z here; capturing it would create a reference cycle
def _backward(z):
if x.with_gradient:
active = g.component.drelu(a)(x.value)
x.gradient += g.component.multiply(active, z.gradient)

return node(_forward, _backward, (x,))
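
As a sketch of how these primitives compose: the fragment below scores a leaky-ReLU layer against a target and pulls gradients back to the weight and bias. The grid, rng, and mcolor/vcolor lattice types follow standard gpt usage and are assumptions of this example, not part of the commit:

import gpt as g
from gpt.ad.reverse import node, relu, norm2

grid = g.grid([4, 4, 4, 4], g.double)
rng = g.random("demo")

W = node(rng.cnormal(g.mcolor(grid)))                       # weight field
b = node(rng.cnormal(g.vcolor(grid)))                       # bias field
x = node(rng.cnormal(g.vcolor(grid)), with_gradient=False)  # frozen input
t = node(rng.cnormal(g.vcolor(grid)), with_gradient=False)  # frozen target

cost = norm2(relu(W * x + b, a=0.01) - t)  # leaky slope a = 0.01
cost()                                     # forward + reverse accumulation
# W.gradient and b.gradient now hold the accumulated derivatives
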
8 changes: 8 additions & 0 deletions lib/gpt/core/component.py
@@ -69,6 +69,14 @@ def pow(exponent):
return _simple_map("pow", {"exponent": exponent})


def relu(a=0.0):
return _simple_map("relu", {"a": a})


def drelu(a=0.0):
return _simple_map("drelu", {"a": a})


def mod(n):
return _simple_map("mod", {"n": n})

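
The new maps are exposed through the same component interface as pow and mod; a small usage sketch (grid and rng setup assumed as in standard gpt):

import gpt as g

grid = g.grid([4, 4, 4, 4], g.double)
z = g.random("c").cnormal(g.complex(grid))

r = g.component.relu(0.01)(z)   # leaky ReLU with slope a = 0.01
d = g.component.drelu(0.01)(z)  # its pointwise derivative
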
9 changes: 9 additions & 0 deletions lib/gpt/core/object_type/container.py
@@ -118,6 +118,9 @@ def __init__(self, ndim):
self.__name__: (lambda: ot_singlet, (0, 0)),
}

def compose(self, a, b):
return a + b


###
# Matrices and vectors of spin
@@ -190,6 +193,9 @@ def __init__(self, ndim):
self.otab = {self.__name__: (lambda: ot_matrix_spin(ndim), [])}
self.itab = {self.__name__: (lambda: ot_singlet, (0, 0))}

def compose(self, a, b):
return a + b


###
# Matrices and vectors of both spin and color
@@ -262,6 +268,9 @@ def __init__(self, spin_ndim, color_ndim):
"ot_singlet": (lambda: self, None),
}

def compose(self, a, b):
return a + b

def distribute(self, mat, dst, src, zero_lhs):
src, dst = gpt.util.to_list(src), gpt.util.to_list(dst)
if src[0].otype.__name__ == self.ot_matrix:
[Diffs for the remaining 2 changed files were not rendered.]