towards full ad support
towards full ad support
lehner committed Nov 9, 2023
1 parent 62395f0 commit 958bf45
Showing 17 changed files with 268 additions and 38 deletions.
9 changes: 9 additions & 0 deletions lib/gpt/ad/forward/foundation.py
@@ -46,3 +46,12 @@ def adj(sx):

 def sum(sx):
     return sx.distribute1(lambda a: g.sum(a))
+
+
+def identity(sx):
+    idsx = g.identity(sx[1])
+    return g.ad.forward.series({g.ad.forward.infinitesimal({}): idsx}, sx.landau_O)
+
+
+def infinitesimal_to_cartesian(src, dsrc):
+    return dsrc[1].otype.infinitesimal_to_cartesian(src, dsrc)
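These two hooks route g.identity and the new g.infinitesimal_to_cartesian through the foundation layer for forward-mode series; sx[1] is the series' eps^0 term, so identity(sx) wraps the identity of that term in a fresh series with the same truncation. A minimal usage sketch, not part of the commit; the fad.infinitesimal("eps") and fad.landau(...) constructors are assumptions following gpt's AD tests:

import gpt as g

fad = g.ad.forward
eps = fad.infinitesimal("eps")  # assumed constructor
on = fad.landau(eps**2)         # assumed truncation: drop O(eps^2) terms

grid = g.grid([4, 4, 4, 4], g.double)
rng = g.random("example")
m = g.mcolor(grid)
rng.cnormal(m)

# series whose eps^0 term is m; sx[1] retrieves exactly that term
sx = fad.series({fad.infinitesimal({}): m}, on)
one = g.identity(sx)            # dispatches to identity() above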
25 changes: 20 additions & 5 deletions lib/gpt/ad/forward/series.py
@@ -25,9 +25,10 @@
 def promote(other, landau_O):
     if isinstance(other, infinitesimal):
         other = series({other: 1}, landau_O)
-    elif g.util.is_num(other):
-        other = series({infinitesimal({}): other}, landau_O)
-    return other
+    elif isinstance(other, series):
+        return other
+
+    return series({infinitesimal({}): other}, landau_O)


 class series(base):
@@ -104,6 +105,7 @@ def function(self, functional):
         return res

     def __iadd__(self, other):
+        other = promote(other, self.landau_O)
         res = self + other
         self.landau_O = res.landau_O
         self.terms = res.terms
@@ -150,16 +152,24 @@ def __sub__(self, other):
             if not landau_O.accept(t2):
                 continue
             if t2 not in terms:
-                terms[t2] = other.terms[t2]
+                terms[t2] = -other.terms[t2]
             else:
                 terms[t2] = g(terms[t2] - other.terms[t2])
         return series(terms, landau_O)

+    def __rsub__(self, other):
+        other = promote(other, self.landau_O)
+        return other - self
+
+    def __neg__(self):
+        return (-1.0) * self
+
+    def __truediv__(self, other):
+        return (1.0 / other) * self
+
     def __radd__(self, other):
-        return self.__add__(other, self)
+        other = promote(other, self.landau_O)
+        return other + self

     def __getitem__(self, tag):
         if tag == 1:
@@ -170,3 +180,8 @@ def __setitem__(self, tag, value):
         if tag == 1:
             tag = infinitesimal({})
         self.terms[tag] = value
+
+    def get_grid(self):
+        return self.terms[infinitesimal({})].grid
+
+    grid = property(get_grid)
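Together with the corrected promote and the sign fix in __sub__, plain numbers now combine with a series from either side. A hedged scalar-only sketch (same assumed constructors as above; g() returning numbers unchanged relies on the expr.py change below):

import gpt as g

fad = g.ad.forward
eps = fad.infinitesimal("eps")  # assumed constructor
on = fad.landau(eps**2)

s = fad.series({fad.infinitesimal({}): 2.0, eps: 3.0}, on)  # 2 + 3*eps

t = 1.0 - s   # __rsub__ promotes 1.0, then subtracts: -1 - 3*eps
u = s / 2.0   # __truediv__: 1 + 1.5*eps
s += 0.5      # __iadd__ now promotes the scalar first: 2.5 + 3*eps

assert t[eps] == -3.0  # exercises the corrected "-other.terms[t2]" branch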
26 changes: 25 additions & 1 deletion lib/gpt/ad/reverse/foundation.py
@@ -64,7 +64,31 @@ def _backward(z):

     return g.ad.reverse.node_base(_forward, _backward, (x,))


+def trace(x, t):
+    def _forward():
+        return g.trace(x.value, t)
+
+    # not allowed to capture z, otherwise have reference loop!
+    def _backward(z):
+        if x.with_gradient:
+            accumulate_gradient(x, g.identity(x.value) * z.gradient)
+
+    return g.ad.reverse.node_base(_forward, _backward, (x,))
+
+
+def sum(x):
+    def _forward():
+        return g.sum(x.value)
+
+    # not allowed to capture z, otherwise have reference loop!
+    def _backward(z):
+        if x.with_gradient:
+            accumulate_gradient(x, g.identity(x.value) * z.gradient)
+
+    return g.ad.reverse.node_base(_forward, _backward, (x,))
+
+
 def component_simple_map(operator, numpy_operator, extra_params, first, second):
     if operator == "relu":
         assert second is None
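With sum and trace available as reverse-mode nodes, a scalar functional of a matrix field can be taped end to end. A hedged sketch; it assumes g.trace and g.sum dispatch to the node foundation above, in the style of gpt's AD tests:

import gpt as g

grid = g.grid([4, 4, 4, 4], g.double)
rng = g.random("example")
m = g.mcolor(grid)
rng.cnormal(m)

x = g.ad.reverse.node(m)
f = g.sum(g.trace(x * g.adj(x)))  # scalar-valued node
value = f(with_gradients=True)    # forward pass, then backward pass
grad = x.gradient                 # accumulated df/dx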
36 changes: 28 additions & 8 deletions lib/gpt/ad/reverse/node.py
@@ -17,7 +17,7 @@
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 #
 import gpt as g
-from gpt.ad.reverse.util import accumulate_gradient
+from gpt.ad.reverse.util import accumulate_gradient, is_field
 from gpt.ad.reverse import foundation
 from gpt.core.foundation import base

@@ -99,7 +99,9 @@ def __init__(self, _forward, _backward=lambda z: None, _children=(), with_gradient=True):
         # print(gctr)

     def zero_gradient(self):
-        self.gradient = g(0.0 * self.value)
+        self.gradient = 0.0 * self.value
+        if isinstance(self.gradient, g.expr):
+            self.gradient = g(self.gradient)

     def __mul__(x, y):
         if not isinstance(x, node_base):
@@ -123,6 +125,9 @@ def _backward(z):
     def __rmul__(x, y):
         return node_base.__mul__(y, x)

+    def __truediv__(self, other):
+        return (1.0 / other) * self
+
     def __add__(x, y):
         if not isinstance(x, node_base):
             x = node_base(x, with_gradient=False)
@@ -161,6 +166,12 @@ def _backward(z):

         return node_base(_forward, _backward, (x, y))

+    def __rsub__(x, y):
+        return node_base.__sub__(y, x)
+
+    def __radd__(x, y):
+        return node_base.__add__(y, x)
+
     def forward(self, nodes, eager=True, free=None):
         max_fields_allocated = 0
         fields_allocated = 0
@@ -175,7 +186,7 @@ def forward(self, nodes, eager=True, free=None):
                     if m._forward is not None:
                         m.value = None
                         fields_allocated -= 1
-            if eager:
+            if eager and isinstance(n.value, g.expr):
                 n.value = g(n.value)

         if verbose_memory:
@@ -186,7 +197,10 @@ def backward(self, nodes, first_gradient):
     def backward(self, nodes, first_gradient):
         fields_allocated = len(nodes)  # .values
         max_fields_allocated = fields_allocated
-        self.gradient = 1.0
+        if is_field(self.value):
+            raise Exception("Expression evaluates to a field. Gradient calculation is not unique.")
+        self.zero_gradient()
+        self.gradient += 1.0
         for n in reversed(nodes):
             first_gradient_n = first_gradient[n]
             for m in first_gradient_n:
@@ -198,10 +212,11 @@ def backward(self, nodes, first_gradient):
                 if n._forward is not None:
                     n.gradient = None
                     fields_allocated -= 1
-            if n._forward is not None:
-                if n is not self:
-                    n.value = None
-                    fields_allocated -= 1
+            if n is not self:
+                n.value = None
+                fields_allocated -= 1
+            else:
+                n.gradient = g.infinitesimal_to_cartesian(n.value, n.gradient)

         if verbose_memory:
             g.message(
@@ -220,6 +235,11 @@ def __call__(self, with_gradients=True):
     def functional(self, *arguments):
         return node_differentiable_functional(self, arguments)

+    def get_grid(self):
+        return self.value.grid
+
+    grid = property(get_grid)


 def node(x, with_gradient=True):
     return node_base(x, with_gradient=with_gradient)
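The reflected operators plus the relaxed zero_gradient also make purely scalar tapes work, since gradients can now stay plain numbers. A hedged sketch:

import gpt as g

x = g.ad.reverse.node(2.0)        # scalar leaf node
y = (1.0 - x) / 2.0 + 3.0         # uses __rsub__, __truediv__, __radd__
value = y(with_gradients=True)    # value == 2.5
assert abs(x.gradient + 0.5) < 1e-14  # dy/dx = -1/2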
16 changes: 16 additions & 0 deletions lib/gpt/ad/reverse/util.py
@@ -28,6 +28,8 @@ def is_field(x):
         return x.lattice() is not None
     elif g.util.is_num(x):
         return False
+    elif isinstance(x, g.ad.forward.series):
+        return is_field(x[1])
     else:
         raise Exception(f"Unknown object type {type(x)}")

@@ -39,4 +41,18 @@ def accumulate_gradient(lhs, rhs_gradient):
         rhs_gradient = g.sum(rhs_gradient)
     if g.util.is_num(lhs.gradient) and isinstance(rhs_gradient, g.expr):
         rhs_gradient = g(rhs_gradient)
+
+    if isinstance(lhs.gradient, g.lattice) and isinstance(rhs_gradient, g.expr):
+        rhs_otype = rhs_gradient.lattice().otype
+        lhs_otype = lhs.gradient.otype
+        if lhs_otype.__name__ != rhs_otype.__name__:
+            if rhs_otype.spintrace[2] is not None:
+                if lhs_otype.__name__ == rhs_otype.spintrace[2]().__name__:
+                    rhs_gradient = g(g.spin_trace(rhs_gradient))
+                    rhs_otype = rhs_gradient.otype
+            if rhs_otype.colortrace[2] is not None:
+                if lhs_otype.__name__ == rhs_otype.colortrace[2]().__name__:
+                    rhs_gradient = g(g.color_trace(rhs_gradient))
+                    rhs_otype = rhs_gradient.otype
+
     lhs.gradient += rhs_gradient
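The new block reconciles gradient types by tracing out surplus indices: when the chain rule produces, say, a spin-color object but the leaf's gradient is color-only, a spin trace reduces it to the matching type. A pure-numpy analogy of that reduction (illustration only, not gpt code):

import numpy as np

# chain-rule result carrying spin (4) and color (3) indices
rhs = np.arange(4 * 4 * 3 * 3, dtype=float).reshape(4, 4, 3, 3)

lhs_gradient = np.zeros((3, 3))              # leaf gradient is color-only
lhs_gradient += np.einsum("ssab->ab", rhs)   # spin trace keeps color indices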
1 change: 1 addition & 0 deletions lib/gpt/core/__init__.py
@@ -46,6 +46,7 @@
     slice,
     indexed_sum,
     identity,
+    infinitesimal_to_cartesian,
     project,
     where,
     scale_per_coordinate,
2 changes: 1 addition & 1 deletion lib/gpt/core/expr.py
@@ -293,7 +293,7 @@ def expr_eval(first, second=None, ac=False):
         return_list = False
     else:
         assert ac is False
-        if gpt.util.is_list_instance(first, (gpt.lattice, gpt.tensor)):
+        if not gpt.util.is_list_instance(first, gpt.expr):
             return first

     e = expr(first)
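expr_eval, and therefore g(...), now passes through anything that is not an expression instead of only lattices and tensors; the scalar series arithmetic above relies on this. A tiny check, hedged as an assumption about the callable gpt module:

import gpt as g

assert g(2.5) == 2.5  # plain numbers pass through unchanged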
19 changes: 19 additions & 0 deletions lib/gpt/core/foundation/lattice.py
@@ -103,3 +103,22 @@ def rank_sum(l):

 def sum(l):
     return l.grid.globalsum(rank_sum(l))
+
+
+def identity(src):
+    eye = gpt.lattice(src)
+    # identity only works for matrix types
+    n2 = len(eye.v_obj)
+    n = int(n2**0.5)
+    assert n * n == n2
+    for i in range(n):
+        for j in range(n):
+            if i == j:
+                cgpt.lattice_set_to_identity(eye.v_obj[i * n + j])
+            else:
+                cgpt.lattice_set_to_number(eye.v_obj[i * n + j], 0.0)
+    return eye
+
+
+def infinitesimal_to_cartesian(src, dsrc):
+    return dsrc.otype.infinitesimal_to_cartesian(src, dsrc)
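g.identity on a matrix lattice fills the diagonal blocks via cgpt and zeroes the rest; the square-count assert rejects vector types. A short usage sketch:

import gpt as g

grid = g.grid([4, 4, 4, 4], g.double)
m = g.mcolor(grid)                 # 3x3 color-matrix field
g.random("example").cnormal(m)

eye = g.identity(m)                # unit matrix on every site
g.message(g.norm2(eye * m - m))    # ~0: multiplying by eye is a no-op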
4 changes: 4 additions & 0 deletions lib/gpt/core/foundation/tensor.py
@@ -52,3 +52,7 @@ def adj(l):
     if l.transposable():
         return l.adj()
     return gpt.adj(gpt.expr(l))
+
+
+def infinitesimal_to_cartesian(src, dsrc):
+    return dsrc.otype.infinitesimal_to_cartesian(src, dsrc)
15 changes: 15 additions & 0 deletions lib/gpt/core/object_type/container.py
@@ -121,6 +121,9 @@ def __init__(self, ndim):
     def compose(self, a, b):
         return a + b

+    def infinitesimal_to_cartesian(self, a, da):
+        return da
+

 ###
 # Matrices and vectors of spin
@@ -148,6 +151,9 @@ def cartesian(self):
     def compose(self, a, b):
         return a + b

+    def infinitesimal_to_cartesian(self, a, da):
+        return da
+
     def defect(self, field):
         return 0.0

@@ -196,6 +202,9 @@ def __init__(self, ndim):
     def compose(self, a, b):
         return a + b

+    def infinitesimal_to_cartesian(self, a, da):
+        return da
+

 ###
 # Matrices and vectors of both spin and color
@@ -231,6 +240,9 @@ def cartesian(self):
     def compose(self, a, b):
         return a + b

+    def infinitesimal_to_cartesian(self, a, da):
+        return da
+
     def identity(self):
         return gpt.matrix_spin_color(
             numpy.multiply.outer(numpy.identity(self.shape[0]), numpy.identity(self.shape[2])),
@@ -271,6 +283,9 @@ def __init__(self, spin_ndim, color_ndim):
     def compose(self, a, b):
         return a + b

+    def infinitesimal_to_cartesian(self, a, da):
+        return da
+
     def distribute(self, mat, dst, src, zero_lhs):
         src, dst = gpt.util.to_list(src), gpt.util.to_list(dst)
         if src[0].otype.__name__ == self.ot_matrix:
12 changes: 12 additions & 0 deletions lib/gpt/core/object_type/su_n.py
@@ -96,6 +96,9 @@ def defect(self, A):
     def compose(self, a, b):
         return a + b

+    def infinitesimal_to_cartesian(self, A, dA):
+        return dA
+
     def inner_product(self, left, right):
         if self.trace_norm is None:
             gen = self.generators(left.grid.precision.complex_dtype)
@@ -128,6 +131,15 @@ def defect(self, U):
         err2 += gpt.norm2(gpt.matrix.det(U) - I_s) / gpt.norm2(I_s)
         return err2**0.5

+    def infinitesimal_to_cartesian(self, U, dU):
+        src = gpt(dU * gpt.adj(U))
+        N = self.shape[0]
+        ret = 0.5 * src - 0.5 * gpt.adj(src)
+        ret -= gpt.identity(src) * gpt.trace(ret) / N
+        ret = gpt(ret / 2j)
+        ret.otype = self.cartesian()
+        return ret
+
     def project(self, U, method):
         if method == "defect_right" or method == "defect":
             # V = V0(1 + eps) with dag(eps) = eps , dag(V0) V0 = 1
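For the group type, the map sends a variation dU to the algebra: src = dU adj(U) is projected onto its traceless anti-Hermitian part, then divided by 2i (gpt's Hermitian-generator convention). A pure-numpy check of that projection, as an illustration only:

import numpy as np

N = 3
rng = np.random.default_rng(0)
src = rng.standard_normal((N, N)) + 1j * rng.standard_normal((N, N))

ret = 0.5 * src - 0.5 * src.conj().T   # anti-Hermitian part
ret -= np.eye(N) * np.trace(ret) / N   # remove the trace
ret = ret / 2j                         # Hermitian-generator convention

assert np.allclose(ret, ret.conj().T)  # Hermitian
assert abs(np.trace(ret)) < 1e-12      # traceless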
8 changes: 8 additions & 0 deletions lib/gpt/core/object_type/u_1.py
@@ -59,6 +59,9 @@ def cartesian(self):
     def compose(self, a, b):
         return a + b

+    def infinitesimal_to_cartesian(self, A, dA):
+        return dA
+
     def generators(self, dt):
         return [complex(1.0, 0)]

@@ -82,6 +85,11 @@ def __init__(self):
     def compose(self, a, b):
         return a * b

+    def infinitesimal_to_cartesian(self, U, dU):
+        ret = gpt(dU * gpt.adj(U) / 1j)
+        ret.otype = self.cartesian()
+        return ret
+
     def defect(self, U):
         I = gpt.identity(U)
         err2 = gpt.norm2(U * gpt.adj(U) - I) / gpt.norm2(I)
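For U(1) the same map degenerates to dividing out the phase: dU adj(U) / 1j recovers the real variation of the angle. A numpy illustration:

import numpy as np

theta, dtheta = 0.7, 1e-3
U = np.exp(1j * theta)
dU = 1j * dtheta * U   # variation of exp(i*theta)
assert np.isclose((dU * np.conj(U) / 1j).real, dtheta)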
3 changes: 3 additions & 0 deletions lib/gpt/core/operator/unary.py
@@ -76,6 +76,8 @@ def adj(l):
         return l.adj()
     elif isinstance(l, gpt.core.foundation.base):
         return l.__class__.foundation.adj(l)
+    elif gpt.util.is_num(l):
+        return gpt.util.adj_num(l)
     else:
         return adj(gpt.expr(l))

@@ -117,6 +119,7 @@ def rank_sum(e):
         e = gpt.eval(e)
     return e.__class__.foundation.rank_sum(e)

+
 def sum(e):
     if isinstance(e, gpt.expr):
         e = gpt.eval(e)
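adj on plain numbers is delegated to gpt.util.adj_num; assuming that helper returns the complex conjugate, the behavior would be:

import gpt as g

assert g.adj(1 + 2j) == 1 - 2j  # assumed: adjoint of a number is its conjugate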