From 3a5d777e311fb4e0b360c6aa643c8949e0e0a48a Mon Sep 17 00:00:00 2001 From: Christoph Lehner Date: Mon, 8 Jul 2024 18:34:13 +0200 Subject: [PATCH] keep work --- lib/gpt/ad/reverse/foundation.py | 41 +++-- lib/gpt/ad/reverse/node.py | 96 +++++++--- lib/gpt/ad/reverse/transform.py | 5 +- lib/gpt/ad/reverse/util.py | 216 ++++++++++++++++------ lib/gpt/core/expr.py | 16 ++ lib/gpt/qcd/gauge/loops.py | 2 +- lib/gpt/qcd/gauge/stencil/topology.py | 253 ++++++++++++++++++++++---- tests/ad/ad.py | 3 + 8 files changed, 505 insertions(+), 127 deletions(-) diff --git a/lib/gpt/ad/reverse/foundation.py b/lib/gpt/ad/reverse/foundation.py index b5245648..0f63681f 100644 --- a/lib/gpt/ad/reverse/foundation.py +++ b/lib/gpt/ad/reverse/foundation.py @@ -17,7 +17,7 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # import gpt as g -from gpt.ad.reverse.util import accumulate_gradient +from gpt.ad.reverse.util import container, get_unary_container def inner_product(x, y, use_accelerator): @@ -31,11 +31,11 @@ def _forward(): # not allowed to capture z, otherwise have reference loop! def _backward(z): if x.with_gradient: # z = adj(x) y -> x z = y -> x = y adj(z) - accumulate_gradient(x, y.value * g.adj(z.gradient)) + x.gradient += y.value * g.adj(z.gradient) # y.value * g.adj(z.gradient) if y.with_gradient: # z = adj(x) y -> y = x z - accumulate_gradient(y, x.value * z.gradient) + y.gradient += x.value * z.gradient - return {(0, 0): g.ad.reverse.node_base(_forward, _backward, (x, y))} + return {(0, 0): g.ad.reverse.node_base(_forward, _backward, (x, y), _container=container(complex), _tag="inner_product")} def norm2(x): @@ -52,9 +52,9 @@ def _forward(): # not allowed to capture z, otherwise have reference loop! def _backward(z): if x.with_gradient: - accumulate_gradient(x, g.cshift(z.gradient, direction, -displacement)) + x.gradient += g.cshift(z.gradient, direction, -displacement) - return g.ad.reverse.node_base(_forward, _backward, (x,)) + return g.ad.reverse.node_base(_forward, _backward, (x,), _container=x._container, _tag="cshift(" + str(direction) + ", " + str(displacement) + ")") def adj(x): @@ -64,9 +64,9 @@ def _forward(): # not allowed to capture z, otherwise have reference loop! def _backward(z): if x.with_gradient: - accumulate_gradient(x, g.adj(z.gradient)) + x.gradient += g.adj(z.gradient) - return g.ad.reverse.node_base(_forward, _backward, (x,)) + return g.ad.reverse.node_base(_forward, _backward, (x,), _container=x._container, _tag="adj") def trace(x, t): @@ -76,9 +76,11 @@ def _forward(): # not allowed to capture z, otherwise have reference loop! def _backward(z): if x.with_gradient: - accumulate_gradient(x, g.identity(x.value) * z.gradient) + x.gradient += g.identity(x.value) * z.gradient - return g.ad.reverse.node_base(_forward, _backward, (x,)) + z_container = get_unary_container(x._container, lambda v: g.trace(v, t)) + + return g.ad.reverse.node_base(_forward, _backward, (x,), _container=z_container) def sum(x): @@ -88,9 +90,9 @@ def _forward(): # not allowed to capture z, otherwise have reference loop! 
def _backward(z): if x.with_gradient: - accumulate_gradient(x, g.identity(x.value) * z.gradient) + x.gradient += g.identity(x.value) * z.gradient - return g.ad.reverse.node_base(_forward, _backward, (x,)) + return g.ad.reverse.node_base(_forward, _backward, (x,), _container=x._container.lattice_to_tensor()) def component_simple_map(operator, numpy_operator, extra_params, first, second): @@ -98,3 +100,18 @@ def component_simple_map(operator, numpy_operator, extra_params, first, second): assert second is None return g.ad.reverse.transform.relu(first, a=extra_params["a"]) raise Exception(f"component-wise operator {operator} not implemented in rev-AD") + + +def infinitesimal_to_cartesian(src, dsrc): + return src.value.otype.infinitesimal_to_cartesian(src, dsrc) + + +def identity(x): + def _forward(): + return g.identity(x.value) + + # not allowed to capture z, otherwise have reference loop! + def _backward(z): + pass + + return g.ad.reverse.node_base(_forward, _backward, (x,), _container=x._container, _tag="identity(" + str(x._container) + ")") diff --git a/lib/gpt/ad/reverse/node.py b/lib/gpt/ad/reverse/node.py index f01cb137..6371b0f3 100644 --- a/lib/gpt/ad/reverse/node.py +++ b/lib/gpt/ad/reverse/node.py @@ -17,7 +17,7 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # import gpt as g -from gpt.ad.reverse.util import accumulate_gradient, is_field +from gpt.ad.reverse.util import get_container, get_mul_container, convert_container from gpt.ad.reverse import foundation from gpt.core.foundation import base @@ -73,8 +73,22 @@ def gradient(self, fields, dfields): return [self.arguments[i].gradient for i in indices] -# gctr = 0 +def str_traverse(node, indent=0): + if not callable(node._forward): + return (" "*indent) + "leaf(" + str(node._container) + ")" + else: + pre = " "*indent + if node._tag is not None: + tag = node._tag + else: + tag = str(node._forward) + ret = pre + "(" + tag + "):" + for x in node._children: + ret = ret + "\n" + str_traverse(x, indent+1) + return ret + +# gctr = 0 class node_base(base): foundation = foundation @@ -87,30 +101,47 @@ def __init__( _children=(), with_gradient=True, infinitesimal_to_cartesian=True, + _container=None, + _tag=None ): # global gctr # gctr+=1 - if not callable(_forward): + if not callable(_forward) or isinstance(_forward, node_base): self._forward = None self.value = _forward + _container = get_container(_forward) else: self._forward = _forward self.value = None + assert _container is not None + self._container = _container self._backward = _backward self._children = _children self.with_gradient = with_gradient self.infinitesimal_to_cartesian = infinitesimal_to_cartesian self.gradient = None + self._tag = _tag # def __del__(self): # global gctr # gctr-=1 # print(gctr) + def __str__(self): + return str_traverse(self) + def zero_gradient(self): - self.gradient = 0.0 * self.value - if isinstance(self.gradient, g.expr): - self.gradient = g(self.gradient) + self.gradient = self._container.zero() + if isinstance(self.value, g.ad.forward.series): + gradient = 0.0 * self.value + for t in gradient.terms: + gradient.terms[t] = self.gradient + self.gradient = gradient + + value = self.value + while isinstance(value, node_base): + value = value.value + self.gradient = node_base(self.gradient) def __mul__(x, y): if not isinstance(x, node_base): @@ -119,17 +150,25 @@ def __mul__(x, y): if not isinstance(y, node_base): y = node_base(y, with_gradient=False) + z_container = get_mul_container(x._container, y._container) + + if x.with_gradient: + 
x = convert_container(x, z_container, y._container, lambda a, b: a * g.adj(b)) + + if y.with_gradient: + y = convert_container(y, x._container, z_container, lambda a, b: g.adj(a) * b) + def _forward(): return x.value * y.value # not allowed to capture z, otherwise have reference loop! def _backward(z): if x.with_gradient: - accumulate_gradient(x, z.gradient * g.adj(y.value)) + x.gradient += z.gradient * g.adj(y.value) if y.with_gradient: - accumulate_gradient(y, g.adj(x.value) * z.gradient) + y.gradient += g.adj(x.value) * z.gradient - return node_base(_forward, _backward, (x, y)) + return node_base(_forward, _backward, (x, y), _container=z_container, _tag="*") def __rmul__(x, y): return node_base.__mul__(y, x) @@ -150,13 +189,15 @@ def setter(y, z): return x.project(getter, setter) def project(x, getter, setter): + assert False # for future use + def _forward(): return getter(x.value) # not allowed to capture z, otherwise have reference loop! def _backward(z): if x.with_gradient: - accumulate_gradient(x, z.gradient, getter, setter) + x.gradient += z.gradient return node_base(_forward, _backward, (x,)) @@ -167,17 +208,20 @@ def __add__(x, y): if not isinstance(y, node_base): y = node_base(y, with_gradient=False) + assert x._container == y._container + _container = x._container + def _forward(): return x.value + y.value # not allowed to capture z, otherwise have reference loop! def _backward(z): if x.with_gradient: - accumulate_gradient(x, z.gradient) + x.gradient += z.gradient if y.with_gradient: - accumulate_gradient(y, z.gradient) + y.gradient += z.gradient - return node_base(_forward, _backward, (x, y)) + return node_base(_forward, _backward, (x, y), _container=_container, _tag="+") def __sub__(x, y): if not isinstance(x, node_base): @@ -186,17 +230,20 @@ def __sub__(x, y): if not isinstance(y, node_base): y = node_base(y, with_gradient=False) + assert x._container == y._container + _container = x._container + def _forward(): return x.value - y.value # not allowed to capture z, otherwise have reference loop! def _backward(z): if x.with_gradient: - accumulate_gradient(x, z.gradient) + x.gradient += z.gradient if y.with_gradient: - accumulate_gradient(y, -z.gradient) + y.gradient -= z.gradient - return node_base(_forward, _backward, (x, y)) + return node_base(_forward, _backward, (x, y), _container=_container, _tag="-") def __rsub__(x, y): return node_base.__sub__(y, x) @@ -219,7 +266,8 @@ def forward(self, nodes, eager=True, free=None): m.value = None fields_allocated -= 1 if eager and isinstance(n.value, g.expr): - n.value = g(n.value) + if not n.value.is_adj(): + n.value = g(n.value) if verbose_memory: g.message( @@ -230,16 +278,16 @@ def backward(self, nodes, first_gradient, initial_gradient): fields_allocated = len(nodes) # .values max_fields_allocated = fields_allocated if initial_gradient is None: - if is_field(self.value): + if self._container.is_field(): raise Exception( "Expression evaluates to a field. Gradient calculation is not unique." ) - if isinstance(self.value, complex) and abs(self.value.imag) > 1e-12 * abs( - self.value.real - ): - raise Exception( - f"Expression does not evaluate to a real number ({self.value}). Gradient calculation is not unique." - ) + #if isinstance(self._container[0], complex) and abs(self.value.imag) > 1e-12 * abs( + # self.value.real + #): + # raise Exception( + # f"Expression does not evaluate to a real number ({self.value}). Gradient calculation is not unique." 
+ # ) initial_gradient = 1.0 self.zero_gradient() self.gradient += initial_gradient diff --git a/lib/gpt/ad/reverse/transform.py b/lib/gpt/ad/reverse/transform.py index a6734212..f10b5ebc 100644 --- a/lib/gpt/ad/reverse/transform.py +++ b/lib/gpt/ad/reverse/transform.py @@ -18,7 +18,6 @@ # import gpt as g from gpt.ad.reverse import node_base -from gpt.ad.reverse.util import accumulate_gradient def relu(x, a=0.0): @@ -29,6 +28,6 @@ def _forward(): def _backward(z): if x.with_gradient: active = g.component.drelu(a)(x.value) - accumulate_gradient(x, g.component.multiply(active, z.gradient)) + x.gradient += g.component.multiply(active, z.gradient) - return node_base(_forward, _backward, (x,)) + return node_base(_forward, _backward, (x,), _container=x._container) diff --git a/lib/gpt/ad/reverse/util.py b/lib/gpt/ad/reverse/util.py index c75bf5e6..2213f7b8 100644 --- a/lib/gpt/ad/reverse/util.py +++ b/lib/gpt/ad/reverse/util.py @@ -17,26 +17,14 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # import gpt as g - - -def is_field(x): - if isinstance(x, g.lattice): - return True - elif isinstance(x, g.tensor): - return False - elif isinstance(x, g.expr): - return x.container()[0] is not None - elif g.util.is_num(x): - return False - elif isinstance(x, g.ad.forward.series): - for t in x.terms: - return is_field(x[t]) - raise Exception("Empty series") - else: - raise Exception(f"Unknown object type {type(x)}") +import numpy as np def accumulate_compatible(a, b): + if a == complex: + a = g.ot_singlet() + if b == complex: + b = g.ot_singlet() if a.data_alias is not None: a = a.data_alias() if b.data_alias is not None: @@ -44,38 +32,164 @@ def accumulate_compatible(a, b): return a.__name__ == b.__name__ -def accumulate_gradient(lhs, rhs_gradient, getter=None, setter=None): - lhs_gradient = lhs.gradient - if getter is not None: - lhs_gradient = getter(lhs_gradient) - rhs_field = is_field(rhs_gradient) - lhs_field = is_field(lhs_gradient) - if rhs_field and not lhs_field: - rhs_gradient = g.sum(rhs_gradient) - if g.util.is_num(lhs_gradient) and isinstance(rhs_gradient, g.expr): - rhs_gradient = g(rhs_gradient) - - if isinstance(lhs_gradient, g.lattice) and isinstance(rhs_gradient, g.expr): - grid, rhs_otype, is_list, nlist = rhs_gradient.container() - assert not is_list # for now - lhs_otype = lhs_gradient.otype - - if lhs_otype.__name__ != rhs_otype.__name__: - if rhs_otype.spintrace[2] is not None: - rhs_spintrace_otype = rhs_otype.spintrace[2]() - if accumulate_compatible(lhs_otype, rhs_spintrace_otype): - rhs_gradient = g(g.spin_trace(rhs_gradient)) - rhs_otype = rhs_gradient.otype - elif rhs_spintrace_otype.colortrace[2] is not None: - if accumulate_compatible(lhs_otype, rhs_spintrace_otype.colortrace[2]()): - rhs_gradient = g(g.trace(rhs_gradient)) - rhs_otype = rhs_gradient.otype - if rhs_otype.colortrace[2] is not None: - if accumulate_compatible(lhs_otype, rhs_otype.colortrace[2]()): - rhs_gradient = g(g.color_trace(rhs_gradient)) - rhs_otype = rhs_gradient.otype - - if setter is not None: - setter(lhs.gradient, lhs_gradient + rhs_gradient) +class container: + def __init__(self, *tag): + if isinstance(tag[0], tuple): + tag = tag[0] + if tag[1] is None: + tag=[complex] + elif tag[0] is None: + tag=[g.tensor,tag[1]] + else: + tag=[g.lattice, tag[0], tag[1]] + + self.tag = tag + + if self.tag[0] == g.tensor: + otype = self.tag[1] + while otype.data_alias is not None: + otype = otype.data_alias() + if otype.__name__ == "ot_singlet": + self.tag = [complex] + + def is_field(self): + 
return self.tag[0] == g.lattice + + def representative(self): + return self.tag[0](*self.tag[1:]) + + def lattice_to_tensor(self): + assert self.tag[0] == g.lattice + return container(g.tensor, self.tag[2]) + + def zero(self): + r = self.representative() + if isinstance(r, g.lattice): + r[:] = 0 + elif isinstance(r, g.tensor): + r *= 0 + elif isinstance(r, complex): + r = 0.0 + else: + raise Exception("Unknown type") + return r + + def __eq__(self, other): + return str(self) == str(other) + + def __str__(self): + r = str(self.tag[0].__name__) + if len(self.tag) > 1: + r = r + ";" + self.tag[-1].__name__ + if len(self.tag) == 3: + r = r + ";" + str(self.tag[1].obj) + return r + + +def get_container(x): + if isinstance(x, g.ad.reverse.node_base): + return get_container(x.value) + elif isinstance(x, g.ad.forward.series): + for t in x.terms: + return get_container(x[t]) + raise Exception("Empty series") + elif isinstance(x, g.lattice): + return container(g.lattice, x.grid, x.otype) + elif isinstance(x, g.tensor): + return container(g.tensor, x.otype) + elif g.util.is_num(x): + return container(complex) else: - lhs.gradient += rhs_gradient + raise Exception(f"Unknown object type {type(x)}") + + +def get_mul_container(x, y): + rx = x.representative() + ry = y.representative() + return container((g.expr(rx) * g.expr(ry)).container()) + + +def get_unary_container(x, unary): + rx = x.representative() + return container(g.expr(unary(rx)).container()) + + +def convert_container(v, x, y, operand): + rx = x.representative() + ry = y.representative() + + r = g.expr(operand(rx, ry)) + c = container(r.container()) + + if v._container == c: + return v + + # conversions from tensor to matrix + backward_sum = False + backward_spin_trace = False + backward_color_trace = False + backward_trace = False + + if v._container.tag[0] != g.lattice and c.tag[0] == g.lattice: + backward_sum = True + + # now check otypes + if v._container.tag[-1].__name__ != c.tag[-1].__name__: + rhs_otype = c.tag[-1] + lhs_otype = v._container.tag[-1] + + if rhs_otype.spintrace[2] is not None: + rhs_spintrace_otype = rhs_otype.spintrace[2]() + if accumulate_compatible(lhs_otype, rhs_spintrace_otype): + backward_spin_trace = True + rhs_otype = rhs_spintrace_otype + elif rhs_spintrace_otype.colortrace[2] is not None: + rhs_trace_otype = rhs_spintrace_otype.colortrace[2]() + if accumulate_compatible(lhs_otype, rhs_trace_otype): + backward_trace = True + rhs_otype = rhs_trace_otype + if rhs_otype.colortrace[2] is not None: + rhs_colortrace_otype = rhs_otype.colortrace[2]() + if accumulate_compatible(lhs_otype, rhs_colortrace_otype): + backward_color_trace = True + rhs_otype = rhs_colortrace_otype + + if not accumulate_compatible(rhs_otype, lhs_otype): + raise Exception("Conversion incomplete:" + rhs_otype.__name__ + ":" + lhs_otype.__name__) + + # g.message("Need to modify to",v._container,"from",c,":",backward_sum, backward_trace, backward_spin_trace, backward_color_trace) + assert backward_trace or backward_color_trace or backward_spin_trace or backward_sum + + + def _forward(): + value = v.value + + #if backward_sum: + # r = c.representative() + # r[:] = value + # value = r + # print("test",backward_trace,backward_spin_trace,backward_color_trace) + + return value + + def _backward(z): + if v.with_gradient: + gradient = z.gradient + + if backward_trace: + gradient = g.trace(gradient) + + if backward_color_trace: + gradient = g.color_trace(gradient) + + if backward_spin_trace: + gradient = g.spin_trace(gradient) + + if backward_sum: + 
gradient = g.sum(gradient) + + v.gradient += g(gradient) if backward_trace or backward_color_trace or backward_spin_trace else gradient + + # print("Ran conversion with sum/tr",backward_sum,backward_trace,backward_spin_trace,backward_color_trace) + + return g.ad.reverse.node_base(_forward, _backward, (v,), _container=c, _tag="change to " + str(c) + " from " + str(v._container)) diff --git a/lib/gpt/core/expr.py b/lib/gpt/core/expr.py index 401248e3..f3af60f0 100644 --- a/lib/gpt/core/expr.py +++ b/lib/gpt/core/expr.py @@ -142,6 +142,10 @@ def __init__(self, val, unary=expr_unary.NONE): raise Exception("Unknown type " + str(type(val))) self.unary = unary + def is_adj(self): + b = len(self.val) == 1 and len(self.val[0][1]) == 1 and self.val[0][1][0][0] == factor_unary.ADJ + return b + def is_single(self, t=None): b = len(self.val) == 1 and self.val[0][0] == 1.0 and len(self.val[0][1]) == 1 if t is not None: @@ -173,6 +177,18 @@ def container(self): n = len(representative) otype = get_otype_from_expression(self) return grid, otype, return_list, n + + for v in self.val: + for i in v[1]: + if gpt.util.is_list_instance(i[1], gpt.tensor): + representative = i[1] + return_list = isinstance(representative, list) + representative = gpt.util.to_list(representative) + grid = None + n = len(representative) + otype = get_otype_from_expression(self) + return grid, otype, return_list, n + return None, None, None, None def __mul__(self, l): diff --git a/lib/gpt/qcd/gauge/loops.py b/lib/gpt/qcd/gauge/loops.py index 78a2fec8..4da3520b 100644 --- a/lib/gpt/qcd/gauge/loops.py +++ b/lib/gpt/qcd/gauge/loops.py @@ -168,5 +168,5 @@ def field_strength(U, mu, nu): ) F = g.eval(U[mu] * v + g.cshift(v * U[mu], mu, -1)) - F @= 0.125 * (F - g.adj(F)) + F = 0.125 * (F - g.adj(F)) return F diff --git a/lib/gpt/qcd/gauge/stencil/topology.py b/lib/gpt/qcd/gauge/stencil/topology.py index fd6b3377..35e76e2b 100644 --- a/lib/gpt/qcd/gauge/stencil/topology.py +++ b/lib/gpt/qcd/gauge/stencil/topology.py @@ -211,16 +211,133 @@ def projected_gradient_F_projected_gradient(U, mu, nu, right_left, outer_right_l right_left_s = g.cshift(right_left, mu, 1) v_val = v(U, mu, nu) - grad = v_projected_gradient(U, mu, nu, g(right_left * U[mu])) - grad2 = v_projected_gradient(U, mu, nu, g(U[mu] * right_left_s)) + + one = g.identity(g.group.cartesian(U[0])) + grad = projected_gradient_v_projected_gradient(U, mu, nu, right_left, one, outer_right_left) + grad2 = projected_gradient_v_projected_gradient(U, mu, nu, one, right_left_s, outer_right_left) for rho in range(Nd): grad[rho] += grad2[rho] - grad[mu] -= g.qcd.gauge.project.traceless_hermitian(U[mu] * v_val * right_left / 2j) - grad[mu] -= g.qcd.gauge.project.traceless_hermitian(U[mu] * right_left_s * v_val / 2j) + one = g.identity(g.lattice(grad[0])) + otype = grad[0].otype + + #grad[mu] -= g.qcd.gauge.project.traceless_hermitian(U[mu] * v_val * right_left / 2j) + # v_val_1 + grad[mu] -= projected_gradient_traceless_hermitian( + U[mu] * g.cshift(U[nu], mu, 1) * g.adj(g.cshift(U[mu], nu, 1)) * g.adj(U[nu]) * right_left / 2j, + one, + outer_right_left[mu], + otype + ) + + grad[mu] -= projected_gradient_traceless_hermitian( + - U[mu] * g.adj(g.cshift(U[mu] * g.cshift(U[nu], mu, 1), nu, -1)) / 2j, + g.cshift(g.adj(right_left) * U[nu], nu, -1), + g.cshift(outer_right_left[mu], nu, -1), + otype + ) + + grad[nu] -= projected_gradient_traceless_hermitian( + U[nu] * g.cshift(g.cshift(g.adj(U[mu]), nu, 1) * g.adj(U[nu]) * right_left, mu, -1) / 2j, + g.cshift(U[mu], mu, -1), + 
g.cshift(outer_right_left[mu], mu, -1), + otype + ) + + grad[nu] -= projected_gradient_traceless_hermitian( + - U[nu] * g.cshift(U[mu], nu, 1) * g.cshift(g.adj(U[nu]), mu, 1) * g.adj(U[mu]) / 2j, + g.adj(right_left), + outer_right_left[mu], + otype + ) + + # v_val_2 + grad[mu] += projected_gradient_traceless_hermitian( + U[mu] * g.cshift(g.adj(g.cshift(U[nu], mu, 1)) * g.adj(U[mu]) * U[nu], nu, -1) * right_left / 2j, + one, + outer_right_left[mu], + otype + ) + + grad[mu] += projected_gradient_traceless_hermitian( + - U[mu] * g.adj(g.cshift(U[mu], nu, 1) * g.adj(g.cshift(U[nu], mu, 1)))/ 2j, + g.adj(U[nu] * g.cshift(right_left, nu, 1)), + g.cshift(outer_right_left[mu], nu, 1), + otype + ) + + grad[nu] += projected_gradient_traceless_hermitian( + - U[nu] * g.cshift(g.cshift(g.adj(U[mu]), nu, 1), mu, -1) / 2j, + g.adj(g.cshift(g.adj(U[mu]) * U[nu] * g.cshift(right_left, nu, 1), mu, -1)), + g.cshift(g.cshift(outer_right_left[mu], nu, 1), mu, -1), + otype + ) + + grad[nu] += projected_gradient_traceless_hermitian( + U[nu] * g.cshift(right_left, nu, 1) / 2j, + g.cshift(U[mu], nu, 1) * g.adj(g.cshift(U[nu], mu, 1)) * g.adj(U[mu]), + g.cshift(outer_right_left[mu], nu, 1), + otype + ) + + #grad[mu] -= g.qcd.gauge.project.traceless_hermitian(g(U[mu] * right_left_s * v_val) / 2j) + grad[mu] -= projected_gradient_traceless_hermitian( + U[mu] * right_left_s * g.cshift(U[nu], mu, 1) * g.adj(g.cshift(U[mu], nu, 1)) * g.adj(U[nu]) / 2j, + one, + outer_right_left[mu], + otype + ) + + grad[mu] -= projected_gradient_traceless_hermitian( + - U[mu] * g.adj(g.cshift(U[mu] * right_left_s * g.cshift(U[nu], mu, 1), nu, -1)) / 2j, + g.adj(g.cshift(g.adj(U[nu]), nu, -1)), + g.cshift(outer_right_left[mu], nu, -1), + otype + ) + + grad[nu] -= projected_gradient_traceless_hermitian( + U[nu] * g.cshift(g.cshift(g.adj(U[mu]), nu, 1) * g.adj(U[nu]), mu, -1) / 2j, + g.cshift(U[mu] * right_left_s, mu, -1), + g.cshift(outer_right_left[mu], mu, -1), + otype + ) + + grad[nu] -= projected_gradient_traceless_hermitian( + - U[nu] * g.cshift(U[mu], nu, 1) * g.cshift(g.adj(U[nu]), mu, 1) * g.adj(U[mu] * right_left_s) / 2j, + one, + outer_right_left[mu], + otype + ) + + #v_val_2 + grad[mu] += projected_gradient_traceless_hermitian( + U[mu] * right_left_s * g.cshift(g.adj(g.cshift(U[nu], mu, 1)) * g.adj(U[mu]) * U[nu], nu, -1) / 2j, + one, + outer_right_left[mu], + otype + ) + + grad[mu] += projected_gradient_traceless_hermitian( + - U[mu] * g.adj(g.cshift(U[mu] * right_left_s, nu, 1) * g.adj(g.cshift(U[nu], mu, 1)))/ 2j, + g.adj(U[nu]), + g.cshift(outer_right_left[mu], nu, 1), + otype + ) + + grad[nu] += projected_gradient_traceless_hermitian( + - U[nu] * g.cshift(g.cshift(g.adj(U[mu] * right_left_s), nu, 1), mu, -1) / 2j, + g.adj(g.cshift(g.adj(U[mu]) * U[nu], mu, -1)), + g.cshift(g.cshift(outer_right_left[mu], nu, 1), mu, -1), + otype + ) + + grad[nu] += projected_gradient_traceless_hermitian( + U[nu] / 2j, + g.cshift(U[mu] * right_left_s, nu, 1) * g.adj(g.cshift(U[nu], mu, 1)) * g.adj(U[mu]), + g.cshift(outer_right_left[mu], nu, 1), + otype + ) - # left * U[mu] * v * right + left * g.cshift(v * U[mu], mu, -1) * right - assert False return grad @@ -228,16 +345,36 @@ def projected_gradient_field_strength_projected_gradient(U, mu, nu, right_left, Nd = len(U) assert Nd == 4 - fg1 = F_projected_gradient(U, mu, nu, g(right_left)) - fg2 = F_projected_gradient(U, mu, nu, g(g.adj(right_left))) + fg1 = projected_gradient_F_projected_gradient(U, mu, nu, g(right_left), outer_right_left) + fg2 = projected_gradient_F_projected_gradient(U, mu, 
nu, g(g.adj(right_left)), outer_right_left) grad = [] for mu in range(Nd): grad.append(g(0.125 * (fg1[mu] - g.adj(fg2[mu])))) - assert False + return grad -def projected_gradient_topological_charge_gradient(left, U): +def topological_charge_gradient2(U): + field_strength = g.qcd.gauge.field_strength + + Bx = field_strength(U, 1, 2) + + delta = g( + + g.expr(field_strength_projected_gradient(U, 3, 0, Bx)) + ) + + coeff = 8.0 / (32.0 * np.pi**2) + + ret = [] + for d in delta: + dQ = g(coeff * d) + dQ.otype = U[0].otype.cartesian() + ret.append(dQ) + + return ret + + +def projected_gradient_topological_charge_gradient(U, outer): field_strength = g.qcd.gauge.field_strength Bx = field_strength(U, 1, 2) @@ -248,17 +385,15 @@ def projected_gradient_topological_charge_gradient(left, U): Ey = field_strength(U, 3, 1) Ez = field_strength(U, 3, 2) - if mask is None: - mask = g.complex(U[0].grid) - mask[:] = 1.0 - delta = g( - g.expr(field_strength_projected_gradient(U, 1, 2, g(mask * Ex))) - + g.expr(field_strength_projected_gradient(U, 3, 0, g(mask * Bx))) - + g.expr(field_strength_projected_gradient(U, 2, 0, g(mask * Ey))) - + g.expr(field_strength_projected_gradient(U, 3, 1, g(mask * By))) - + g.expr(field_strength_projected_gradient(U, 0, 1, g(mask * Ez))) - + g.expr(field_strength_projected_gradient(U, 3, 2, g(mask * Bz))) + #g.expr(field_strength_projected_gradient(U, 1, 2, g(mask * Ex))) + + g.expr(projected_gradient_field_strength_projected_gradient(U, 3, 0, Bx, outer)) + #+ g.expr(field_strength_projected_gradient(U, 3, 0, field_strength_projected_gradient_dinput(U, 3, 0, outer))) + + #+ g.expr(field_strength_projected_gradient(U, 2, 0, g(mask * Ey))) + #+ g.expr(field_strength_projected_gradient(U, 3, 1, g(mask * By))) + #+ g.expr(field_strength_projected_gradient(U, 0, 1, g(mask * Ez))) + #+ g.expr(field_strength_projected_gradient(U, 3, 2, g(mask * Bz))) ) coeff = 8.0 / (32.0 * np.pi**2) @@ -284,10 +419,12 @@ def projected_gradient_traceless_hermitian(U_L, U_R, R_L, otype): return A -def projected_gradient_v_projected_gradient(U, mu, nu, right_left, outer_right_left): +def projected_gradient_v_projected_gradient(U, mu, nu, right, left, outer_right_left): Nd = len(U) assert Nd == 4 + right_left = g(right * U[mu] * left) + grad = [g.group.cartesian(U[0]) for i in range(Nd)] for gr in grad: gr[:] = 0 @@ -306,7 +443,14 @@ def projected_gradient_v_projected_gradient(U, mu, nu, right_left, outer_right_l outer_right_left[mu], otype ) - + + grad[mu] += projected_gradient_traceless_hermitian( + U[mu] * left * g.cshift(U[nu], mu, 1) * g.cshift(g.adj(U[mu]), nu, 1) / 2j, + g.adj(U[nu]) * right, + g.cshift(outer_right_left[mu], nu, 1), + otype + ) + grad[nu] += projected_gradient_traceless_hermitian( - U[nu] / 2j, g.cshift(U[mu], nu, 1) * g.cshift(g.adj(U[nu]), mu, 1) * g.adj(right_left), @@ -330,6 +474,13 @@ def projected_gradient_v_projected_gradient(U, mu, nu, right_left, outer_right_l otype ) + grad[mu] -= projected_gradient_traceless_hermitian( + U[mu] * left * g.cshift(g.cshift(g.adj(U[nu]), mu, 1) * g.adj(U[mu]), nu, -1) / 2j, + g.cshift(U[nu], nu, -1) * right, + g.cshift(outer_right_left[mu], nu, -1), + otype + ) + grad[nu] -= projected_gradient_traceless_hermitian( U[nu] * g.cshift(right_left, nu, 1) * g.adj(g.cshift(U[nu], mu, 1)) * g.adj(U[mu]) / 2j, one, @@ -344,10 +495,7 @@ def projected_gradient_v_projected_gradient(U, mu, nu, right_left, outer_right_l otype ) - #grad[nu] -= U[nu] * g.cshift( - # g.adj(g.cshift(U[mu], nu, 1)) * g.adj(U[nu]) * right * left, mu, -1 - #) - + #grad[nu] -= 
U[nu] * g.cshift(g.adj(g.cshift(U[mu], nu, 1)) * g.adj(U[nu]) * right * left, mu, -1) grad[mu] -= projected_gradient_traceless_hermitian( - U[mu] * g.cshift(g.cshift(g.adj(U[nu]), mu, 1), nu, -1) / 2j, g.cshift(g.adj(right_left) * U[nu], nu, -1), @@ -355,22 +503,28 @@ def projected_gradient_v_projected_gradient(U, mu, nu, right_left, outer_right_l otype ) + grad[mu] -= projected_gradient_traceless_hermitian( + U[mu] * left / 2j, + g.cshift(U[nu], mu, 1) * g.adj(g.cshift(U[mu], nu, 1)) * g.adj(U[nu]) * right, + g.cshift(outer_right_left[nu], mu, 1), + otype + ) + grad[nu] -= projected_gradient_traceless_hermitian( U[nu] * g.cshift(g.adj(g.cshift(U[mu], nu, 1)) * g.adj(U[nu]) * right_left, mu, -1) / 2j, one, outer_right_left[nu], otype ) - + grad[nu] -= projected_gradient_traceless_hermitian( -U[nu] * g.cshift(U[mu], nu, 1) * g.cshift(g.adj(U[nu]), mu, 1) / 2j, g.adj(right_left), g.cshift(outer_right_left[nu], mu, 1), otype ) - + #grad[nu] -= g.cshift(g.adj(U[mu]) * U[nu] * g.cshift(right_left, nu, 1), mu, -1) * g.adj(U[nu]) - grad[mu] -= projected_gradient_traceless_hermitian( - U[mu] / 2j, g.cshift(U[nu], mu, 1) * g.cshift(g.adj(right_left), nu, 1) * g.adj(U[nu]), @@ -378,20 +532,27 @@ def projected_gradient_v_projected_gradient(U, mu, nu, right_left, outer_right_l otype ) + grad[mu] -= projected_gradient_traceless_hermitian( + U[mu] * left * g.cshift(g.cshift(g.adj(U[nu]), mu, 1), nu, -1) / 2j, + g.cshift(g.adj(U[mu]) * U[nu], nu, -1) * right, + g.cshift(g.cshift(outer_right_left[nu], mu, 1), nu, -1), + otype + ) + grad[nu] -= projected_gradient_traceless_hermitian( U[nu] * g.cshift(right_left, nu, 1) * g.cshift(g.adj(U[nu]), mu, 1) / 2j, g.adj(U[mu]), g.cshift(outer_right_left[nu], mu, 1), otype ) - + grad[nu] -= projected_gradient_traceless_hermitian( -U[nu] * g.cshift(g.cshift(g.adj(right_left), nu, 1) * g.adj(U[nu]) * U[mu], mu, -1) / 2j, one, outer_right_left[nu], otype ) - + # grad[nu] += right_left * g.cshift(U[nu], mu, 1) * g.adj(g.cshift(U[mu], nu, 1)) * g.adj(U[nu]) grad[mu] += projected_gradient_traceless_hermitian( - U[mu] * g.cshift(g.cshift(g.adj(U[nu]), mu, 1) * g.adj(right_left), nu, -1) / 2j, @@ -400,20 +561,27 @@ def projected_gradient_v_projected_gradient(U, mu, nu, right_left, outer_right_l otype ) + grad[mu] += projected_gradient_traceless_hermitian( + U[mu] * left * g.cshift(U[nu], mu, 1) * g.adj(g.cshift(U[mu], nu, 1)) * g.adj(U[nu]) / 2j, + right, + outer_right_left[nu], + otype + ) + grad[nu] += projected_gradient_traceless_hermitian( U[nu] * g.cshift(g.adj(g.cshift(U[mu], nu, 1)) * g.adj(U[nu]), mu, -1) / 2j, g.cshift(right_left, mu, -1), g.cshift(outer_right_left[nu], mu, -1), otype ) - + grad[nu] += projected_gradient_traceless_hermitian( - U[nu] * g.cshift(U[mu], nu, 1) * g.cshift(g.adj(U[nu]), mu, 1) * g.adj(right_left) / 2j, one, outer_right_left[nu], otype ) - + # grad[nu] += U[nu] * g.cshift(right_left, nu, 1) * g.adj(g.cshift(U[nu], mu, 1)) * g.adj(U[mu]) grad[mu] += projected_gradient_traceless_hermitian( - U[mu] * g.cshift(U[nu], mu, 1) * g.cshift(g.adj(right_left), nu, 1) * g.adj(U[nu]) / 2j, @@ -422,13 +590,20 @@ def projected_gradient_v_projected_gradient(U, mu, nu, right_left, outer_right_l otype ) + grad[mu] += projected_gradient_traceless_hermitian( + U[mu] * left * g.cshift(g.adj(g.cshift(U[nu], mu, 1)) * g.adj(U[mu]), nu, -1) / 2j, + g.cshift(U[nu], nu, -1) * right, + g.cshift(outer_right_left[nu], nu, -1), + otype + ) + grad[nu] += projected_gradient_traceless_hermitian( U[nu] * g.cshift(right_left, nu, 1) * g.adj(g.cshift(U[nu], mu, 1)) * 
g.adj(U[mu]) / 2j, one, outer_right_left[nu], otype ) - + grad[nu] += projected_gradient_traceless_hermitian( - U[nu] * g.adj(g.cshift(U[nu] * g.cshift(right_left, nu, 1), mu, -1)) / 2j, g.cshift(U[mu], mu, -1), @@ -453,14 +628,20 @@ def __init__(self, vector): def __call__(self, U): # return g.inner_product(self.left, g.qcd.gauge.project.traceless_hermitian(U[0])).real - gradient = v_projected_gradient(U, 0, 1, g(self.right * self.left)) + gradient = v_projected_gradient(U, 0, 1, g(self.right * U[0] * self.left)) + #gradient = F_projected_gradient(U, 0, 1, g(self.right * self.left)) + #gradient = field_strength_projected_gradient(U, 0, 1, g(self.right * self.left)) + #gradient = topological_charge_gradient2(U) return sum([g.inner_product(self.vector[mu], gradient[mu]).real for mu in range(len(gradient))]) def gradient(self, U, dU): #gradient = projected_gradient_traceless_hermitian(U[0], g.adj(self.left)) #ret = [gradient] - gradient = projected_gradient_v_projected_gradient(U, 0, 1, g(self.right * self.left), g(g.adj(self.vector))) + gradient = projected_gradient_v_projected_gradient(U, 0, 1, self.right, self.left, g(g.adj(self.vector))) + #gradient = projected_gradient_F_projected_gradient(U, 0, 1, g(self.right * self.left), g(g.adj(self.vector))) + #gradient = projected_gradient_field_strength_projected_gradient(U, 0, 1, g(self.right * self.left), g(g.adj(self.vector))) + #gradient = projected_gradient_topological_charge_gradient(U, g(g.adj(self.vector))) ret = [] for u in dU: ret.append(gradient[U.index(u)]) diff --git a/tests/ad/ad.py b/tests/ad/ad.py index 2a4ac892..fbe60820 100755 --- a/tests/ad/ad.py +++ b/tests/ad/ad.py @@ -82,6 +82,9 @@ def real(x): # randomize values rng.cnormal([vv.value for vv in args]) + # string representation + assert type(str(c)) == str + # get gradient for real and imaginary part for ig, part in [(1.0, lambda x: x.real), (1.0j, lambda x: x.imag)]: v0 = c(initial_gradient=ig)
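
Notes on the patch. The hunks above thread two new pieces of bookkeeping through every node_base in lib/gpt/ad/reverse: a _container that statically describes the node's value (lattice, tensor, or complex) so that zero_gradient() can allocate a correctly shaped gradient before any forward value exists, and an optional _tag that the new str_traverse/__str__ pair uses to pretty-print the graph (exercised by the new assert type(str(c)) == str in tests/ad/ad.py). What follows is a minimal, self-contained sketch of that idea in plain Python/numpy; Container, Node, and mul are illustrative stand-ins, not gpt's API.

import numpy as np


class Container:
    """Static description of a node's value; shape () means scalar."""

    def __init__(self, shape=()):
        self.shape = tuple(shape)

    def zero(self):
        # Representative zero element of the right shape, usable before
        # any forward value has been computed.
        return np.zeros(self.shape) if self.shape else 0.0

    def __eq__(self, other):
        return self.shape == other.shape

    def __str__(self):
        return f"container{self.shape}"


class Node:
    def __init__(self, forward, backward=lambda z: None, children=(),
                 container=None, tag=None):
        if not callable(forward):
            # Leaf: the value exists now, so its container can be inferred.
            self.value, self._forward = forward, None
            container = Container(np.shape(forward))
        else:
            # Interior node: no value yet, the container must be declared.
            self.value, self._forward = None, forward
            assert container is not None
        self._backward, self._children = backward, children
        self._container, self._tag = container, tag
        self.gradient = None

    def zero_gradient(self):
        # Shape comes from the container, never from self.value.
        self.gradient = self._container.zero()


def str_traverse(node, indent=0):
    # Leaves print their container, interior nodes their tag; children are
    # indented one level deeper, mirroring the printer added in node.py.
    pad = " " * indent
    if node._forward is None:
        return f"{pad}leaf({node._container})"
    out = [f"{pad}({node._tag or node._forward}):"]
    out += [str_traverse(c, indent + 1) for c in node._children]
    return "\n".join(out)


def mul(x, y):
    # Elementwise product: both containers must agree, so the result's
    # container is known statically, before any forward evaluation.
    assert x._container == y._container

    def _backward(z):
        x.gradient = x.gradient + z.gradient * y.value
        y.gradient = y.gradient + z.gradient * x.value

    return Node(lambda: x.value * y.value, _backward, (x, y),
                container=x._container, tag="*")


a = Node(np.ones((2, 2)))
b = Node(2.0 * np.ones((2, 2)))
c = mul(a, b)
print(str_traverse(c))   # (*): with two leaf(container(2, 2)) children
c.zero_gradient()        # allocates a (2, 2) zero gradient from the container alone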
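
Where the old accumulate_gradient reduced a mismatched right-hand gradient at accumulation time, the new convert_container in util.py splices an explicit conversion node into the graph: its backward pass applies g.sum over sites (backward_sum) and/or g.trace, g.spin_trace, g.color_trace (backward_trace and friends) until the incoming gradient matches the leaf's container. A numpy analogue of the simplest case, a scalar multiplying a field, assuming nothing of gpt itself:

import numpy as np

rng = np.random.default_rng(0)
field = rng.standard_normal((4, 4))
s = 0.7

# Loss L = sum(s * field); the upstream gradient of z = s * field
# carries the field's shape ...
z_grad = np.ones_like(field)

s_grad = np.sum(z_grad * field)   # "backward_sum": reduce to the scalar container
field_grad = z_grad * s           # ... while the field's gradient keeps its shape

# Finite-difference check of the reduced scalar gradient.
eps = 1e-6
fd = (np.sum((s + eps) * field) - np.sum((s - eps) * field)) / (2 * eps)
assert abs(s_grad - fd) < 1e-6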
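
Finally, the one-line change in lib/gpt/qcd/gauge/loops.py (F @= becomes F =) makes field_strength return a lazy expression instead of writing back into the evaluated lattice, consistent with the new forward() logic in node.py that leaves pure-adjoint expressions (expr.is_adj()) unevaluated. The quantity itself is unchanged; assuming v is the usual sum of mu-nu staples (the hunk only shows the surrounding context), the returned object is the standard clover field strength,

% hedged reconstruction under the clover-leaf assumption above
F_{\mu\nu}(x) \;=\; \frac{1}{8}\,\bigl( Q_{\mu\nu}(x) - Q_{\mu\nu}^{\dagger}(x) \bigr),

with Q_{\mu\nu}(x) the sum of the four plaquette leaves in the mu-nu plane; the prefactor 1/8 absorbs the average over the four leaves and the factor 1/2 of the anti-hermitian projection.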