diff --git a/lib/gpt/ad/forward/foundation.py b/lib/gpt/ad/forward/foundation.py
index b9e8902a..168b34a9 100644
--- a/lib/gpt/ad/forward/foundation.py
+++ b/lib/gpt/ad/forward/foundation.py
@@ -46,3 +46,12 @@ def adj(sx):
 
 def sum(sx):
     return sx.distribute1(lambda a: g.sum(a))
+
+
+def identity(sx):
+    idsx = g.identity(sx[1])
+    return g.ad.forward.series({g.ad.forward.infinitesimal({}): idsx}, sx.landau_O)
+
+
+def infinitesimal_to_cartesian(src, dsrc):
+    return dsrc[1].otype.infinitesimal_to_cartesian(src, dsrc)
diff --git a/lib/gpt/ad/forward/series.py b/lib/gpt/ad/forward/series.py
index d6f2e24f..626628a6 100644
--- a/lib/gpt/ad/forward/series.py
+++ b/lib/gpt/ad/forward/series.py
@@ -25,9 +25,11 @@ def promote(other, landau_O):
     if isinstance(other, infinitesimal):
         other = series({other: 1}, landau_O)
-    elif g.util.is_num(other):
-        other = series({infinitesimal({}): other}, landau_O)
-    return other
+    # plain "if" here: an infinitesimal was just wrapped into a series above
+    if isinstance(other, series):
+        return other
+
+    return series({infinitesimal({}): other}, landau_O)
 
 
 class series(base):
@@ -104,6 +106,7 @@ def function(self, functional):
         return res
 
     def __iadd__(self, other):
+        other = promote(other, self.landau_O)
         res = self + other
         self.landau_O = res.landau_O
         self.terms = res.terms
@@ -150,16 +153,24 @@ def __sub__(self, other):
                 if not landau_O.accept(t2):
                     continue
                 if t2 not in terms:
-                    terms[t2] = other.terms[t2]
+                    terms[t2] = -other.terms[t2]
                 else:
                     terms[t2] = g(terms[t2] - other.terms[t2])
         return series(terms, landau_O)
 
+    def __rsub__(self, other):
+        other = promote(other, self.landau_O)
+        return other - self
+
+    def __neg__(self):
+        return (-1.0) * self
+
     def __truediv__(self, other):
         return (1.0 / other) * self
 
     def __radd__(self, other):
-        return self.__add__(other, self)
+        other = promote(other, self.landau_O)
+        return other + self
 
     def __getitem__(self, tag):
         if tag == 1:
@@ -170,3 +181,8 @@ def __setitem__(self, tag, value):
         if tag == 1:
             tag = infinitesimal({})
         self.terms[tag] = value
+
+    def get_grid(self):
+        return self.terms[infinitesimal({})].grid
+
+    grid = property(get_grid)
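For orientation, a minimal usage sketch of the forward-mode series arithmetic touched above. This is not part of the patch; the 4^4 double-precision grid and the rng seed are arbitrary example setup, and the idioms mirror tests/ad/ad.py:

import gpt as g

grid = g.grid([4, 4, 4, 4], g.double)
rng = g.random("example")

dx = g.ad.forward.infinitesimal("dx")
On = g.ad.forward.landau(dx**2)  # truncate all series at O(dx^2)

# x = x0 + x1 dx
x = g.ad.forward.series(rng.cnormal(g.complex(grid)), On)
x[dx] = rng.cnormal(g.complex(grid))

# (x0 + x1 dx)^2 = x0^2 + 2 x0 x1 dx + O(dx^2)
y = x * x
assert g.norm2(y[dx] - 2.0 * x[1] * x[dx]) < 1e-10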
diff --git a/lib/gpt/ad/reverse/foundation.py b/lib/gpt/ad/reverse/foundation.py
index a87c1509..bba3534e 100644
--- a/lib/gpt/ad/reverse/foundation.py
+++ b/lib/gpt/ad/reverse/foundation.py
@@ -64,7 +64,31 @@ def _backward(z):
 
     return g.ad.reverse.node_base(_forward, _backward, (x,))
 
-
+
+def trace(x, t):
+    def _forward():
+        return g.trace(x.value, t)
+
+    # not allowed to capture z, otherwise have reference loop!
+    def _backward(z):
+        if x.with_gradient:
+            accumulate_gradient(x, g.identity(x.value) * z.gradient)
+
+    return g.ad.reverse.node_base(_forward, _backward, (x,))
+
+
+def sum(x):
+    def _forward():
+        return g.sum(x.value)
+
+    # not allowed to capture z, otherwise have reference loop!
+    def _backward(z):
+        if x.with_gradient:
+            accumulate_gradient(x, g.identity(x.value) * z.gradient)
+
+    return g.ad.reverse.node_base(_forward, _backward, (x,))
+
+
 def component_simple_map(operator, numpy_operator, extra_params, first, second):
     if operator == "relu":
         assert second is None
diff --git a/lib/gpt/ad/reverse/node.py b/lib/gpt/ad/reverse/node.py
index 5ffb990d..abffea30 100644
--- a/lib/gpt/ad/reverse/node.py
+++ b/lib/gpt/ad/reverse/node.py
@@ -17,7 +17,7 @@
 #    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 #
 import gpt as g
-from gpt.ad.reverse.util import accumulate_gradient
+from gpt.ad.reverse.util import accumulate_gradient, is_field
 from gpt.ad.reverse import foundation
 from gpt.core.foundation import base
 
@@ -99,7 +99,9 @@ def __init__(self, _forward, _backward=lambda z: None, _children=(), with_gradie
         # print(gctr)
 
     def zero_gradient(self):
-        self.gradient = g(0.0 * self.value)
+        self.gradient = 0.0 * self.value
+        if isinstance(self.gradient, g.expr):
+            self.gradient = g(self.gradient)
 
     def __mul__(x, y):
         if not isinstance(x, node_base):
@@ -123,6 +125,9 @@ def _backward(z):
     def __rmul__(x, y):
         return node_base.__mul__(y, x)
 
+    def __truediv__(self, other):
+        return (1.0 / other) * self
+
     def __add__(x, y):
         if not isinstance(x, node_base):
             x = node_base(x, with_gradient=False)
@@ -161,6 +166,12 @@ def _backward(z):
 
         return node_base(_forward, _backward, (x, y))
 
+    def __rsub__(x, y):
+        return node_base.__sub__(y, x)
+
+    def __radd__(x, y):
+        return node_base.__add__(y, x)
+
     def forward(self, nodes, eager=True, free=None):
         max_fields_allocated = 0
         fields_allocated = 0
@@ -175,7 +186,7 @@ def forward(self, nodes, eager=True, free=None):
                     if m._forward is not None:
                         m.value = None
                         fields_allocated -= 1
-            if eager:
+            if eager and isinstance(n.value, g.expr):
                 n.value = g(n.value)
 
         if verbose_memory:
@@ -186,7 +197,10 @@ def forward(self, nodes, eager=True, free=None):
     def backward(self, nodes, first_gradient):
         fields_allocated = len(nodes)  # .values
         max_fields_allocated = fields_allocated
-        self.gradient = 1.0
+        if is_field(self.value):
+            raise Exception("Expression evaluates to a field. Gradient calculation is not unique.")
+        self.zero_gradient()
+        self.gradient += 1.0
         for n in reversed(nodes):
             first_gradient_n = first_gradient[n]
             for m in first_gradient_n:
@@ -198,10 +212,11 @@ def backward(self, nodes, first_gradient):
             if n._forward is not None:
                 n.gradient = None
                 fields_allocated -= 1
-            if n._forward is not None:
-                if n is not self:
-                    n.value = None
-                    fields_allocated -= 1
+                if n is not self:
+                    n.value = None
+                    fields_allocated -= 1
+            else:
+                n.gradient = g.infinitesimal_to_cartesian(n.value, n.gradient)
 
         if verbose_memory:
             g.message(
@@ -220,6 +235,11 @@ def __call__(self, with_gradients=True):
     def functional(self, *arguments):
         return node_differentiable_functional(self, arguments)
 
+    def get_grid(self):
+        return self.value.grid
+
+    grid = property(get_grid)
+
 
 def node(x, with_gradient=True):
     return node_base(x, with_gradient=with_gradient)
diff --git a/lib/gpt/ad/reverse/util.py b/lib/gpt/ad/reverse/util.py
index a5fe6c21..71a091ea 100644
--- a/lib/gpt/ad/reverse/util.py
+++ b/lib/gpt/ad/reverse/util.py
@@ -28,6 +28,8 @@ def is_field(x):
         return x.lattice() is not None
     elif g.util.is_num(x):
         return False
+    elif isinstance(x, g.ad.forward.series):
+        return is_field(x[1])
     else:
         raise Exception(f"Unknown object type {type(x)}")
 
@@ -39,4 +41,18 @@ def accumulate_gradient(lhs, rhs_gradient):
         rhs_gradient = g.sum(rhs_gradient)
     if g.util.is_num(lhs.gradient) and isinstance(rhs_gradient, g.expr):
         rhs_gradient = g(rhs_gradient)
+
+    if isinstance(lhs.gradient, g.lattice) and isinstance(rhs_gradient, g.expr):
+        rhs_otype = rhs_gradient.lattice().otype
+        lhs_otype = lhs.gradient.otype
+        if lhs_otype.__name__ != rhs_otype.__name__:
+            if rhs_otype.spintrace[2] is not None:
+                if lhs_otype.__name__ == rhs_otype.spintrace[2]().__name__:
+                    rhs_gradient = g(g.spin_trace(rhs_gradient))
+                    rhs_otype = rhs_gradient.otype
+            if rhs_otype.colortrace[2] is not None:
+                if lhs_otype.__name__ == rhs_otype.colortrace[2]().__name__:
+                    rhs_gradient = g(g.color_trace(rhs_gradient))
+                    rhs_otype = rhs_gradient.otype
+
     lhs.gradient += rhs_gradient
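The pattern these reverse-mode additions enable, sketched for a scalar functional. The grid, rng, and the external field W are example setup, not part of the patch; any scalar-valued result works, while a field-valued result now raises the exception added in backward:

import gpt as g

grid = g.grid([4, 4, 4, 4], g.double)
rng = g.random("example")

U = g.ad.reverse.node(rng.element(g.mcolor(grid)))
W = rng.element(g.mcolor(grid))  # fixed external field, not differentiated

# g.trace and g.sum now dispatch to the reverse-AD foundation above
s = g.sum(g.trace(U * W))

value = s()  # forward pass, then backward pass
# after backward, leaf gradients have been mapped to the cartesian
# (algebra-valued) convention via g.infinitesimal_to_cartesian
g.message(f"value = {value}, |grad|^2 = {g.norm2(U.gradient)}")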
diff --git a/lib/gpt/core/__init__.py b/lib/gpt/core/__init__.py
index 6fb436de..612aeae4 100644
--- a/lib/gpt/core/__init__.py
+++ b/lib/gpt/core/__init__.py
@@ -46,6 +46,7 @@
     slice,
     indexed_sum,
     identity,
+    infinitesimal_to_cartesian,
     project,
     where,
     scale_per_coordinate,
diff --git a/lib/gpt/core/expr.py b/lib/gpt/core/expr.py
index fb9c2b1e..267140bd 100644
--- a/lib/gpt/core/expr.py
+++ b/lib/gpt/core/expr.py
@@ -293,7 +293,7 @@ def expr_eval(first, second=None, ac=False):
         return_list = False
     else:
         assert ac is False
-        if gpt.util.is_list_instance(first, (gpt.lattice, gpt.tensor)):
+        if not gpt.util.is_list_instance(first, gpt.expr):
             return first
 
         e = expr(first)
diff --git a/lib/gpt/core/foundation/lattice.py b/lib/gpt/core/foundation/lattice.py
index 0528699e..e88897f5 100644
--- a/lib/gpt/core/foundation/lattice.py
+++ b/lib/gpt/core/foundation/lattice.py
@@ -103,3 +103,22 @@ def rank_sum(l):
 
 def sum(l):
     return l.grid.globalsum(rank_sum(l))
+
+
+def identity(src):
+    eye = gpt.lattice(src)
+    # identity only works for matrix types
+    n2 = len(eye.v_obj)
+    n = int(n2**0.5)
+    assert n * n == n2
+    for i in range(n):
+        for j in range(n):
+            if i == j:
+                cgpt.lattice_set_to_identity(eye.v_obj[i * n + j])
+            else:
+                cgpt.lattice_set_to_number(eye.v_obj[i * n + j], 0.0)
+    return eye
+
+
+def infinitesimal_to_cartesian(src, dsrc):
+    return dsrc.otype.infinitesimal_to_cartesian(src, dsrc)
diff --git a/lib/gpt/core/foundation/tensor.py b/lib/gpt/core/foundation/tensor.py
index 6571d29b..9a9893de 100644
--- a/lib/gpt/core/foundation/tensor.py
+++ b/lib/gpt/core/foundation/tensor.py
@@ -52,3 +52,7 @@ def adj(l):
     if l.transposable():
         return l.adj()
     return gpt.adj(gpt.expr(l))
+
+
+def infinitesimal_to_cartesian(src, dsrc):
+    return dsrc.otype.infinitesimal_to_cartesian(src, dsrc)
diff --git a/lib/gpt/core/object_type/container.py b/lib/gpt/core/object_type/container.py
index d3175e49..6dfca493 100644
--- a/lib/gpt/core/object_type/container.py
+++ b/lib/gpt/core/object_type/container.py
@@ -121,6 +121,9 @@ def __init__(self, ndim):
     def compose(self, a, b):
         return a + b
 
+    def infinitesimal_to_cartesian(self, a, da):
+        return da
+
 
 ###
 # Matrices and vectors of spin
@@ -148,6 +151,9 @@ def cartesian(self):
     def compose(self, a, b):
         return a + b
 
+    def infinitesimal_to_cartesian(self, a, da):
+        return da
+
     def defect(self, field):
         return 0.0
 
@@ -196,6 +202,9 @@ def __init__(self, ndim):
     def compose(self, a, b):
         return a + b
 
+    def infinitesimal_to_cartesian(self, a, da):
+        return da
+
 
 ###
 # Matrices and vectors of both spin and color
@@ -231,6 +240,9 @@ def cartesian(self):
     def compose(self, a, b):
         return a + b
 
+    def infinitesimal_to_cartesian(self, a, da):
+        return da
+
     def identity(self):
         return gpt.matrix_spin_color(
             numpy.multiply.outer(numpy.identity(self.shape[0]), numpy.identity(self.shape[2])),
@@ -271,6 +283,9 @@ def __init__(self, spin_ndim, color_ndim):
     def compose(self, a, b):
         return a + b
 
+    def infinitesimal_to_cartesian(self, a, da):
+        return da
+
     def distribute(self, mat, dst, src, zero_lhs):
         src, dst = gpt.util.to_list(src), gpt.util.to_list(dst)
         if src[0].otype.__name__ == self.ot_matrix:
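The expr_eval change above is what lets g(...) act as a passthrough for AD types instead of only lattices and tensors. A quick sketch of the intended behavior (grid, rng, and dx are example setup, not part of the patch):

import gpt as g

grid = g.grid([4, 4, 4, 4], g.double)
rng = g.random("example")

dx = g.ad.forward.infinitesimal("dx")
x = g.ad.forward.series(rng.cnormal(g.complex(grid)), g.ad.forward.landau(dx**2))

# not an expression: g(...) now returns the object unchanged
assert g(x) is x

# expressions still evaluate to lattices as before
l = rng.cnormal(g.complex(grid))
assert isinstance(g(2.0 * l), g.lattice)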
diff --git a/lib/gpt/core/object_type/su_n.py b/lib/gpt/core/object_type/su_n.py
index 15d5a9f9..ea1273bf 100644
--- a/lib/gpt/core/object_type/su_n.py
+++ b/lib/gpt/core/object_type/su_n.py
@@ -96,6 +96,9 @@ def defect(self, A):
     def compose(self, a, b):
         return a + b
 
+    def infinitesimal_to_cartesian(self, A, dA):
+        return dA
+
     def inner_product(self, left, right):
         if self.trace_norm is None:
             gen = self.generators(left.grid.precision.complex_dtype)
@@ -128,6 +131,15 @@ def defect(self, U):
         err2 += gpt.norm2(gpt.matrix.det(U) - I_s) / gpt.norm2(I_s)
         return err2**0.5
 
+    def infinitesimal_to_cartesian(self, U, dU):
+        src = gpt(dU * gpt.adj(U))
+        N = self.shape[0]
+        ret = 0.5 * src - 0.5 * gpt.adj(src)
+        ret -= gpt.identity(src) * gpt.trace(ret) / N
+        ret = gpt(ret / 2j)
+        ret.otype = self.cartesian()
+        return ret
+
     def project(self, U, method):
         if method == "defect_right" or method == "defect":
             # V = V0(1 + eps) with dag(eps) = eps , dag(V0) V0 = 1
diff --git a/lib/gpt/core/object_type/u_1.py b/lib/gpt/core/object_type/u_1.py
index 440af006..56525c30 100644
--- a/lib/gpt/core/object_type/u_1.py
+++ b/lib/gpt/core/object_type/u_1.py
@@ -59,6 +59,9 @@ def cartesian(self):
     def compose(self, a, b):
         return a + b
 
+    def infinitesimal_to_cartesian(self, A, dA):
+        return dA
+
     def generators(self, dt):
         return [complex(1.0, 0)]
 
@@ -82,6 +85,11 @@ def __init__(self):
     def compose(self, a, b):
         return a * b
 
+    def infinitesimal_to_cartesian(self, U, dU):
+        ret = gpt(dU * gpt.adj(U) / 1j)
+        ret.otype = self.cartesian()
+        return ret
+
     def defect(self, U):
         I = gpt.identity(U)
         err2 = gpt.norm2(U * gpt.adj(U) - I) / gpt.norm2(I)
diff --git a/lib/gpt/core/operator/unary.py b/lib/gpt/core/operator/unary.py
index 2af99fd6..393b067f 100644
--- a/lib/gpt/core/operator/unary.py
+++ b/lib/gpt/core/operator/unary.py
@@ -76,6 +76,8 @@ def adj(l):
         return l.adj()
     elif isinstance(l, gpt.core.foundation.base):
         return l.__class__.foundation.adj(l)
+    elif gpt.util.is_num(l):
+        return gpt.util.adj_num(l)
     else:
         return adj(gpt.expr(l))
 
@@ -117,6 +119,7 @@ def rank_sum(e):
         e = gpt.eval(e)
     return e.__class__.foundation.rank_sum(e)
 
+
 def sum(e):
     if isinstance(e, gpt.expr):
         e = gpt.eval(e)
diff --git a/lib/gpt/core/transform.py b/lib/gpt/core/transform.py
index 931aff96..f666c7dc 100644
--- a/lib/gpt/core/transform.py
+++ b/lib/gpt/core/transform.py
@@ -159,18 +159,13 @@ def indexed_sum(fields, index, length):
 
 
 def identity(src):
-    eye = gpt.lattice(src)
-    # identity only works for matrix types
-    n2 = len(eye.v_obj)
-    n = int(n2**0.5)
-    assert n * n == n2
-    for i in range(n):
-        for j in range(n):
-            if i == j:
-                cgpt.lattice_set_to_identity(eye.v_obj[i * n + j])
-            else:
-                cgpt.lattice_set_to_number(eye.v_obj[i * n + j], 0.0)
-    return eye
+    return src.__class__.foundation.identity(src)
+
+
+def infinitesimal_to_cartesian(src, dsrc):
+    if gpt.util.is_num(src):
+        return dsrc
+    return dsrc.__class__.foundation.infinitesimal_to_cartesian(src, dsrc)
 
 
 def project(src, method):
diff --git a/lib/gpt/core/util.py b/lib/gpt/core/util.py
index 26f443af..d4b03c41 100644
--- a/lib/gpt/core/util.py
+++ b/lib/gpt/core/util.py
@@ -29,6 +29,16 @@ def is_num(x):
     )
 
 
+# adj a number
+def adj_num(x):
+    if isinstance(x, (int, float, gpt.qfloat)):
+        return x
+    elif isinstance(x, complex):
+        return x.conjugate()
+    else:
+        raise Exception(f"adj_num not yet implemented for type {type(x)}")
+
+
 # convert to number type
 def to_num(x):
     if isinstance(x, (np.complex64, np.complex128)):
diff --git a/lib/gpt/qcd/gauge/project.py b/lib/gpt/qcd/gauge/project.py
index 4424ceb6..ff3e2dcc 100644
--- a/lib/gpt/qcd/gauge/project.py
+++ b/lib/gpt/qcd/gauge/project.py
@@ -23,8 +23,8 @@
 def traceless_anti_hermitian(src):
     if isinstance(src, list):
         return [traceless_anti_hermitian(x) for x in src]
-
-    src = g.eval(src)
+    if isinstance(src, g.expr):
+        src = g.eval(src)
     N = src.otype.shape[0]
     ret = g(0.5 * src - 0.5 * g.adj(src))
     ret -= g.identity(src) * g.trace(ret) / N
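A numerical cross-check of the group-to-algebra maps added above, shown for the U(1) case where dU = i dA U implies dA = dU adj(U) / i; the SU(N) version additionally projects onto the traceless anti-hermitian part. The grid and rng are example setup, not part of the patch:

import gpt as g

grid = g.grid([4, 4, 4, 4], g.double)
rng = g.random("example")

U = rng.element(g.u1(grid))
dA = rng.element(g.lattice(grid, U.otype.cartesian()))

# infinitesimal variation of U along the algebra direction dA
dU = g(1j * dA * U)

# recover dA through the new dispatch chain:
# g.infinitesimal_to_cartesian -> lattice foundation -> otype method
dA2 = g.infinitesimal_to_cartesian(U, dU)
eps = (g.norm2(dA2 - dA) / g.norm2(dA)) ** 0.5
assert eps < 1e-12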
diff --git a/tests/ad/ad.py b/tests/ad/ad.py
index 556ba7cb..f3f4562e 100755
--- a/tests/ad/ad.py
+++ b/tests/ad/ad.py
@@ -35,7 +35,9 @@
     (
         g.norm2(
             2.0 * a2 * t1 * a1 * relu(a1 * x + b1)
-            - 3.5 * a2 * b2
+            - 3.5 * a2 * b2 * g.trace(a1)
+            - 1.5 * g.color_trace(a1) * a2 * b2
+            + 2.5 * a2 * g.spin_trace(a1) * b2
             + t1 * g.cshift(a1 * x, 1, -1)
         ),
         1e-1,
@@ -201,11 +203,88 @@ def scale(lam):
 
 test = g.norm2(g.cshift(g.cshift(lz, 0, 1), 0, -1) - lz)
 
-g.message(test)
-
-# TODO:
-# - fad.series, rad.node need to play nice with g.eval
-#   (inherit from g.evaluable)
-# - fad.series, rad.node play nice with regular g.inner_product etc.
-#   for use in regular algorithms; inherit from lattice_like which
-#   should add maps to rad.inner_product, etc.
+err = abs(test[1] + test[dm] + test[alpha] + test[alpha**2] + test[dm**2] + test[dm * alpha])
+assert err < 1e-6
+
+
+#####################################
+# combined forward/reverse AD tests
+#####################################
+dbeta = g.ad.forward.infinitesimal("dbeta")
+On = g.ad.forward.landau(dbeta**2)
+
+U = []
+for mu in range(4):
+    U_mu_0 = rng.element(g.mcolor(grid))
+    U_mu = g.ad.forward.series(U_mu_0, On)
+    U_mu[dbeta] = g(1j * U_mu_0 * rng.element(g.lattice(grid, U_mu_0.otype.cartesian())))
+    U.append(U_mu)
+
+Id = g.ad.forward.series(g.identity(U_mu_0), On)
+
+# unitarity
+for mu in range(4):
+    eps2 = g.norm2(U[mu] * g.adj(U[mu]) - Id)
+    eps2 = eps2[1].real + eps2[dbeta].real
+    assert eps2 < 1e-25
+
+# compare to wilson action
+a = g.qcd.gauge.action.wilson(1)
+
+
+def plaquette(U):
+    res = None
+    for mu in range(4):
+        for nu in range(mu):
+            nn = g(
+                g.trace(
+                    g.adj(U[nu]) * U[mu] * g.cshift(U[nu], mu, 1) * g.cshift(g.adj(U[mu]), nu, 1)
+                )
+            )
+            if res is None:
+                res = nn
+            else:
+                res += nn
+    res = (res + g.adj(res)) / 12 / 3
+    vol = U[0].grid.gsites
+    Nd = 4
+    res = g.sum(res) / vol
+    return (Nd - 1) * Nd * vol / 2.0 * (1.0 - res)
+
+
+# first test action
+res = plaquette(U)
+U1 = [u[1] for u in U]
+err = abs(res[1] - a(U1))
+g.message(f"Action test: {err}")
+assert err < 1e-6
+
+U_2 = [g.ad.reverse.node(u) for u in U]
+res = plaquette(U_2)()
+err = abs(res[1] - a(U1))
+g.message(f"Action test (FW/REV): {err}")
+assert err < 1e-6
+
+# gradient test
+gradients = a.gradient(U1, U1)
+gradients2 = [u.gradient[1] for u in U_2]
+for mu in range(4):
+    err = (g.norm2(gradients[mu] - gradients2[mu]) / g.norm2(gradients[mu])) ** 0.5
+    g.message(f"Gradient test [{mu}]: {err}")
+    assert err < 1e-10
+
+# numerical derivative of action value test
+eps = 1e-4
+U_plus = [g(u[1] + eps * u[dbeta]) for u in U]
+U_minus = [g(u[1] - eps * u[dbeta]) for u in U]
+da_dbeta = (plaquette(U_plus) - plaquette(U_minus)) / 2 / eps
+err = abs(da_dbeta - res[dbeta]) / abs(da_dbeta)
+g.message(f"Numerical action derivative test: {err}")
+assert err < 1e-5
+
+# numerical derivative of gradient test
+for mu in range(4):
+    dg_dbeta = g((a.gradient(U_plus, U_plus)[mu] - a.gradient(U_minus, U_minus)[mu]) / 2 / eps)
+    err = (g.norm2(dg_dbeta - U_2[mu].gradient[dbeta]) / g.norm2(dg_dbeta)) ** 0.5
+    g.message(f"Numerical action gradient [{mu}] derivative test: {err}")
+    assert err < 1e-5