tune
lehner committed Jun 6, 2024
1 parent d9984ba commit 758392a
Showing 4 changed files with 24 additions and 15 deletions.
3 changes: 0 additions & 3 deletions lib/gpt/core/local_stencil/matrix.py
@@ -46,6 +46,3 @@ def __del__(self):

    def data_access_hints(self, *hints):
        pass
-
-    def memory_access_pattern(self, fast_osites):
-        self.fast_osites = fast_osites
3 changes: 0 additions & 3 deletions lib/gpt/core/local_stencil/matrix_vector.py
@@ -61,6 +61,3 @@ def __del__(self):

    def data_access_hints(self, *hints):
        pass
-
-    def memory_access_pattern(self, fast_osites):
-        self.fast_osites = fast_osites
15 changes: 7 additions & 8 deletions lib/gpt/core/local_stencil/tensor.py
@@ -47,22 +47,21 @@ def __init__(self, lat, points, code, segments, local=1):
        self.obj = cgpt.stencil_tensor_create(
            lat.v_obj[0], lat.grid.obj, points, self.code, self.segments, local
        )
-        self.osites_per_cache_block = lat.grid.gsites

        # auto tuner
+        gsites = int(lat.grid.gsites)
        tag = f"local_tensor({lat.otype.__name__}, {lat.grid.describe()}, {hash_code(code)}, {len(segments)}, {local})"
-        super().__init__(tag, [2, 4, 8, 16, 32, 64, 128, 256], 4)
+        super().__init__(tag, [
+            (opi, opi * opcb) for opi in [2, 4, 8, 16, 32, 64, 128, 256] for opcb in [256, 1024, 8192, gsites]
+        ], (4, gsites))

    @auto_tuned_method
-    def __call__(self, opi, *fields):
-        cgpt.stencil_tensor_execute(self.obj, list(fields), opi, self.osites_per_cache_block)
+    def __call__(self, performance_args, *fields):
+        opi, opcb = performance_args
+        cgpt.stencil_tensor_execute(self.obj, list(fields), opi, opcb)

    def __del__(self):
        cgpt.stencil_tensor_delete(self.obj)

    def data_access_hints(self, *hints):
        pass
-
-    def memory_access_pattern(self, osites_per_instruction, osites_per_cache_block):
-        self.osites_per_instruction = osites_per_instruction
-        self.osites_per_cache_block = osites_per_cache_block
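With this change the auto tuner for the local tensor stencil searches pairs of (osites_per_instruction, osites_per_cache_block) instead of a one-dimensional list of instruction block sizes, and the cache-block size is no longer a fixed attribute but part of the tuned parameters unpacked in __call__. A minimal sketch of the candidate space built by the new super().__init__ call; the gsites value below is a hypothetical stand-in, since the real value comes from int(lat.grid.gsites), and the last tuple element is presumably the default used before tuning results are available:

# Sketch only: reproduce the tuning candidates enumerated by the new super().__init__ call.
gsites = 16 * 16 * 16 * 32  # hypothetical local volume; the real value is int(lat.grid.gsites)

candidates = [
    (opi, opi * opcb)  # (osites_per_instruction, osites_per_cache_block)
    for opi in [2, 4, 8, 16, 32, 64, 128, 256]
    for opcb in [256, 1024, 8192, gsites]
]
default = (4, gsites)  # third argument of super().__init__ in the diff

print(len(candidates), "candidate pairs, default =", default)  # -> 32 candidate pairs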
18 changes: 17 additions & 1 deletion lib/gpt/qcd/gauge/smear/local_stout.py
@@ -384,7 +384,7 @@ def gradient(self, U, dU):
for b in range(ng):
    dJdX[b] = g(-dJdX[b])

-t("Invert M_ab")
+t("invert M_ab")
inv_M_ab = g.matrix.inv(M_ab)

t("N M^-1")
@@ -425,7 +425,9 @@ def gradient(self, U, dU):
PlaqR = g((-rho) * csf(U[nu], nu, csf(U[mu], mu, csb(U[nu], nu, csb(U_mu_masked, mu)))))

dJdXe_nMpInv_y = dJdXe_nMpInv
+t("compute_adj_ab")
compute_adj_ab(PlaqL, PlaqR, Nxy, generators, cache_ab)
+t("non-local")
Fdet1_nu = g(g.transpose(Nxy) * dJdXe_nMpInv_y)

PlaqR = g((-1.0) * PlaqR)
@@ -439,7 +441,9 @@
PlaqL = csb(U_mu_masked, mu)

dJdXe_nMpInv_y = g.cshift(dJdXe_nMpInv, mu, -1)
+t("compute_adj_ab")
compute_adj_ab(PlaqL, PlaqR, Nxy, generators, cache_ab)
+t("non-local")
Fdet1_nu += g.transpose(Nxy) * dJdXe_nMpInv_y

MpInvJx_nu = g.cshift(MpInvJx, mu, -1)
@@ -453,7 +457,9 @@
PlaqR = csf(U[nu], nu)

dJdXe_nMpInv_y = g.cshift(dJdXe_nMpInv, nu, 1)
+t("compute_adj_ab")
compute_adj_ab(PlaqL, PlaqR, Nxy, generators, cache_ab)
+t("non-local")
Fdet1_nu += g.transpose(Nxy) * dJdXe_nMpInv_y

MpInvJx_nu = g.cshift(MpInvJx, nu, 1)
@@ -469,7 +475,9 @@
dJdXe_nMpInv_y = g.cshift(dJdXe_nMpInv, mu, -1)
dJdXe_nMpInv_y = g.cshift(dJdXe_nMpInv_y, nu, 1)

+t("compute_adj_ab")
compute_adj_ab(PlaqL, PlaqR, Nxy, generators, cache_ab)
+t("non-local")
Fdet1_nu += g.transpose(Nxy) * dJdXe_nMpInv_y

MpInvJx_nu = g.cshift(MpInvJx, mu, -1)
@@ -480,15 +488,19 @@
Fdet2_nu += FdetV

# force contributions to fundamental representation
+t("adj_to_fund")
adjoint_to_fundamental(Fdet1[nu], Fdet1_nu, generators)
adjoint_to_fundamental(Fdet2[nu], Fdet2_nu, generators)
+t("non-local")

# mu cw
PlaqL = g((-rho) * csf(U[mu], mu, csb(U[nu], nu, csb(U_mu_masked, mu))))
PlaqR = csb(U[nu], nu)

dJdXe_nMpInv_y = g.cshift(dJdXe_nMpInv, nu, -1)
+t("compute_adj_ab")
compute_adj_ab(PlaqL, PlaqR, Nxy, generators, cache_ab)
+t("non-local")
Fdet1_mu += g.transpose(Nxy) * dJdXe_nMpInv_y

MpInvJx_nu = g.cshift(MpInvJx, nu, -1)
@@ -503,7 +515,9 @@

dJdXe_nMpInv_y = g.cshift(dJdXe_nMpInv, nu, 1)

+t("compute_adj_ab")
compute_adj_ab(PlaqL, PlaqR, Nxy, generators, cache_ab)
+t("non-local")
Fdet1_mu += g.transpose(Nxy) * dJdXe_nMpInv_y

MpInvJx_nu = g.cshift(MpInvJx, nu, 1)
@@ -516,8 +530,10 @@
t("aggregate")
Fdet1_mu += g.transpose(NxxAd) * dJdXe_nMpInv

+t("adj_to_fund")
adjoint_to_fundamental(Fdet1[mu], Fdet1_mu, generators)
adjoint_to_fundamental(Fdet2[mu], Fdet2_mu, generators)
+t("aggregate")

force = [g((0.5 * 1j) * (x + y)) for x, y in zip(Fdet1, Fdet2)]

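The local_stout.py changes only add finer-grained profiling: the existing t(...) calls now separate the compute_adj_ab and adjoint_to_fundamental work from the surrounding non-local shift and aggregate phases, so repeated labels accumulate per phase across the mu/nu loops. A rough illustration of that phase-label timing pattern follows; this is a toy stand-in, not the gpt timer implementation:

import time
from collections import defaultdict

class PhaseTimer:
    # Toy stand-in: calling the timer with a label closes the current phase
    # and opens the named one; repeated labels accumulate their durations.
    def __init__(self):
        self.totals = defaultdict(float)
        self.current = None
        self.started = None

    def __call__(self, label=None):
        now = time.perf_counter()
        if self.current is not None:
            self.totals[self.current] += now - self.started
        self.current, self.started = label, now

t = PhaseTimer()
t("compute_adj_ab")   # ... compute_adj_ab(PlaqL, PlaqR, Nxy, generators, cache_ab) would run here ...
t("non-local")        # ... Fdet1_nu += g.transpose(Nxy) * dJdXe_nMpInv_y ...
t("adj_to_fund")      # ... adjoint_to_fundamental(Fdet1[nu], Fdet1_nu, generators) ...
t(None)               # close the last phase
print(dict(t.totals))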
