Skip to content

Commit

Permalink
verbosity and benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
lehner committed Oct 7, 2023
1 parent db14236 commit 174fd9f
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 9 deletions.
80 changes: 80 additions & 0 deletions benchmarks/stencil.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#!/usr/bin/env python3
#
# Authors: Christoph Lehner 2023
#
import gpt as g

# Local plaquette stencil benchmark.
#
# For single and double precision this script
#   1. builds a random gauge field on the requested grid,
#   2. builds a g.stencil.matrix whose code sums the six (nu < mu) plaquettes,
#   3. checks the stencil result against g.qcd.gauge.plaquette, and
#   4. times N applications and reports GFlops/s and GB/s.
#
# Command line: --grid <4 ints> (default 16 16 16 32), --N <int> (default 1000)

g.default.set_verbose("random", False)
rng = g.random("benchmark", "vectorized_ranlux24_24_64")

# The number of timed applications does not depend on the precision, so it is
# read from the command line once instead of once per loop iteration.
N = g.default.get_int("--N", 1000)

# Index conventions used by the stencil code below.  The stencil is invoked as
# st(P, *U), so field 0 is the output P and fields 1..4 are U[0]..U[3].
_U = [1, 2, 3, 4]
# Indices into `points`: entry 0 is the origin, entries 1..4 are the unit
# shifts in directions 0..3.
_X = 0
_Xp = [1, 2, 3, 4]
points = [
    (0, 0, 0, 0),
    (1, 0, 0, 0),
    (0, 1, 0, 0),
    (0, 0, 1, 0),
    (0, 0, 0, 1),
]

for precision in [g.single, g.double]:
    grid = g.grid(g.default.get_ivec("--grid", [16, 16, 16, 32], 4), precision)

    g.message(
        f"""
Local Stencil Benchmark with
fdimensions : {grid.fdimensions}
precision : {precision.__name__}
"""
    )

    U = g.qcd.gauge.random(grid, rng, scale=0.5)

    # NOTE(review): V is not used by the benchmark itself (the gauge
    # transformation below is disabled).  The draw is kept because it
    # presumably advances the RNG stream seen by the next precision pass.
    V = g.mcolor(grid)
    rng.element(V)
    # U = g.qcd.gauge.transformed(U, V)

    # One code entry per plaquette with nu < mu (six entries in total).
    # Each factor is (field index, point index, flag); the flag is presumably
    # "take the adjoint", giving U_mu(x) U_nu(x+mu) U_mu(x+nu)^dag U_nu(x)^dag.
    # "accumulate" is -1 only for the first entry (mu=1, nu=0) and 0 after --
    # presumably "overwrite target" vs. "add to target"; confirm against the
    # stencil implementation.
    code = [
        {
            "target": 0,
            "accumulate": -1 if (mu == 1 and nu == 0) else 0,
            "weight": 1.0,
            "factor": [
                (_U[mu], _X, 0),
                (_U[nu], _Xp[mu], 0),
                (_U[mu], _Xp[nu], 1),
                (_U[nu], _X, 1),
            ],
        }
        for mu in range(4)
        for nu in range(0, mu)
    ]

    # write field: [0] (P); read fields: [1, 2, 3, 4] (the four links)
    st = g.stencil.matrix(U[0], points, [0], [1, 2, 3, 4], code)

    # test plaquette: the stencil sum, normalized per site, must reproduce the
    # library plaquette to within 100 machine epsilons of this precision
    P = g.lattice(U[0])
    st(P, *U)
    pl = g.sum(g.trace(P)).real / P.grid.gsites / 3 / 2 / 3
    assert abs(g.qcd.gauge.plaquette(U) - pl) < precision.eps * 100

    # Flops
    gauge_otype = U[0].otype
    Nc = gauge_otype.shape[0]
    flops_per_matrix_multiply = Nc**3 * 6 + (Nc - 1) * Nc**2 * 2
    # NOTE(review): 4 * 3 counts 12 (mu, nu) pairs while `code` holds 6
    # entries (nu < mu) -- confirm the intended counting convention.
    flops_per_site = 3 * flops_per_matrix_multiply * 4 * 3
    flops = flops_per_site * P.grid.gsites * N
    # 5 fields of Nc x Nc entries with 2 numbers each (presumably real and
    # imaginary part), touched once per site and application.
    nbytes = (5 * Nc * Nc * 2) * precision.nbytes * P.grid.gsites * N

    # Warmup
    for _ in range(5):
        st(P, *U)

    # Time
    t0 = g.time()
    for _ in range(N):
        st(P, *U)
    t1 = g.time()

    # Report
    elapsed = t1 - t0
    GFlopsPerSec = flops / elapsed / 1e9
    GBPerSec = nbytes / elapsed / 1e9
    g.message(
        f"""{N} applications of plaquette stencil
Time to complete : {t1-t0:.2f} s
Total performance : {GFlopsPerSec:.2f} GFlops/s
Effective memory bandwidth : {GBPerSec:.2f} GB/s"""
    )
2 changes: 1 addition & 1 deletion lib/gpt/core/copy_plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def __call__(self, local_only=False, skip_optimize=False, use_communication_buff
gpt.message(t_cgpt)

gpt.message(
f"copy_plan: create: {t1-t0} s (local_only = {local_only}, skip_optimize = {skip_optimize}, use_communication_buffers = {use_communication_buffers})"
f"copy_plan: create: {t1-t0} s (local_only = {local_only}, skip_optimize = {skip_optimize}, use_communication_buffers = {use_communication_buffers}, communication_buffer_location = {self.communication_buffer_location.__name__})"
)

return copy_plan_executer(
Expand Down
11 changes: 11 additions & 0 deletions lib/gpt/core/stencil/matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,12 @@ def __init__(self, lat, points, write_fields, read_fields, code, code_parallel_b
)
self.write_fields = write_fields
self.read_fields = read_fields
self.verbose_performance = g.default.is_verbose("stencil_performance")

def __call__(self, *fields):
if self.verbose_performance:
t = g.timer("stencil.matrix")
t("create fields")
padded_fields = []
padded_field = None
for i in range(len(fields)):
Expand All @@ -54,6 +58,13 @@ def __call__(self, *fields):
for i in range(len(fields)):
if padded_fields[i] is None:
padded_fields[i] = g.lattice(padded_field)
if self.verbose_performance:
t("local stencil")
self.local_stencil(*padded_fields)
if self.verbose_performance:
t("extract")
for i in self.write_fields:
self.padding.extract(fields[i], padded_fields[i])
if self.verbose_performance:
t()
g.message(t)
32 changes: 24 additions & 8 deletions lib/gpt/qcd/fermion/register.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,24 +11,40 @@ def register(reg, op):
reg.Mdiag = lambda dst, src: op.apply_unary_operator(2009, dst, src)
reg.Dminus = lambda dst, src: op.apply_unary_operator(2010, dst, src)
reg.DminusDag = lambda dst, src: op.apply_unary_operator(2011, dst, src)
reg.ImportPhysicalFermionSource = lambda dst, src: op.apply_unary_operator(2012, dst, src)
reg.ImportUnphysicalFermion = lambda dst, src: op.apply_unary_operator(2013, dst, src)
reg.ExportPhysicalFermionSolution = lambda dst, src: op.apply_unary_operator(2014, dst, src)
reg.ExportPhysicalFermionSource = lambda dst, src: op.apply_unary_operator(2015, dst, src)
reg.ImportPhysicalFermionSource = lambda dst, src: op.apply_unary_operator(
2012, dst, src
)
reg.ImportUnphysicalFermion = lambda dst, src: op.apply_unary_operator(
2013, dst, src
)
reg.ExportPhysicalFermionSolution = lambda dst, src: op.apply_unary_operator(
2014, dst, src
)
reg.ExportPhysicalFermionSource = lambda dst, src: op.apply_unary_operator(
2015, dst, src
)
reg.Dhop = lambda dst, src: op.apply_unary_operator(3001, dst, src)
reg.DhopDag = lambda dst, src: op.apply_unary_operator(4001, dst, src)
reg.DhopEO = lambda dst, src: op.apply_unary_operator(3002, dst, src)
reg.DhopEODag = lambda dst, src: op.apply_unary_operator(4002, dst, src)
reg.Mdir = lambda dst, src, dir, disp: op.apply_dirdisp_operator(5001, dst, src, dir, disp)
reg.Mdir = lambda dst, src, dir, disp: op.apply_dirdisp_operator(
5001, dst, src, dir, disp
)
reg.MDeriv = lambda mat, dst, src: op.apply_deriv_operator(6001, mat, dst, src)
reg.MDerivDag = lambda mat, dst, src: op.apply_deriv_operator(7001, mat, dst, src)
reg.MoeDeriv = lambda mat, dst, src: op.apply_deriv_operator(6002, mat, dst, src)
reg.MoeDerivDag = lambda mat, dst, src: op.apply_deriv_operator(7002, mat, dst, src)
reg.MeoDeriv = lambda mat, dst, src: op.apply_deriv_operator(6003, mat, dst, src)
reg.MeoDerivDag = lambda mat, dst, src: op.apply_deriv_operator(7003, mat, dst, src)
reg.DhopDeriv = lambda mat, dst, src: op.apply_deriv_operator(6004, mat, dst, src)
reg.DhopDerivDag = lambda mat, dst, src: op.apply_deriv_operator(7004, mat, dst, src)
reg.DhopDerivDag = lambda mat, dst, src: op.apply_deriv_operator(
7004, mat, dst, src
)
reg.DhopDerivEO = lambda mat, dst, src: op.apply_deriv_operator(6005, mat, dst, src)
reg.DhopDerivEODag = lambda mat, dst, src: op.apply_deriv_operator(7005, mat, dst, src)
reg.DhopDerivEODag = lambda mat, dst, src: op.apply_deriv_operator(
7005, mat, dst, src
)
reg.DhopDerivOE = lambda mat, dst, src: op.apply_deriv_operator(6006, mat, dst, src)
reg.DhopDerivOEDag = lambda mat, dst, src: op.apply_deriv_operator(7006, mat, dst, src)
reg.DhopDerivOEDag = lambda mat, dst, src: op.apply_deriv_operator(
7006, mat, dst, src
)

0 comments on commit 174fd9f

Please sign in to comment.