Skip to content

Commit

Permalink
optimize
Browse files Browse the repository at this point in the history
  • Loading branch information
lehner committed Oct 26, 2023
1 parent 6800f15 commit 199b9c5
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 9 deletions.
7 changes: 6 additions & 1 deletion benchmarks/matrix_multiply.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,11 @@
three = g.lattice(grid, tp)
rng.cnormal([one, two])

# Rank inner product
# matrix multiply
# Bytes accounted to the benchmark: three objects of the size of `one`
# per multiply, N multiplies in total (presumably two operands read and
# one result written -- confirm against the timed loop).
nbytes = 3.0 * one.global_bytes() * N
# Side length of the per-site matrix; assumes the object is a square matrix
# of complex entries (two floats each), so n*n == nfloats // 2.
n = (one.otype.nfloats // 2) ** 0.5
# Each of the n*n result entries is counted as n complex multiplications
# (6 flops each) plus n-1 complex additions (2 flops each).
flops_per_matrix_multiply = n * n * (n * 6 + (n - 1) * 2)
# Total flops over all grid sites and all N multiplies.  Only `flops` is
# assigned here so that flops_per_matrix_multiply keeps its per-multiply
# meaning instead of being overwritten with the total.
flops = grid.gsites * N * flops_per_matrix_multiply

# Time
dt = 0.0
Expand All @@ -44,10 +47,12 @@

# Report
# Throughput figures: bytes and flops divided by the measured time dt,
# scaled by 1e9 (the message below labels them GB/s and GFlops/s).
GBPerSec = nbytes / dt / 1e9
GFLPerSec = flops / dt / 1e9
# Emit a single multi-line summary for the current object type tp.
g.message(
f"""{N} matrix_multiply
Object type : {tp.__name__}
Time to complete : {dt:.2g} s
GFlops/s : {GFLPerSec:.2f}
Effective memory bandwidth : {GBPerSec:.2f} GB/s
"""
)
10 changes: 4 additions & 6 deletions benchmarks/stencil_tensor.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
#!/usr/bin/env python3
import gpt as g

#grid = g.grid([64,64,64,64], g.double)
grid = g.grid([32,32,32,32], g.double)
#grid = g.grid([32,16,16,16], g.double)
#grid = g.grid([16,16,16,32], g.double)
#grid = g.grid([2*4,4*3,3*4,3*3*4], g.double)
grid = g.grid(g.default.get_ivec("--grid", [16, 16, 16, 32], 4), g.double)

m1 = g.mcolor(grid)
m2 = g.mcolor(grid)
m3 = g.mcolor(grid)
Expand Down Expand Up @@ -69,6 +66,7 @@
(0,dst,ti.mov if l == 0 else ti.inc,1.0,[(2,0,3*i + l),(-1,0,3*l + j)])
)
segments = [(3, 9), (3, 9)]
#segments = [(27*2, 1)]
else:
for i in range(3):
for j in range(3):
Expand Down Expand Up @@ -153,7 +151,7 @@
g.message(g.norm2(R - R2) / g.norm2(R))
#
# D[i2[0], i1[0]] += sign1 * sign2 * Q1[i1[1], i2[1]] * g.transpose(Q2[i1[2], i2[2]])
for osites_per_instruction in [4,16,32,64,256]:
for osites_per_instruction in [1,2,4,16,32,64,256]:
for osites_per_cache_block in [ grid.gsites]:
ein.memory_access_pattern(osites_per_instruction, osites_per_cache_block)

Expand Down
2 changes: 1 addition & 1 deletion lib/gpt/core/local_stencil/matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def __init__(self, lat, points, code, code_parallel_block_size=None, local=1):
self.obj = cgpt.stencil_matrix_create(
lat.v_obj[0], lat.grid.obj, points, self.code, code_parallel_block_size, local
)
self.fast_osites = 1
self.fast_osites = 0

def __call__(self, *fields):
cgpt.stencil_matrix_execute(self.obj, list(fields), self.fast_osites)
Expand Down
2 changes: 1 addition & 1 deletion lib/gpt/core/local_stencil/matrix_vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def __init__(
code_parallel_block_size,
local,
)
self.fast_osites = 1
self.fast_osites = 0

def __call__(self, matrix_fields, vector_fields):
cgpt.stencil_matrix_vector_execute(self.obj, matrix_fields, vector_fields, self.fast_osites)
Expand Down

0 comments on commit 199b9c5

Please sign in to comment.