Skip to content
This repository has been archived by the owner on Nov 27, 2024. It is now read-only.

Commit

Permalink
add openmp flag and bypass workaround flag
Browse files Browse the repository at this point in the history
  • Loading branch information
tj-sun committed Aug 4, 2019
1 parent 4e27c2b commit f9e3066
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 5 deletions.
9 changes: 9 additions & 0 deletions pyop2/codegen/rep2loopy.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,15 @@ def generate(builder, wrapper_name=None):
name=wrapper_name,
# TODO, should these really be silenced?
silenced_warnings=["write_race*"])
from pyop2.configuration import configuration
if configuration["time"]:
batch_size = configuration["simd_width"]
if builder.extruded:
start, end = parameters.layer_start, parameters.layer_end
else:
start, end = "start", "end"
wrapper = loopy.assume(wrapper, "{0} mod {1} = 0".format(end, batch_size))
wrapper = loopy.assume(wrapper, "exists zz: zz > 0 and {0} = {1}*zz + {2}".format(end, configuration["simd_width"], start))

# prioritize loops
for indices in context.index_ordering:
Expand Down
5 changes: 3 additions & 2 deletions pyop2/compilation.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,8 @@ def workaround_cflags(self):
if version.StrictVersion("7.3") <= ver < version.StrictVersion("7.5"):
# GCC bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90055
# See also https://github.com/firedrakeproject/firedrake/issues/1442
return ["-fno-tree-loop-vectorize"]
pass # enable vectorization for paper
# return ["-fno-tree-loop-vectorize"]
return []

@collective
Expand Down Expand Up @@ -385,7 +386,7 @@ class LinuxCompiler(Compiler):
:kwarg comm: Optional communicator to compile the code on (only
rank 0 compiles code) (defaults to COMM_WORLD)."""
def __init__(self, cppargs=[], ldargs=[], cpp=False, comm=None):
opt_flags = ['-march=native', '-O3', '-ffast-math']
opt_flags = ['-O3', '-ffast-math', '-fopenmp']
if configuration['debug']:
opt_flags = ['-O0', '-g']
cc = "mpicc"
Expand Down
9 changes: 6 additions & 3 deletions pyop2/sequential.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,15 +83,18 @@ def vectorise(wrapper, iname, batch_size):
kernel = kernel.copy(temporary_variables=tmps)

# split iname and vectorize the inner loop
slabs = (1, 1)
if configuration["time"]:
slabs = (0, 0)
inner_iname = iname + "_batch"

if configuration["vectorization_strategy"] == "ve":
# vectorize using vector extensions
kernel = loopy.split_iname(kernel, iname, batch_size, slabs=(0, 1), inner_tag="c_vec", inner_iname=inner_iname)
kernel = loopy.split_iname(kernel, iname, batch_size, slabs=slabs, inner_tag="c_vec", inner_iname=inner_iname)
else:
# vectorize using omp pragma simd
assert configuration["vectorization_strategy"] == "omp"
kernel = loopy.split_iname(kernel, iname, batch_size, slabs=(0, 1), inner_tag="omp_simd", inner_iname=inner_iname)
kernel = loopy.split_iname(kernel, iname, batch_size, slabs=slabs, inner_tag="omp_simd", inner_iname=inner_iname)

alignment = configuration["alignment"]
tmps = dict((name, tv.copy(alignment=alignment)) for name, tv in kernel.temporary_variables.items())
Expand Down Expand Up @@ -242,7 +245,7 @@ def set_nbytes(self, args):
seen = set()
for arg in args:
if arg.access is INC:
nbytes += arg.data.nbytes
nbytes += arg.data.nbytes * 2
else:
nbytes += arg.data.nbytes
for map_ in arg.map_tuple:
Expand Down

0 comments on commit f9e3066

Please sign in to comment.