-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
scf: squashmerge scf/symmetry-halide into master
- Loading branch information
Showing
20 changed files
with
2,306 additions
and
212 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
/twoel_g.* | ||
/twoel.* | ||
faketwoel.trace | ||
/fake | ||
|
||
perf.data | ||
/__pycache__ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
## Makefile for SCF-C
##
## Builds the Halide-generated twoel library via fakegen.py, links it into the
## standalone benchmark driver `fake`, and optionally records a Halide trace
## and renders it to a video.

CXX=clang++-10
CXXFLAGS=-O2 -g -Wall -fdiagnostics-color=always
CXXLINK=-ldl -lpthread

# Benchmark configuration; override on the command line, e.g. `make run THREADS=4`.
THREADS=1
TILE_SIZE=15
VECTOR_SIZE=4
DATA_SIZE=120
# Zones handed to fakegen.py: "all", or a comma-separated list of zone names,
# e.g. PERF_FUNCS=4D_ij_low_kl_low_pairs_low
PERF_FUNCS=all

# Trace/video configuration.
TRACE_DATA_SIZE=15
TRACE_VIDEO_WIDTH=1920
TRACE_VIDEO_HEIGHT=1080
TRACE_FUNCS=all

# These targets name actions, not files; declare them phony so that a stray
# file called e.g. "clean" or "run" cannot make them look up to date.
.PHONY: run perf video clean

run: fake
	HL_NUM_THREADS=$(THREADS) ./fake $(DATA_SIZE)

perf: fake
	HL_NUM_THREADS=$(THREADS) perf record ./fake $(DATA_SIZE)
	perf report

twoel.a twoel.h: fakegen.py ../tools/twoel_gen.py ../tools/decompose.py Makefile
	python3 fakegen.py $(PERF_FUNCS) tilesize=$(TILE_SIZE) vectorsize=$(VECTOR_SIZE)

fake: fake.cpp twoel.a twoel.h
	$(CXX) $(CXXFLAGS) fake.cpp twoel.a -o $@ $(CXXLINK)

# The zone selection comes from TRACE_FUNCS (default "all") instead of being
# hard-coded, so the variable defined above actually takes effect.
twoel.trace: faketwoel.py ../tools/twoel_gen.py ../tools/decompose.py Makefile
	rm -f $@
	HL_NUM_THREADS=$(THREADS) HL_TRACE_FILE=$@ python3 faketwoel.py $(TRACE_FUNCS) datasize=$(TRACE_DATA_SIZE) tilesize=$(TILE_SIZE) vectorsize=$(VECTOR_SIZE) itercount=0 tracing=True

video: twoel.trace
	rm -f twoel.mp4
	time cat twoel.trace | HalideTraceViz \
	--size $(TRACE_VIDEO_WIDTH) $(TRACE_VIDEO_HEIGHT) --zoom 4 --timestep 15 --hold 100 --decay 5 5 --gray --auto_layout \
	| ffmpeg -loglevel warning -f rawvideo -pix_fmt bgr32 -s $(TRACE_VIDEO_WIDTH)x$(TRACE_VIDEO_HEIGHT) -i /dev/stdin -c:v h264 twoel.mp4

clean:
	rm -rf *.trace twoel.* twoel_g.* fake perf.* __pycache__
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
/* Standalone program that calls twoel() the same way SCF does, and measures how long it takes. */ | ||
|
||
#include <stdio.h> | ||
#include <stdlib.h> | ||
#include <time.h> | ||
#include <sys/time.h> | ||
#include <sys/times.h> | ||
#include <unistd.h> | ||
|
||
#include <algorithm> | ||
#include <vector> | ||
#include <sstream> | ||
|
||
#include <HalideBuffer.h> | ||
|
||
#include "twoel.h" | ||
|
||
using namespace Halide::Runtime; | ||
|
||
int N; | ||
|
||
/* Produce one random scalar by drawing the next value from drand48(). */
double gen0d() {
    const double sample = drand48();
    return sample;
}
|
||
/*
 * Build a 1-D buffer filled with drand48() samples.
 * An extent of 0 (the default) means "use the global problem size N".
 */
Buffer<double> gen1d(int I=0) {
    const int extent = (I == 0) ? N : I;
    Buffer<double> filled(extent);
    int idx = 0;
    while(idx < extent) {
        filled(idx) = drand48();
        ++idx;
    }
    return filled;
}
|
||
/*
 * Build a 2-D buffer filled with drand48() samples.
 * Either extent may be 0 (the default), meaning "use the global problem size N".
 * Elements are filled with the first index outermost, so the sequence of
 * random draws is (0,0), (0,1), ..., (1,0), (1,1), ...
 */
Buffer<double> gen2d(int I=0, int J=0) {
    const int extent0 = (I == 0) ? N : I;
    const int extent1 = (J == 0) ? N : J;
    Buffer<double> filled(extent0, extent1);
    for(int a = 0; a < extent0; ++a) {
        for(int b = 0; b < extent1; ++b) {
            filled(a, b) = drand48();
        }
    }
    return filled;
}
|
||
/* Current wall-clock time from gettimeofday(), as fractional seconds. */
double timestamp() {
    struct timeval now;
    gettimeofday(&now, NULL);
    // Convert the microsecond part to seconds first, then add whole seconds.
    const double fractional = (double)now.tv_usec / 1000000;
    return fractional + now.tv_sec;
}
|
||
/* User-mode CPU time consumed by this process so far, in clock ticks as reported by times(). */
long cputickstamp() {
    struct tms usage;
    times(&usage);
    const long user_ticks = usage.tms_utime;
    return user_ticks;
}
|
||
int main(int argc, char **argv) { | ||
if(argc < 2) { | ||
fprintf(stderr, "Usage: %s <N>\n", argv[0]); | ||
return 1; | ||
} | ||
N = strtol(argv[1], NULL, 0); | ||
srand(2); | ||
srand48(rand()); | ||
|
||
double delo2 = gen0d(); | ||
double delta = gen0d(); | ||
double rdelta = gen0d(); | ||
Buffer<double> expnt = gen1d(); | ||
Buffer<double> rnorm = gen1d(); | ||
Buffer<double> x = gen1d(); | ||
Buffer<double> y = gen1d(); | ||
Buffer<double> z = gen1d(); | ||
Buffer<double> fm = gen2d(1002, 5); | ||
Buffer<double> g_fock_in = gen2d(); | ||
Buffer<double> g_dens = gen2d(); | ||
Buffer<double> g_fock_out = gen2d(); | ||
Buffer<double> rv = gen1d(); | ||
|
||
// dry run | ||
int error = twoel(delo2, delta, rdelta, expnt, rnorm, x, y, z, fm, g_fock_in, g_dens, rv, g_fock_out); | ||
if(error) { | ||
fprintf(stderr, "twoel failed with code %d\n", error); | ||
return 1; | ||
} | ||
|
||
// benchmark it | ||
std::vector<double> throughputs = {}; | ||
for(int trial = 0; trial < 4; trial++) { | ||
double start_walltime = timestamp(); | ||
clock_t start_cputicks = cputickstamp(); | ||
int itercount; | ||
for(itercount = 0; timestamp() - start_walltime < 5.0; itercount++) { | ||
twoel(delo2, delta, rdelta, expnt, rnorm, x, y, z, fm, g_fock_in, g_dens, rv, g_fock_out); | ||
} | ||
clock_t cputicks = cputickstamp() - start_cputicks; | ||
double walltime = timestamp() - start_walltime; | ||
double cputime = (double)cputicks / sysconf(_SC_CLK_TCK); | ||
double per_walltime = walltime / itercount; | ||
double per_cputime = cputime / itercount; | ||
double throughput = (double)1.0 * N * N * N * N / per_walltime; | ||
printf("%d iterations in %.3f seconds, %.3f seconds of cpu time, %.3e seconds per iter, %.3e cpu seconds per iter, %.3e effective iters per second\n", itercount, walltime, cputime, per_walltime, per_cputime, throughput); | ||
throughputs.push_back(throughput); | ||
} | ||
// sort and stringify the throughput values | ||
std::sort(throughputs.begin(), throughputs.end()); | ||
std::ostringstream stringify; | ||
for(int i = 0; i < throughputs.size(); i++) { | ||
if(i) | ||
stringify << ", "; | ||
stringify << throughputs[i]; | ||
} | ||
printf("throughputs: {%s}\n", stringify.str().c_str()); | ||
|
||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
#!/usr/bin/env python3 | ||
|
||
'''generate a twoel.a and twoel.h with only the specified zones, and with any scheduling params (thread count, vector/block sizes) passed through''' | ||
|
||
import sys | ||
sys.path.append('../tools') | ||
import halide as hl | ||
import twoel_gen | ||
|
||
def gen_twoel(zone_name, **kwargs):
    '''Compile the selected twoel zones to twoel.h / twoel.a and related outputs.

    zone_name is "all", a comma-separated list of zone names, or "list" to
    print the names of the available zones. The zones are read from the
    module-level ``zones`` object, whose ``loops`` list is replaced with the
    selection. If no zone matches (including the "list" case), a message is
    printed and the process exits with status 1.

    The optional keyword ``target_name`` chooses the Halide target string;
    all other keyword arguments are passed to twoel_gen.Generate_twoel().
    '''

    # pick the zones that were asked for
    zone_names = zone_name.split(",")
    myzones = [
        zone for zone in zones.loops
        if zone_name == 'all' or zone.name in zone_names
    ]
    if not myzones:
        if zone_name == 'list':
            # zones expose their name as an attribute (see the filter above)
            print([z.name for z in zones.loops])
        else:
            print("no zone %s found"%zone_name)
        sys.exit(1)

    # target_name is consumed here; it is not a Generate_twoel parameter
    target_name = kwargs.pop(
        "target_name",
        "x86-64-linux-avx-avx2-f16c-fma-sse41-profile-disable_llvm_loop_opt",
    )

    # build the pipeline for the selected zones only
    zones.loops = myzones
    gen = twoel_gen.Generate_twoel(loopnests=zones, **kwargs)
    gen.generate_twoel()
    p = gen.pipeline

    print("generating for target", target_name)
    target = hl.Target(target_name)
    p.compile_to(
        {
            hl.Output.c_header: "twoel.h",
            hl.Output.c_source: "twoel.cpp",
            hl.Output.static_library: "twoel.a",
            hl.Output.stmt: "twoel.stmt",
            hl.Output.stmt_html: "twoel.html",
            # the following outputs are useful for running it from python
            #hl.Output.object: "twoel.o",
            #hl.Output.python_extension: "twoel.py.cpp",
        }, list(gen.inputs.values()), "twoel", target
    )
|
||
def parse_param_value(text):
    '''Convert the value part of a key=value argument to int, bool or str.

    Integers become int and "true"/"false" (any letter case) become bool.
    Anything else is returned unchanged as a string. bool(text) is not used
    because it is True for every non-empty string, including "False".
    '''
    try:
        return int(text)
    except ValueError:
        pass
    lowered = text.lower()
    if lowered in ("true", "false"):
        return lowered == "true"
    return text


if __name__ == "__main__":
    if len(sys.argv) == 1:
        print("Usage: %s <zonename>"%sys.argv[0])
        sys.exit(1)

    zone_name = sys.argv[1]

    # remaining arguments are key=value scheduling parameters
    kwargs = {}
    for param in sys.argv[2:]:
        # split on the first "=" only, so values may themselves contain "="
        k, sep, v = param.partition("=")
        if not sep:
            print("expected key=value, got %s"%param)
            sys.exit(1)
        kwargs[k] = parse_param_value(v)

    # gen_twoel() reads this module-level name
    zones = twoel_gen.define_original_twoel_zone().split_recursive()

    gen_twoel(zone_name, **kwargs)
Oops, something went wrong.