Skip to content

Commit

Permalink
add fallback_scalar_nav_1024_uf1_falp_bench
Browse files Browse the repository at this point in the history
  • Loading branch information
azimafroozeh committed Nov 19, 2024
1 parent 5d48078 commit a3f93a2
Show file tree
Hide file tree
Showing 10 changed files with 408 additions and 288 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
benchmark_number,name,iterations,cycles_per_tuple
1,Air-Pressure_fused,3000000,3.21137
1,Air-Pressure,3000000,0.501445
2,Arade/4_fused,3000000,3.34845
2,Arade/4,3000000,0.559359
3,Basel-Temp_fused,3000000,3.44184
3,Basel-Temp,3000000,0.647628
4,Basel-Wind_fused,3000000,3.44574
4,Basel-Wind,3000000,0.625876
5,Bird-Mig_fused,3000000,3.37468
5,Bird-Mig,3000000,0.577556
6,Btc-Price_fused,3000000,3.42224
6,Btc-Price,3000000,0.607025
7,Blockchain_fused,3000000,3.45231
7,Blockchain,3000000,0.615256
8,City-Temp_fused,3000000,3.33958
8,City-Temp,3000000,0.562281
9,CMS/1_fused,3000000,3.50209
9,CMS/1,3000000,0.668767
10,CMS/9_fused,3000000,3.32484
10,CMS/9,3000000,0.556709
11,CMS/25_fused,3000000,3.50007
11,CMS/25,3000000,0.635558
12,Dew-Temp_fused,3000000,3.35789
12,Dew-Temp,3000000,0.569733
13,Bio-Temp_fused,3000000,3.32392
13,Bio-Temp,3000000,0.555212
14,Food-prices_fused,3000000,3.39494
14,Food-prices,3000000,0.617178
15,Gov/10_fused,3000000,3.48623
15,Gov/10,3000000,0.695229
16,Gov/26_fused,3000000,0.419549
16,Gov/26,3000000,0.464249
17,Gov/30_fused,3000000,0.422889
17,Gov/30,3000000,0.466942
18,Gov/31_fused,3000000,0.420349
18,Gov/31,3000000,0.46615
19,Gov/40_fused,3000000,0.421145
19,Gov/40,3000000,0.467013
20,Medicare/1_fused,3000000,3.51459
20,Medicare/1,3000000,0.665249
21,Medicare/9_fused,3000000,3.32574
21,Medicare/9,3000000,0.558239
22,PM10-dust_fused,3000000,3.25268
22,PM10-dust,3000000,0.520411
23,NYC/29_fused,3000000,3.49727
23,NYC/29,3000000,0.631309
24,POI-lat_fused,3000000,1.2926
24,POI-lat,3000000,1.71827
25,POI-lon_fused,3000000,4.40437
25,POI-lon,3000000,1.96223
26,SD-bench_fused,3000000,3.37129
26,SD-bench,3000000,0.576337
27,Stocks-DE_fused,3000000,3.32531
27,Stocks-DE,3000000,0.556981
28,Stocks-UK_fused,3000000,3.33648
28,Stocks-UK,3000000,0.558195
29,Stocks-USA_fused,3000000,3.31633
29,Stocks-USA,3000000,0.553238
30,Wind-dir_fused,3000000,3.20903
30,Wind-dir,3000000,0.499362
1 change: 1 addition & 0 deletions publication/script/master_script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ else
"$CLONED_DIR/build/publication/source_code/generated/x86_64/avx512bw_intrinsic_uf1/x86_64_avx512bw_intrinsic_1024_uf1_falp_bench"
"$CLONED_DIR/build/publication/source_code/generated/x86_64/avx512bw_intrinsic_uf1/x86_64_avx2_intrinsic_1024_uf1_falp_bench"
"$CLONED_DIR/build/publication/source_code/generated/x86_64/avx512bw_intrinsic_uf1/fallback_scalar_aav_1024_uf1_falp_bench"
"$CLONED_DIR/build/publication/source_code/generated/x86_64/avx512bw_intrinsic_uf1/fallback_scalar_nav_1024_uf1_falp_bench"
"$CLONED_DIR/build/publication/source_code/bench_speed/publication_bench_alp_cutter_decode"
"$CLONED_DIR/build/publication/source_code/bench_speed/publication_bench_alp_cutter_encode"
"$CLONED_DIR/build/publication/source_code/bench_speed/publication_bench_alp_encode"
Expand Down
3 changes: 1 addition & 2 deletions publication/source_code/generated/fallback/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#add_subdirectory(scalar_av_uf1)
add_subdirectory(scalar_nav_uf1)
add_subdirectory(scalar_aav_uf1)
#add_subdirectory(scalar_nav_uf1)

add_library(generated_fallback
OBJECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "fallback_scalar_aav_1024_uf1_falp_bench.hpp"
#include "alp.hpp"
#include "data.hpp"
#include "fallback_scalar_aav_1024_uf1_falp_bench.hpp"

static __attribute__((noinline)) benchmark::BenchmarkReporter::Run bench_alp_fused_decode(alp_bench::Column& dataset,
int64_t* ffor_arr,
uint8_t bw,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,12 @@
#------------------------------------------------------------------------------------------------------
if (ALP_BUILD_TESTING)
add_executable(fallback_scalar_aav_1024_uf1_falp_test fallback_scalar_aav_1024_uf1_falp_test.cpp fallback_scalar_aav_1024_uf1_falp_bench.cpp)
target_link_libraries(fallback_scalar_aav_1024_uf1_falp_test PRIVATE ALP)
target_link_libraries(fallback_scalar_aav_1024_uf1_falp_test PRIVATE gtest_main)
target_include_directories(fallback_scalar_aav_1024_uf1_falp_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
gtest_discover_tests(fallback_scalar_aav_1024_uf1_falp_test)
endif ()
add_executable(fallback_scalar_aav_1024_uf1_falp_test fallback_scalar_aav_1024_uf1_falp_test.cpp)
target_link_libraries(fallback_scalar_aav_1024_uf1_falp_test PRIVATE ALP)
target_link_libraries(fallback_scalar_aav_1024_uf1_falp_test PRIVATE gtest_main)
target_include_directories(fallback_scalar_aav_1024_uf1_falp_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
gtest_discover_tests(fallback_scalar_aav_1024_uf1_falp_test)
#------------------------------------------------------------------------------------------------------
if (ALP_BUILD_BENCHMARKING)
configure_file(${CMAKE_SOURCE_DIR}/benchmarks/fls_bench/fls_bench.hpp ${CMAKE_CURRENT_BINARY_DIR}/fallback_scalar_aav_1024_uf1_falp_bench.hpp)
add_executable(fallback_scalar_aav_1024_uf1_falp_bench fallback_scalar_aav_1024_uf1_falp_bench.cpp)
target_link_libraries(fallback_scalar_aav_1024_uf1_falp_bench PRIVATE ALP)
target_include_directories(fallback_scalar_aav_1024_uf1_falp_bench PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
add_alp_benchmark(fallback_scalar_aav_1024_uf1_falp_bench)
endif ()
configure_file(${CMAKE_SOURCE_DIR}/benchmarks/fls_bench/fls_bench.hpp ${CMAKE_CURRENT_BINARY_DIR}/fallback_scalar_aav_1024_uf1_falp_bench.hpp)
add_executable(fallback_scalar_aav_1024_uf1_falp_bench fallback_scalar_aav_1024_uf1_falp_bench.cpp)
target_link_libraries(fallback_scalar_aav_1024_uf1_falp_bench PRIVATE ALP)
target_include_directories(fallback_scalar_aav_1024_uf1_falp_bench PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
add_alp_benchmark(fallback_scalar_aav_1024_uf1_falp_bench)
Original file line number Diff line number Diff line change
@@ -1,128 +1,152 @@
#include "fallback_scalar_nav_1024_uf1_falp_bench.hpp"
#include "alp/alp.hpp"
#include "datasets.hpp"
#include "alp/ffor.hpp"
#include "alp/unffor.hpp"
// bench_alp_fused_decode: times the fused decode path for one dataset. Between two cycleclock::Now() readings it
// calls falp(...) followed by patch_exceptions(...) `iterations` times, then returns a BenchmarkReporter::Run
// carrying the dataset id, the dataset name suffixed with "_fused", the iteration count and
// cycles / (iterations * 1024).
//
// NOTE(review): within this span the signature appears twice (alp_bench::Dataset vs alp_bench::Column) and the
// timing body appears twice (generated::falp + alp::AlpDecode vs experimental::generated::falp + alp::decoder),
// with three #include lines between the first opening brace and the second signature. This looks like both sides
// of a diff rendered together -- confirm which lines are live before compiling.
static __attribute__((noinline)) benchmark::BenchmarkReporter::Run bench_alp_fused_decode(alp_bench::Dataset& dataset, int64_t* ffor_arr, uint8_t bw, int64_t*base_arr,uint8_t factor,uint8_t exponent,double* dec_dbl_arr,double* exc_arr,uint16_t* pos_arr,uint16_t* exc_c_arr)
{
#include "experimental_falp.hpp"
#include "alp.hpp"
#include "data.hpp"

// Parameters, as used by the bodies below:
//   dataset     - supplies `id` and `name` for the reported Run.
//   ffor_arr    - input words handed to falp() (reinterpreted as uint64_t*).
//   bw          - forwarded to falp(); presumably the packed bit width -- TODO confirm.
//   base_arr    - forwarded to falp() (reinterpreted as uint64_t*).
//   factor      - forwarded to falp().
//   exponent    - forwarded to falp().
//   dec_dbl_arr - output of falp(), then passed to patch_exceptions().
//   exc_arr, pos_arr, exc_c_arr - forwarded to patch_exceptions().
static __attribute__((noinline)) benchmark::BenchmarkReporter::Run bench_alp_fused_decode(alp_bench::Column& dataset,
int64_t* ffor_arr,
uint8_t bw,
int64_t* base_arr,
uint8_t factor,
uint8_t exponent,
double* dec_dbl_arr,
double* exc_arr,
uint16_t* pos_arr,
uint16_t* exc_c_arr) {
int benchmark_number = dataset.id;

// First copy of the body (calls generated::falp and alp::AlpDecode).
// Builds that define NDEBUG run 3000000 iterations; all other builds run a single pass.
#ifdef NDEBUG
uint64_t iterations = 3000000;
#else
uint64_t iterations = 1;
#endif

std::string benchmark_name = dataset.name + "_fused";

uint64_t cycles = benchmark::cycleclock::Now();
for (uint64_t i = 0; i < iterations; ++i) {
generated::falp::fallback::scalar::falp(reinterpret_cast<uint64_t*>(ffor_arr),
dec_dbl_arr,
bw,
reinterpret_cast<uint64_t*>(base_arr),
factor,
exponent);
alp::AlpDecode<double>::patch_exceptions(dec_dbl_arr, exc_arr, pos_arr, exc_c_arr);
}

cycles = benchmark::cycleclock::Now() - cycles;

return benchmark::BenchmarkReporter::Run(
benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024));
// Second copy of the body (calls experimental::generated::falp and alp::decoder).
// Builds that define NDEBUG run 3000000 iterations; all other builds run a single pass.
#ifdef NDEBUG
uint64_t iterations = 3000000;
#else
uint64_t iterations = 1;
#endif

std::string benchmark_name = dataset.name + "_fused";

// `cycles` first holds the start reading and is then overwritten with the elapsed count.
uint64_t cycles = benchmark::cycleclock::Now();
for (uint64_t i = 0; i < iterations; ++i) {
experimental::generated::falp::fallback::scalar::falp(reinterpret_cast<uint64_t*>(ffor_arr),
dec_dbl_arr,
bw,
reinterpret_cast<uint64_t*>(base_arr),
factor,
exponent);
alp::decoder<double>::patch_exceptions(dec_dbl_arr, exc_arr, pos_arr, exc_c_arr);
}

cycles = benchmark::cycleclock::Now() - cycles;

// Elapsed cycles are divided by iterations * 1024; presumably 1024 is the number of values decoded per
// iteration, making this a per-value figure -- TODO confirm.
return benchmark::BenchmarkReporter::Run(
benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024));
}
static __attribute__((noinline)) benchmark::BenchmarkReporter::Run bench_alp_decode(alp_bench::Dataset& dataset, int64_t* ffor_arr, int64_t* unffor_arr, uint8_t bw, int64_t* base_arr, uint8_t factor, uint8_t exponent, double* dec_dbl_arr, double* exc_arr, uint16_t* pos_arr, uint16_t* exc_c_arr)
{

int benchmark_number = dataset.id;

#ifdef NDEBUG
uint64_t iterations = 3000000;
#else
uint64_t iterations = 1;
#endif

std::string benchmark_name = dataset.name + "";

uint64_t cycles = benchmark::cycleclock::Now();
for (uint64_t i = 0; i < iterations; ++i) {
alp::generated::unffor::fallback::scalar::unffor(ffor_arr, unffor_arr, bw, base_arr);
alp::AlpDecode<double>(reinterpret_cast<const uint64_t*>(unffor_arr), factor, exponent, dec_dbl_arr);
alp::AlpDecode<double>::patch_exceptions(dec_dbl_arr, exc_arr, pos_arr, exc_c_arr);
}

cycles = benchmark::cycleclock::Now() - cycles;

return benchmark::BenchmarkReporter::Run(
benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024));

/// Times the unfused decode path for one dataset.
///
/// Each iteration runs three steps back to back: unffor(ffor_arr -> unffor_arr), decoder<double>::decode
/// (unffor_arr -> dec_dbl_arr) and decoder<double>::patch_exceptions on dec_dbl_arr. The whole loop is bracketed
/// by two cycleclock::Now() readings.
///
/// @param dataset      supplies `id` and `name` for the reported run (the name gets no suffix here).
/// @param ffor_arr     input to unffor().
/// @param unffor_arr   output of unffor(), input to decode().
/// @param bw           forwarded to unffor(); presumably the packed bit width -- TODO confirm.
/// @param base_arr     forwarded to unffor().
/// @param factor       forwarded to decode().
/// @param exponent     forwarded to decode().
/// @param dec_dbl_arr  output of decode(), then passed to patch_exceptions().
/// @param exc_arr      forwarded to patch_exceptions().
/// @param pos_arr      forwarded to patch_exceptions().
/// @param exc_c_arr    forwarded to patch_exceptions().
/// @return a Run holding the dataset id, its name, the iteration count and elapsed cycles / (iterations * 1024).
static __attribute__((noinline)) benchmark::BenchmarkReporter::Run bench_alp_decode(alp_bench::Column& dataset,
                                                                                    int64_t*           ffor_arr,
                                                                                    int64_t*           unffor_arr,
                                                                                    uint8_t            bw,
                                                                                    int64_t*           base_arr,
                                                                                    uint8_t            factor,
                                                                                    uint8_t            exponent,
                                                                                    double*            dec_dbl_arr,
                                                                                    double*            exc_arr,
                                                                                    uint16_t*          pos_arr,
                                                                                    uint16_t*          exc_c_arr) {
    // Builds that define NDEBUG take the full measurement; every other build does a single pass.
#ifdef NDEBUG
    uint64_t iterations = 3000000;
#else
    uint64_t iterations = 1;
#endif

    int         run_id    = dataset.id;
    std::string run_label = dataset.name + "";

    const uint64_t start_tick = benchmark::cycleclock::Now();
    for (uint64_t pass = 0; pass < iterations; ++pass) {
        fastlanes::generated::unffor::fallback::scalar::unffor(ffor_arr, unffor_arr, bw, base_arr);
        alp::decoder<double>::decode(unffor_arr, factor, exponent, dec_dbl_arr);
        alp::decoder<double>::patch_exceptions(dec_dbl_arr, exc_arr, pos_arr, exc_c_arr);
    }
    const uint64_t elapsed_ticks = benchmark::cycleclock::Now() - start_tick;

    // Presumably 1024 is the number of values handled per pass, so the quotient is a per-value cost -- TODO confirm.
    return benchmark::BenchmarkReporter::Run(
        run_id,
        run_label,
        iterations,
        static_cast<double>(elapsed_ticks) / (static_cast<double>(iterations) * 1024));
}
void benchmark_all(benchmark::Benchmark& benchmark)
{

double* dbl_arr;
double* exc_arr;
uint16_t* pos_arr;
uint16_t* exc_c_arr;
int64_t* ffor_arr;
int64_t* unffor_arr;

int64_t* base_arr;
int64_t* dig_arr;
double* dec_dbl_arr;

uint8_t bw;
uint8_t factor;
uint8_t exponent;

dbl_arr = new (std::align_val_t {64}) double[1024];
exc_arr = new (std::align_val_t {64}) double[1024];
pos_arr = new (std::align_val_t {64}) uint16_t[1024];
dig_arr = new (std::align_val_t {64}) int64_t[1024];
dec_dbl_arr = new (std::align_val_t {64}) double[1024];
exc_c_arr = new (std::align_val_t {64}) uint16_t[1024];
ffor_arr = new (std::align_val_t {64}) int64_t[1024];
unffor_arr = new (std::align_val_t {64}) int64_t[1024];
base_arr = new (std::align_val_t {64}) int64_t[1024];

for (auto& dataset : alp_bench::datasets) {
std::ifstream ifile(dataset.sample_csv_file_path, std::ios::in);

// check to see that the file was opened correctly:
if (!ifile.is_open()) {
exit(1); // exit or do additional error checking
}

double num = 0.0;
// keep storing values from the text file so long as data exists:
size_t c {0};
while (ifile >> num) {
dbl_arr[c] = num;
c += 1;
}

factor = dataset.factor;
exponent = dataset.exponent;

alp::AlpEncode<double>::encode(dbl_arr, exc_arr, pos_arr, exc_c_arr, dig_arr, stt);
alp::AlpEncode<double>::analyze_ffor(dig_arr, bw, base_arr);
alp::generated::ffor::fallback::scalar::ffor(dig_arr, ffor_arr, bw, base_arr);

benchmark.Run(bench_alp_fused_decode(
dataset, unffor_arr, bw, base_arr, factor, exponent, dec_dbl_arr, exc_arr, pos_arr, exc_c_arr));

benchmark.Run(bench_alp_decode(
dataset, ffor_arr, unffor_arr, bw, base_arr, factor, exponent, dec_dbl_arr, exc_arr, pos_arr, exc_c_arr));

ifile.close();}
/// Runs the fused and the unfused decode benchmark for every ALP dataset.
///
/// For each column returned by alp_bench::get_alp_dataset():
///   1. at most 1024 doubles are parsed from the column's `csv_file_path`;
///   2. they are encoded with alp::encoder<double> (init, encode, analyze_ffor) and packed with ffor();
///   3. bench_alp_fused_decode and bench_alp_decode are timed on the packed words, and each returned Run is
///      handed to `benchmark.Run`.
///
/// The process terminates with exit status 1 when a dataset file cannot be opened.
///
/// @param benchmark  receives one Run per (dataset, decode path) pair.
void benchmark_all(benchmark::Benchmark& benchmark) {
    // Element count of every working buffer; also the upper bound on values read per dataset.
    constexpr size_t           vector_size = 1024;
    constexpr std::align_val_t buffer_alignment {64};

    // Buffers are value-initialised so that no kernel reads indeterminate memory when a file holds fewer than
    // vector_size values.
    double*   dbl_arr     = new (buffer_alignment) double[vector_size] {};
    double*   exc_arr     = new (buffer_alignment) double[vector_size] {};
    uint16_t* pos_arr     = new (buffer_alignment) uint16_t[vector_size] {};
    int64_t*  dig_arr     = new (buffer_alignment) int64_t[vector_size] {};
    double*   dec_dbl_arr = new (buffer_alignment) double[vector_size] {};
    uint16_t* exc_c_arr   = new (buffer_alignment) uint16_t[vector_size] {};
    int64_t*  ffor_arr    = new (buffer_alignment) int64_t[vector_size] {};
    int64_t*  unffor_arr  = new (buffer_alignment) int64_t[vector_size] {};
    int64_t*  base_arr    = new (buffer_alignment) int64_t[vector_size] {};
    double*   smp_arr     = new (buffer_alignment) double[vector_size] {};

    // Declared once, outside the loop: the same encoder state object is reused for every dataset.
    alp::state<double> stt;

    for (auto& dataset : alp_bench::get_alp_dataset()) {
        std::ifstream ifile(dataset.csv_file_path, std::ios::in);
        if (!ifile.is_open()) {
            exit(1); // a missing dataset makes the whole run meaningless
        }

        // Stop at the buffer capacity: a file with more than vector_size values must not write past dbl_arr.
        size_t value_count {0};
        double value {0.0};
        while (value_count < vector_size && (ifile >> value)) {
            dbl_arr[value_count] = value;
            ++value_count;
        }

        const uint8_t factor   = dataset.factor;
        const uint8_t exponent = dataset.exponent;
        uint8_t       bw {0}; // filled in by analyze_ffor()

        alp::encoder<double>::init(dbl_arr, 0, 1024, smp_arr, stt);
        alp::encoder<double>::encode(dbl_arr, exc_arr, pos_arr, exc_c_arr, dig_arr, stt);
        alp::encoder<double>::analyze_ffor(dig_arr, bw, base_arr);
        fastlanes::generated::ffor::fallback::scalar::ffor(dig_arr, ffor_arr, bw, base_arr);

        // The fused kernel consumes the FFOR-packed words directly, so it is given ffor_arr. unffor_arr is only
        // the scratch output of the unfused path and holds nothing meaningful at this point.
        benchmark.Run(bench_alp_fused_decode(
            dataset, ffor_arr, bw, base_arr, factor, exponent, dec_dbl_arr, exc_arr, pos_arr, exc_c_arr));

        benchmark.Run(bench_alp_decode(
            dataset, ffor_arr, unffor_arr, bw, base_arr, factor, exponent, dec_dbl_arr, exc_arr, pos_arr, exc_c_arr));

        // ifile is closed by its destructor at the end of each iteration.
    }

    // Memory obtained through the aligned operator new[] must be returned through the matching aligned
    // operator delete[]; the element types are trivially destructible, so no destructor calls are needed.
    ::operator delete[](dbl_arr, buffer_alignment);
    ::operator delete[](exc_arr, buffer_alignment);
    ::operator delete[](pos_arr, buffer_alignment);
    ::operator delete[](dig_arr, buffer_alignment);
    ::operator delete[](dec_dbl_arr, buffer_alignment);
    ::operator delete[](exc_c_arr, buffer_alignment);
    ::operator delete[](ffor_arr, buffer_alignment);
    ::operator delete[](unffor_arr, buffer_alignment);
    ::operator delete[](base_arr, buffer_alignment);
    ::operator delete[](smp_arr, buffer_alignment);
}
int main()
{
benchmark::Benchmark benchmark =
benchmark::create("fallback_scalar_nav_1024_uf1_falp")
.save()
.at(std::string(SOURCE_DIR) + "/alp_pub/results/" + benchmark::CmakeInfo::getCmakeToolchainFile())
.print()
.add_extra_info(benchmark::CmakeInfo::getCmakeInfo());
benchmark_all(benchmark);
int main() {
benchmark::Benchmark benchmark =
benchmark::create("fallback_scalar_nav_1024_uf1_falp")
.save()
.at(std::string(SOURCE_DIR) + "/publication/results/" + benchmark::CmakeInfo::getCmakeToolchainFile())
.print()
.add_extra_info(benchmark::CmakeInfo::getCmakeInfo());
benchmark_all(benchmark);
}
Loading

0 comments on commit a3f93a2

Please sign in to comment.