Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Benchmark driver fixes #3

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 20 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,34 @@ ifeq ($(shell uname), Darwin)
argp:=-largp -pagezero_size 10000 -image_base 100000000
endif

all: LuaJIT/Makefile
cd ./LuaJIT && make -j
$(CC) main.c -O3 -c -o main.o -I ./LuaJIT/src
$(CC) main.o -lpthread ${argp} ./LuaJIT/src/libluajit.a -lm -ldl -o bench
ifdef BENCH_DURATION
DURATION = -DBENCH_DURATION=$(BENCH_DURATION)
endif

ifndef LUAJIT_PATH
LUAJIT_PATH = ./LuaJIT
endif

LUAJIT_A = $(LUAJIT_PATH)/src/libluajit.a

all: $(LUAJIT_PATH)/src/libluajit.a
$(CC) main.c $(DURATION) -O3 -c -o main.o -I $(LUAJIT_PATH)/src
$(CC) main.o -lpthread ${argp} $^ -lm -ldl -o bench

$(LUAJIT_A): $(LUAJIT_PATH)/Makefile
make -C $(LUAJIT_PATH) -j

LuaJIT/Makefile:
git submodule update --init --recursive

clean:
cd ./LuaJIT && make clean
rm bench main.o

distclean:
rm -rf LuaJIT
mkdir LuaJIT
rm bench main.o

run: bench
@echo "<benchmark>:<total runs>:<ops/s>"
@ls *.lua | while read f; do echo -n "$$f:"; ./bench $$f; done
107 changes: 82 additions & 25 deletions main.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
#include <unistd.h>
#include <sys/mman.h>
#include <argp.h>
#include <sys/param.h>
#include <string.h>

#include "lua.h"
#include "lualib.h"
Expand All @@ -17,35 +19,47 @@
typedef struct {
uint64_t ctr;
char *file;
double start_ns;
uint64_t start_ns;
uint64_t end_ns;
} lua_task;

/* Benchmark execution duration in seconds. */
#ifndef BENCH_DURATION
# define BENCH_DURATION 10
#endif

#define TIMESPEC_NS(t) ((t).tv_sec * 1e9 + (t).tv_nsec)

void *wrapper(void *arg) {
lua_task *task = (lua_task*)arg;
struct timespec start_time;
struct timespec cur_time;
double start_time_ns = task->start_ns;
uint64_t ctr = 0;

int status = clock_gettime(CLOCK_MONOTONIC, &start_time);

if (status != 0) {
fprintf(stderr, "clock_gettime failed: %s\n", strerror (errno));
exit(1);
}

uint64_t cur_time_ns = TIMESPEC_NS (start_time);
uint64_t stop_time_ns = TIMESPEC_NS (start_time) + BENCH_DURATION * 1e9;

lua_State *L;
L = luaL_newstate();
luaL_openlibs(L);

int status = luaL_loadfile(L, task->file);
if (status) {
fprintf(stderr, "Couldn't load file: %s\n", lua_tostring(L, -1));
exit(1);
}
status = luaL_loadfile(L, task->file);
if (status) {
fprintf(stderr, "Couldn't load file: %s\n", lua_tostring(L, -1));
exit(1);
}

int cb_ref = luaL_ref(L, LUA_REGISTRYINDEX);

do {
clock_gettime(CLOCK_MONOTONIC, &cur_time);

double cur_time_ns = cur_time.tv_sec * 1e9 + cur_time.tv_nsec;
if (start_time_ns + 10 * 1e9 < cur_time_ns) {
break;
}

do {
lua_rawgeti(L, LUA_REGISTRYINDEX, cb_ref);
int result = lua_pcall(L, 0, 0, 0);
if (result) {
Expand All @@ -54,10 +68,18 @@ void *wrapper(void *arg) {
}

ctr++;
status = clock_gettime(CLOCK_MONOTONIC, &cur_time);
if (status != 0) {
fprintf(stderr, "clock_gettime failed: %s\n", strerror (errno));
exit(1);
}

} while(1);
cur_time_ns = TIMESPEC_NS (cur_time);
} while (stop_time_ns > cur_time_ns);;

task->ctr = ctr;
task->start_ns = TIMESPEC_NS (start_time);
task->end_ns = cur_time_ns;

return NULL;
}
Expand All @@ -80,7 +102,7 @@ static char doc[] =
static char args_doc[] = "file1";

static struct argp_option options[] = {
{"concurency", 'c', "concurency", 0, "Number of threads" },
{"concurrency", 'c', "concurrency", 0, "Number of threads" },
{ 0 }
};

Expand Down Expand Up @@ -122,28 +144,63 @@ int main(int argc, char *argv[]) {
size_t len;
struct stat s;
uint64_t total = 0;
struct timespec start_time;

cmd_line_options arguments = (cmd_line_options){.file_name = "", .q = 8, .c = 1};
argp_parse (&argp, argc, argv, 0, 0, &arguments);
int ret = argp_parse (&argp, argc, argv, 0, 0, &arguments);

if (ret != 0) {
fprintf (stderr, "Unexpected error while parsing command line arguments: %s\n",
strerror (ret));
return 1;
}

int nthreads = arguments.c;
threads = malloc(nthreads * sizeof(pthread_t));
tasks = malloc(nthreads * sizeof(lua_task));

clock_gettime(CLOCK_MONOTONIC, &start_time);
double start_time_ns = start_time.tv_sec * 1e9 + start_time.tv_nsec;
if (threads == NULL || tasks == NULL) {
fprintf(stderr, "Unable to allocate memory: %s\n", strerror (errno));
return 2;
}

for (i = 0; i < nthreads; i++) {
tasks[i] = (lua_task){.file = arguments.file_name, .start_ns = start_time_ns};
pthread_create(&threads[i], NULL, wrapper, &tasks[i]);
tasks[i] = (lua_task){.file = arguments.file_name};
int ret = pthread_create(&threads[i], NULL, wrapper, &tasks[i]);

if (ret != 0) {
fprintf (stderr, "Cannot create thread(%d): %s\n", i,
strerror (errno));
return 3;
}
}

uint64_t start_time_ns = 0;
uint64_t end_time_ns = 0;

for (i = 0; i < nthreads; i++) {
pthread_join(threads[i], NULL);
int ret = pthread_join(threads[i], NULL);

if (ret != 0) {
fprintf (stderr, "Thread join failed(%d): %s\n", i,
strerror (errno));
return 4;
}

total += tasks[i].ctr;
/* Select the earliest starting time and the latest end time. */
if (start_time_ns == 0) {
start_time_ns = tasks[i].start_ns;
end_time_ns = tasks[i].end_ns;
}
else {
start_time_ns = MIN (start_time_ns, tasks[i].start_ns);
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not entirely convinced that this method is much more accurate. Maybe the more accurate way would be to compute the average between all threads?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Average will give you a throughput rate per thread, which is not incorrect, but definitely a different thing from total throughput. If you want total throughput you either get start and end time in the main thread or you get them in individual threads and get the earliest start time and latest end time. The latter eliminates some amount of the cloning and joining latency, although it shouldn't make a significant difference if the benchmark environment is controlled.

end_time_ns = MAX (end_time_ns, tasks[i].end_ns);
}
}

fprintf(stderr, "Total times executed: %"PRId64"\n", total);
fprintf(stdout, "ops/s %.2f\n", (double)total / 10);
uint64_t duration = end_time_ns - start_time_ns;

printf("%"PRId64":%.4f\n", total, (double) total * 1e9 / duration);

return 0;
}