From 341687aaf6147db86959eee297c651561c64455b Mon Sep 17 00:00:00 2001 From: Ethan Williams Date: Mon, 2 Dec 2024 13:38:47 -0500 Subject: [PATCH] added BENCHMARK_SHELL to found benchmarks (#37) * added BENCHMARK_SHELL to found benchmarks * updated script globs after moving the benchmarks --- covid-mts/run.sh | 10 ++-- file-enc/run.sh | 5 +- infrastructure/data/script-globs.json | 4 +- log-analysis/run.sh | 6 ++- max-temp/run.sh | 4 +- media-conv/run.sh | 6 ++- nlp/run.sh | 4 +- oneliners/run.sh | 4 +- sklearn/run.sh | 74 +++------------------------ sklearn/scripts/run.sh | 72 ++++++++++++++++++++++++++ uniq-ips/run.sh | 11 +++- uniq-ips/scripts/run.sh | 1 + unix50/run.sh | 3 +- web-index/run.sh | 3 +- 14 files changed, 121 insertions(+), 86 deletions(-) mode change 100755 => 100644 sklearn/run.sh create mode 100755 sklearn/scripts/run.sh mode change 100755 => 100644 uniq-ips/run.sh create mode 100755 uniq-ips/scripts/run.sh diff --git a/covid-mts/run.sh b/covid-mts/run.sh index d60c69ac..57d478e2 100755 --- a/covid-mts/run.sh +++ b/covid-mts/run.sh @@ -15,8 +15,10 @@ input_file="$input_dir/in$suffix.csv" output_scoped="$outputs_dir/outputs$suffix" mkdir -p "$output_scoped" -"$scripts_dir/1.sh" "$input_file" > "$output_scoped/1.out" -"$scripts_dir/2.sh" "$input_file" > "$output_scoped/2.out" -"$scripts_dir/3.sh" "$input_file" > "$output_scoped/3.out" -"$scripts_dir/4.sh" "$input_file" > "$output_scoped/4.out" +BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash} + +$BENCHMARK_SHELL "$scripts_dir/1.sh" "$input_file" > "$output_scoped/1.out" +$BENCHMARK_SHELL "$scripts_dir/2.sh" "$input_file" > "$output_scoped/2.out" +$BENCHMARK_SHELL "$scripts_dir/3.sh" "$input_file" > "$output_scoped/3.out" +$BENCHMARK_SHELL "$scripts_dir/4.sh" "$input_file" > "$output_scoped/4.out" diff --git a/file-enc/run.sh b/file-enc/run.sh index 645ebe37..2df87c9b 100755 --- a/file-enc/run.sh +++ b/file-enc/run.sh @@ -17,5 +17,6 @@ if [[ "$1" == "--small" ]]; then suffix=".small" fi 
-$scripts_dir/compress_files.sh $input_pcaps $results_dir/compress_files$suffix -$scripts_dir/encrypt_files.sh $input_pcaps $results_dir/encrypt_files$suffix +BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash} +$BENCHMARK_SHELL $scripts_dir/compress_files.sh $input_pcaps $results_dir/compress_files$suffix +$BENCHMARK_SHELL $scripts_dir/encrypt_files.sh $input_pcaps $results_dir/encrypt_files$suffix diff --git a/infrastructure/data/script-globs.json b/infrastructure/data/script-globs.json index 2082d205..77888905 100644 --- a/infrastructure/data/script-globs.json +++ b/infrastructure/data/script-globs.json @@ -21,13 +21,13 @@ "scripts": ["oneliners/scripts/*.sh"] }, "sklearn": { - "scripts": ["sklearn/run.sh"] + "scripts": ["sklearn/scripts/run.sh"] }, "riker": { "scripts": ["riker/scripts/*/build.sh"] }, "uniq-ips": { - "scripts": ["uniq-ips/run.sh"] + "scripts": ["uniq-ips/scripts/run.sh"] }, "unix50": { "scripts": ["unix50/scripts/*.sh"] diff --git a/log-analysis/run.sh b/log-analysis/run.sh index 50dc1f21..66b2035a 100755 --- a/log-analysis/run.sh +++ b/log-analysis/run.sh @@ -17,10 +17,12 @@ if [[ "$@" == *"--small"* ]]; then suffix=".small" fi +BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash} + echo "nginx" -time $scripts_dir/nginx.sh $nginx_input $results_dir/nginx$suffix +$BENCHMARK_SHELL $scripts_dir/nginx.sh $nginx_input $results_dir/nginx$suffix echo $? echo "pcaps" -time $scripts_dir/pcaps.sh $pcaps_input $results_dir/pcaps$suffix +$BENCHMARK_SHELL $scripts_dir/pcaps.sh $pcaps_input $results_dir/pcaps$suffix echo $? 
diff --git a/max-temp/run.sh b/max-temp/run.sh index a7337e59..5bc6a7d4 100755 --- a/max-temp/run.sh +++ b/max-temp/run.sh @@ -18,4 +18,6 @@ export statistics_dir="$results_dir/statistics$suffix" mkdir -p "$statistics_dir" -${scripts_dir}/temp-analytics.sh +BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash} + +$BENCHMARK_SHELL ${scripts_dir}/temp-analytics.sh diff --git a/media-conv/run.sh b/media-conv/run.sh index 93aa0961..c9136726 100755 --- a/media-conv/run.sh +++ b/media-conv/run.sh @@ -17,10 +17,12 @@ if [[ "$@" == *"--small"* ]]; then suffix=".small" fi +BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash} + echo "img_convert" -time $scripts_dir/img_convert.sh $img_convert_input $results_dir/img_convert$suffix > $results_dir/img_convert$suffix.log +$BENCHMARK_SHELL $scripts_dir/img_convert.sh $img_convert_input $results_dir/img_convert$suffix > $results_dir/img_convert$suffix.log echo $? echo "to_mp3" -time $scripts_dir/to_mp3.sh $to_mp3_input $results_dir/to_mp3$suffix > $results_dir/to_mp3$suffix.log +$BENCHMARK_SHELL $scripts_dir/to_mp3.sh $to_mp3_input $results_dir/to_mp3$suffix > $results_dir/to_mp3$suffix.log echo $? diff --git a/nlp/run.sh b/nlp/run.sh index 92d2ed6b..f5afb2fe 100755 --- a/nlp/run.sh +++ b/nlp/run.sh @@ -12,6 +12,8 @@ else export IN="$SUITE_DIR/inputs/pg" fi +BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash} + mkdir -p "outputs" # Define the script names in a single variable @@ -51,6 +53,6 @@ while IFS= read -r script; do mkdir -p "$output_dir" echo "$script" - time "$SHELL" "$script_file" "$output_dir" + $BENCHMARK_SHELL "$script_file" "$output_dir" echo "$?" 
done <<< "$script_names" diff --git a/oneliners/run.sh b/oneliners/run.sh index cf40e8e5..4659c4bd 100755 --- a/oneliners/run.sh +++ b/oneliners/run.sh @@ -4,6 +4,8 @@ export SUITE_DIR=$(realpath $(dirname "$0")) export TIMEFORMAT=%R cd $SUITE_DIR +BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash} + if [[ "$@" == *"--small"* ]]; then scripts_inputs=( "nfa-regex;1M" @@ -42,6 +44,6 @@ do output_file="./outputs/${parsed[0]}.out" echo "$script_file" - time "$SHELL" "$script_file" "$input_file" > "$output_file" + $BENCHMARK_SHELL "$script_file" "$input_file" > "$output_file" echo "$?" done diff --git a/sklearn/run.sh b/sklearn/run.sh old mode 100755 new mode 100644 index a3c57c8c..6758bdfc --- a/sklearn/run.sh +++ b/sklearn/run.sh @@ -1,72 +1,10 @@ #!/bin/bash -PYTHON="python3" -OUT=${OUT:-$PWD/result} -TMP=${TMP:-$PWD/tmp} -#export tmp to env -export TMP -SCRIPTS=${SCRIPTS:-$PWD/scripts} +REPO_TOP=$(git rev-parse --show-toplevel) +eval_dir="${REPO_TOP}/sklearn" +scripts_dir="${eval_dir}/scripts" -# Ideally, we'll move on to piping rather than writing to a file -MODEL=$TMP/model.obj -X=$TMP/X_train.obj -y=$TMP/y_train.obj -CLASSES=$TMP/classes.obj -DUAL=false # should be converted to bool inside script -MAX_SQ_SUM=$TMP/max_squared_sum.obj -WARM_COEF=$TMP/warm_start_coef.obj -C_=$TMP/C_.obj +BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash} +cd "$eval_dir" || exit 1 # scripts/run.sh references PWD +$BENCHMARK_SHELL "$scripts_dir/run.sh" "$@" -echo $PYTHON >&2 -echo "DIR: $DIR" >&2 -echo "SCRIPTS: $SCRIPTS" >&2 -echo "MODEL: $MODEL" >&2 -echo "X: $X" >&2 -echo "y: $y" >&2 -echo "CLASSES: $CLASSES" >&2 -echo "DUAL: $DUAL" >&2 -echo "MAX_SQ_SUM: $MAX_SQ_SUM" >&2 -echo "WARM_COEF: $WARM_COEF" >&2 -echo "C_: $C_" >&2 - -# TODO: Try this out on a larger dataset -# TODO: Benchmark each phase - -# Generating model & samples -$PYTHON $SCRIPTS/gen_model.py 100 -$PYTHON $SCRIPTS/gen_samples.py - -# Validity checking functions -# These functions just check to make sure that the input is valid. 
-# If not they will raise an error. Otherwise, they do not mutate the data. -$PYTHON $SCRIPTS/check_solver.py $MODEL -penalty=$($PYTHON $SCRIPTS/penalty.py $MODEL) -$PYTHON $SCRIPTS/val_data.py $MODEL $X $y -$PYTHON $SCRIPTS/classes.py $MODEL $y # This should return a classes with just the unique classes in y -echo "$PYTHON $SCRIPTS/check_multiclass.py $MODEL" >&2 -multiclass=$($PYTHON $SCRIPTS/check_multiclass.py $MODEL) -echo "------" >&2 -# TODO: Benchmark each step of the pipeline -# Make a modified pipeline where each step writes its output to a file - -# Calculations functions -$PYTHON $SCRIPTS/rownorm.py $X -n_classes=$($PYTHON $SCRIPTS/reshape_classes.py $MODEL $CLASSES) -$PYTHON $SCRIPTS/warm_start.py $MODEL $multiclass $n_classes # pipes coefficients - -# Covtype dataset has 7 classes -echo "WARM_COEF: $WARM_COEF" >&2 -echo "MAX_SQ_SUM: $MAX_SQ_SUM" >&2 - -echo "multiclass: $multiclass" >&2 -echo "penalty: $penalty" >&2 -$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 1 -$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 2 -$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 3 -$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 4 -$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 5 -$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 6 -$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 7 - -$PYTHON $SCRIPTS/zip_coef.py $MODEL -$PYTHON $SCRIPTS/adjust_coef.py $MODEL $X $multiclass $n_classes $RESULT/trained_model.obj diff --git a/sklearn/scripts/run.sh b/sklearn/scripts/run.sh new file mode 100755 index 00000000..a3c57c8c --- /dev/null +++ b/sklearn/scripts/run.sh @@ -0,0 +1,72 @@ +#!/bin/bash + +PYTHON="python3" +OUT=${OUT:-$PWD/result} +TMP=${TMP:-$PWD/tmp} +#export tmp to env 
+export TMP +SCRIPTS=${SCRIPTS:-$PWD/scripts} + +# Ideally, we'll move on to piping rather than writing to a file +MODEL=$TMP/model.obj +X=$TMP/X_train.obj +y=$TMP/y_train.obj +CLASSES=$TMP/classes.obj +DUAL=false # should be converted to bool inside script +MAX_SQ_SUM=$TMP/max_squared_sum.obj +WARM_COEF=$TMP/warm_start_coef.obj +C_=$TMP/C_.obj + +echo $PYTHON >&2 +echo "DIR: $DIR" >&2 +echo "SCRIPTS: $SCRIPTS" >&2 +echo "MODEL: $MODEL" >&2 +echo "X: $X" >&2 +echo "y: $y" >&2 +echo "CLASSES: $CLASSES" >&2 +echo "DUAL: $DUAL" >&2 +echo "MAX_SQ_SUM: $MAX_SQ_SUM" >&2 +echo "WARM_COEF: $WARM_COEF" >&2 +echo "C_: $C_" >&2 + +# TODO: Try this out on a larger dataset +# TODO: Benchmark each phase + +# Generating model & samples +$PYTHON $SCRIPTS/gen_model.py 100 +$PYTHON $SCRIPTS/gen_samples.py + +# Validity checking functions +# These functions just check to make sure that the input is valid. +# If not they will raise an error. Otherwise, they do not mutate the data. +$PYTHON $SCRIPTS/check_solver.py $MODEL +penalty=$($PYTHON $SCRIPTS/penalty.py $MODEL) +$PYTHON $SCRIPTS/val_data.py $MODEL $X $y +$PYTHON $SCRIPTS/classes.py $MODEL $y # This should return a classes with just the unique classes in y +echo "$PYTHON $SCRIPTS/check_multiclass.py $MODEL" >&2 +multiclass=$($PYTHON $SCRIPTS/check_multiclass.py $MODEL) +echo "------" >&2 +# TODO: Benchmark each step of the pipeline +# Make a modified pipeline where each step writes its output to a file + +# Calculations functions +$PYTHON $SCRIPTS/rownorm.py $X +n_classes=$($PYTHON $SCRIPTS/reshape_classes.py $MODEL $CLASSES) +$PYTHON $SCRIPTS/warm_start.py $MODEL $multiclass $n_classes # pipes coefficients + +# Covtype dataset has 7 classes +echo "WARM_COEF: $WARM_COEF" >&2 +echo "MAX_SQ_SUM: $MAX_SQ_SUM" >&2 + +echo "multiclass: $multiclass" >&2 +echo "penalty: $penalty" >&2 +$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 1 +$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF 
$MAX_SQ_SUM $multiclass $penalty 2 +$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 3 +$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 4 +$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 5 +$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 6 +$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 7 + +$PYTHON $SCRIPTS/zip_coef.py $MODEL +$PYTHON $SCRIPTS/adjust_coef.py $MODEL $X $multiclass $n_classes $RESULT/trained_model.obj diff --git a/uniq-ips/run.sh b/uniq-ips/run.sh old mode 100755 new mode 100644 index a04f9584..ac96fa12 --- a/uniq-ips/run.sh +++ b/uniq-ips/run.sh @@ -1 +1,10 @@ -cat "logs-popcount-org.txt" | sort | uniq > "out.txt" +#!/bin/bash + +REPO_TOP=$(git rev-parse --show-toplevel) +eval_dir="${REPO_TOP}/uniq-ips" +scripts_dir="${eval_dir}/scripts" + +BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash} +cd "$eval_dir" || exit 1 # scripts/run.sh puts files in its current directory +$BENCHMARK_SHELL "$scripts_dir/run.sh" "$@" + diff --git a/uniq-ips/scripts/run.sh b/uniq-ips/scripts/run.sh new file mode 100755 index 00000000..a04f9584 --- /dev/null +++ b/uniq-ips/scripts/run.sh @@ -0,0 +1 @@ +cat "logs-popcount-org.txt" | sort | uniq > "out.txt" diff --git a/unix50/run.sh b/unix50/run.sh index a8f920a9..26b23002 100755 --- a/unix50/run.sh +++ b/unix50/run.sh @@ -54,6 +54,7 @@ fi echo executing unix50 $(date) mkdir -p "outputs" +BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash} for script_input in ${scripts_inputs[@]}; do @@ -67,6 +68,6 @@ do output_file="./outputs/$script.out" echo "$script" - time $SHELL $script_file $input_file > $output_file + $BENCHMARK_SHELL $script_file $input_file > $output_file echo $? 
done diff --git a/web-index/run.sh b/web-index/run.sh index 197f8000..5b6fd32d 100755 --- a/web-index/run.sh +++ b/web-index/run.sh @@ -2,6 +2,7 @@ cd "$(dirname "$0")" +BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash} directory_path="inputs/articles" if [ ! -d "$directory_path" ]; then @@ -25,5 +26,5 @@ fi mkdir -p "$OUTPUT_BASE" echo "web-index" -time $SHELL ./scripts/ngrams.sh "$OUTPUT_BASE" +$BENCHMARK_SHELL ./scripts/ngrams.sh "$OUTPUT_BASE" echo $?