From 341687aaf6147db86959eee297c651561c64455b Mon Sep 17 00:00:00 2001
From: Ethan Williams <ethan@ethan.ws>
Date: Mon, 2 Dec 2024 13:38:47 -0500
Subject: [PATCH] added BENCHMARK_SHELL to found benchmarks (#37)

* added BENCHMARK_SHELL to found benchmarks

* updated script globs after moving the benchmarks
---
 covid-mts/run.sh                      | 10 ++--
 file-enc/run.sh                       |  5 +-
 infrastructure/data/script-globs.json |  4 +-
 log-analysis/run.sh                   |  6 ++-
 max-temp/run.sh                       |  4 +-
 media-conv/run.sh                     |  6 ++-
 nlp/run.sh                            |  4 +-
 oneliners/run.sh                      |  4 +-
 sklearn/run.sh                        | 74 +++------------------------
 sklearn/scripts/run.sh                | 72 ++++++++++++++++++++++++++
 uniq-ips/run.sh                       | 11 +++-
 uniq-ips/scripts/run.sh               |  1 +
 unix50/run.sh                         |  3 +-
 web-index/run.sh                      |  3 +-
 14 files changed, 121 insertions(+), 86 deletions(-)
 mode change 100755 => 100644 sklearn/run.sh
 create mode 100755 sklearn/scripts/run.sh
 mode change 100755 => 100644 uniq-ips/run.sh
 create mode 100755 uniq-ips/scripts/run.sh

diff --git a/covid-mts/run.sh b/covid-mts/run.sh
index d60c69ac..57d478e2 100755
--- a/covid-mts/run.sh
+++ b/covid-mts/run.sh
@@ -15,8 +15,10 @@ input_file="$input_dir/in$suffix.csv"
 output_scoped="$outputs_dir/outputs$suffix"
 mkdir -p "$output_scoped"
 
-"$scripts_dir/1.sh" "$input_file" > "$output_scoped/1.out"
-"$scripts_dir/2.sh" "$input_file" > "$output_scoped/2.out"
-"$scripts_dir/3.sh" "$input_file" > "$output_scoped/3.out"
-"$scripts_dir/4.sh" "$input_file" > "$output_scoped/4.out"
+BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash}
+
+$BENCHMARK_SHELL "$scripts_dir/1.sh" "$input_file" > "$output_scoped/1.out"
+$BENCHMARK_SHELL "$scripts_dir/2.sh" "$input_file" > "$output_scoped/2.out"
+$BENCHMARK_SHELL "$scripts_dir/3.sh" "$input_file" > "$output_scoped/3.out"
+$BENCHMARK_SHELL "$scripts_dir/4.sh" "$input_file" > "$output_scoped/4.out"
 
diff --git a/file-enc/run.sh b/file-enc/run.sh
index 645ebe37..2df87c9b 100755
--- a/file-enc/run.sh
+++ b/file-enc/run.sh
@@ -17,5 +17,6 @@ if [[ "$1" == "--small" ]]; then
     suffix=".small"
 fi
 
-$scripts_dir/compress_files.sh $input_pcaps $results_dir/compress_files$suffix
-$scripts_dir/encrypt_files.sh $input_pcaps $results_dir/encrypt_files$suffix
+BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash}
+$BENCHMARK_SHELL $scripts_dir/compress_files.sh $input_pcaps $results_dir/compress_files$suffix
+$BENCHMARK_SHELL $scripts_dir/encrypt_files.sh $input_pcaps $results_dir/encrypt_files$suffix
diff --git a/infrastructure/data/script-globs.json b/infrastructure/data/script-globs.json
index 2082d205..77888905 100644
--- a/infrastructure/data/script-globs.json
+++ b/infrastructure/data/script-globs.json
@@ -21,13 +21,13 @@
         "scripts": ["oneliners/scripts/*.sh"]
     },
     "sklearn": {
-        "scripts": ["sklearn/run.sh"]
+        "scripts": ["sklearn/scripts/run.sh"]
     },
     "riker": {
         "scripts": ["riker/scripts/*/build.sh"]
     },
     "uniq-ips": {
-        "scripts": ["uniq-ips/run.sh"]
+        "scripts": ["uniq-ips/scripts/run.sh"]
     },
     "unix50": {
         "scripts": ["unix50/scripts/*.sh"]
diff --git a/log-analysis/run.sh b/log-analysis/run.sh
index 50dc1f21..66b2035a 100755
--- a/log-analysis/run.sh
+++ b/log-analysis/run.sh
@@ -17,10 +17,12 @@ if [[ "$@" == *"--small"* ]]; then
     suffix=".small"
 fi
 
+BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash}
+
 echo "nginx"
-time $scripts_dir/nginx.sh $nginx_input $results_dir/nginx$suffix 
+$BENCHMARK_SHELL $scripts_dir/nginx.sh $nginx_input $results_dir/nginx$suffix 
 echo $?
 
 echo "pcaps"
-time $scripts_dir/pcaps.sh $pcaps_input $results_dir/pcaps$suffix 
+$BENCHMARK_SHELL $scripts_dir/pcaps.sh $pcaps_input $results_dir/pcaps$suffix 
 echo $?
diff --git a/max-temp/run.sh b/max-temp/run.sh
index a7337e59..5bc6a7d4 100755
--- a/max-temp/run.sh
+++ b/max-temp/run.sh
@@ -18,4 +18,6 @@ export statistics_dir="$results_dir/statistics$suffix"
 
 mkdir -p "$statistics_dir"
 
-${scripts_dir}/temp-analytics.sh
+BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash}
+
+$BENCHMARK_SHELL ${scripts_dir}/temp-analytics.sh
diff --git a/media-conv/run.sh b/media-conv/run.sh
index 93aa0961..c9136726 100755
--- a/media-conv/run.sh
+++ b/media-conv/run.sh
@@ -17,10 +17,12 @@ if [[ "$@" == *"--small"* ]]; then
     suffix=".small"
 fi
 
+BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash}
+
 echo "img_convert"
-time $scripts_dir/img_convert.sh $img_convert_input $results_dir/img_convert$suffix > $results_dir/img_convert$suffix.log
+$BENCHMARK_SHELL $scripts_dir/img_convert.sh $img_convert_input $results_dir/img_convert$suffix > $results_dir/img_convert$suffix.log
 echo $?
 
 echo "to_mp3"
-time $scripts_dir/to_mp3.sh $to_mp3_input $results_dir/to_mp3$suffix > $results_dir/to_mp3$suffix.log
+$BENCHMARK_SHELL $scripts_dir/to_mp3.sh $to_mp3_input $results_dir/to_mp3$suffix > $results_dir/to_mp3$suffix.log
 echo $?
diff --git a/nlp/run.sh b/nlp/run.sh
index 92d2ed6b..f5afb2fe 100755
--- a/nlp/run.sh
+++ b/nlp/run.sh
@@ -12,6 +12,8 @@ else
     export IN="$SUITE_DIR/inputs/pg"
 fi
 
+BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash}
+
 mkdir -p "outputs"
 
 # Define the script names in a single variable
@@ -51,6 +53,6 @@ while IFS= read -r script; do
     mkdir -p "$output_dir"
 
     echo "$script"
-    time "$SHELL" "$script_file" "$output_dir"
+    $BENCHMARK_SHELL "$script_file" "$output_dir" # BENCHMARK_SHELL is the interpreter; do not also pass $SHELL
     echo "$?"
 done <<< "$script_names"
diff --git a/oneliners/run.sh b/oneliners/run.sh
index cf40e8e5..4659c4bd 100755
--- a/oneliners/run.sh
+++ b/oneliners/run.sh
@@ -4,6 +4,8 @@ export SUITE_DIR=$(realpath $(dirname "$0"))
 export TIMEFORMAT=%R
 cd $SUITE_DIR
 
+BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash}
+
 if [[ "$@" == *"--small"* ]]; then
     scripts_inputs=(
         "nfa-regex;1M"
@@ -42,6 +44,6 @@ do
     output_file="./outputs/${parsed[0]}.out"
 
     echo "$script_file"
-    time "$SHELL" "$script_file" "$input_file" > "$output_file"
+    $BENCHMARK_SHELL "$script_file" "$input_file" > "$output_file" # BENCHMARK_SHELL is the interpreter; do not also pass $SHELL
     echo "$?"
 done
diff --git a/sklearn/run.sh b/sklearn/run.sh
old mode 100755
new mode 100644
index a3c57c8c..6758bdfc
--- a/sklearn/run.sh
+++ b/sklearn/run.sh
@@ -1,72 +1,10 @@
 #!/bin/bash
 
-PYTHON="python3"
-OUT=${OUT:-$PWD/result}
-TMP=${TMP:-$PWD/tmp}
-#export tmp to env
-export TMP
-SCRIPTS=${SCRIPTS:-$PWD/scripts}
+REPO_TOP=$(git rev-parse --show-toplevel)
+eval_dir="${REPO_TOP}/sklearn"
+scripts_dir="${eval_dir}/scripts"
 
-# Ideally, we'll move on to piping rather than writing to a file
-MODEL=$TMP/model.obj
-X=$TMP/X_train.obj
-y=$TMP/y_train.obj
-CLASSES=$TMP/classes.obj
-DUAL=false # should be converted to bool inside script
-MAX_SQ_SUM=$TMP/max_squared_sum.obj
-WARM_COEF=$TMP/warm_start_coef.obj
-C_=$TMP/C_.obj
+BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash}
+cd "$eval_dir" || exit 1 # scripts/run.sh references PWD
+$BENCHMARK_SHELL "$scripts_dir/run.sh" "$@" # quoted so each argument is forwarded as one word
 
-echo $PYTHON >&2
-echo "DIR: $DIR" >&2
-echo "SCRIPTS: $SCRIPTS" >&2
-echo "MODEL: $MODEL" >&2
-echo "X: $X" >&2
-echo "y: $y" >&2
-echo "CLASSES: $CLASSES" >&2
-echo "DUAL: $DUAL" >&2
-echo "MAX_SQ_SUM: $MAX_SQ_SUM" >&2
-echo "WARM_COEF: $WARM_COEF" >&2
-echo "C_: $C_" >&2
-
-# TODO: Try this out on a larger dataset
-# TODO: Benchmark each phase
-
-# Generating model & samples
-$PYTHON $SCRIPTS/gen_model.py 100
-$PYTHON $SCRIPTS/gen_samples.py
-
-# Validity checking functions
-# These functions just check to make sure that the input is valid. 
-# If not they will raise an error. Otherwise, they do not mutate the data.
-$PYTHON $SCRIPTS/check_solver.py $MODEL
-penalty=$($PYTHON $SCRIPTS/penalty.py $MODEL)
-$PYTHON $SCRIPTS/val_data.py $MODEL $X $y 
-$PYTHON $SCRIPTS/classes.py $MODEL $y # This should return a classes with just the unique classes in y
-echo "$PYTHON $SCRIPTS/check_multiclass.py $MODEL" >&2
-multiclass=$($PYTHON $SCRIPTS/check_multiclass.py $MODEL)
-echo "------" >&2
-# TODO: Benchmark each step of the pipeline
-# Make a modified pipeline where each step writes its output to a file
-
-# Calculations functions
-$PYTHON $SCRIPTS/rownorm.py $X
-n_classes=$($PYTHON $SCRIPTS/reshape_classes.py $MODEL $CLASSES)
-$PYTHON $SCRIPTS/warm_start.py $MODEL $multiclass $n_classes # pipes coefficients
-
-# Covtype dataset has 7 classes
-echo "WARM_COEF: $WARM_COEF" >&2
-echo "MAX_SQ_SUM: $MAX_SQ_SUM" >&2
-
-echo "multiclass: $multiclass" >&2
-echo "penalty: $penalty" >&2
-$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 1
-$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 2
-$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 3
-$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 4
-$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 5
-$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 6
-$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 7
-
-$PYTHON $SCRIPTS/zip_coef.py $MODEL
-$PYTHON $SCRIPTS/adjust_coef.py $MODEL $X $multiclass $n_classes $RESULT/trained_model.obj
diff --git a/sklearn/scripts/run.sh b/sklearn/scripts/run.sh
new file mode 100755
index 00000000..a3c57c8c
--- /dev/null
+++ b/sklearn/scripts/run.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+PYTHON="python3"
+OUT=${OUT:-$PWD/result}
+TMP=${TMP:-$PWD/tmp}
+#export tmp to env
+export TMP
+SCRIPTS=${SCRIPTS:-$PWD/scripts}
+
+# Ideally, we'll move on to piping rather than writing to a file
+MODEL=$TMP/model.obj
+X=$TMP/X_train.obj
+y=$TMP/y_train.obj
+CLASSES=$TMP/classes.obj
+DUAL=false # should be converted to bool inside script
+MAX_SQ_SUM=$TMP/max_squared_sum.obj
+WARM_COEF=$TMP/warm_start_coef.obj
+C_=$TMP/C_.obj
+
+echo $PYTHON >&2
+echo "DIR: $DIR" >&2
+echo "SCRIPTS: $SCRIPTS" >&2
+echo "MODEL: $MODEL" >&2
+echo "X: $X" >&2
+echo "y: $y" >&2
+echo "CLASSES: $CLASSES" >&2
+echo "DUAL: $DUAL" >&2
+echo "MAX_SQ_SUM: $MAX_SQ_SUM" >&2
+echo "WARM_COEF: $WARM_COEF" >&2
+echo "C_: $C_" >&2
+
+# TODO: Try this out on a larger dataset
+# TODO: Benchmark each phase
+
+# Generating model & samples
+$PYTHON $SCRIPTS/gen_model.py 100
+$PYTHON $SCRIPTS/gen_samples.py
+
+# Validity checking functions
+# These functions just check to make sure that the input is valid. 
+# If not they will raise an error. Otherwise, they do not mutate the data.
+$PYTHON $SCRIPTS/check_solver.py $MODEL
+penalty=$($PYTHON $SCRIPTS/penalty.py $MODEL)
+$PYTHON $SCRIPTS/val_data.py $MODEL $X $y 
+$PYTHON $SCRIPTS/classes.py $MODEL $y # This should return a classes with just the unique classes in y
+echo "$PYTHON $SCRIPTS/check_multiclass.py $MODEL" >&2
+multiclass=$($PYTHON $SCRIPTS/check_multiclass.py $MODEL)
+echo "------" >&2
+# TODO: Benchmark each step of the pipeline
+# Make a modified pipeline where each step writes its output to a file
+
+# Calculations functions
+$PYTHON $SCRIPTS/rownorm.py $X
+n_classes=$($PYTHON $SCRIPTS/reshape_classes.py $MODEL $CLASSES)
+$PYTHON $SCRIPTS/warm_start.py $MODEL $multiclass $n_classes # pipes coefficients
+
+# Covtype dataset has 7 classes
+echo "WARM_COEF: $WARM_COEF" >&2
+echo "MAX_SQ_SUM: $MAX_SQ_SUM" >&2
+
+echo "multiclass: $multiclass" >&2
+echo "penalty: $penalty" >&2
+$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 1
+$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 2
+$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 3
+$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 4
+$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 5
+$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 6
+$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 7
+
+$PYTHON $SCRIPTS/zip_coef.py $MODEL
+$PYTHON $SCRIPTS/adjust_coef.py $MODEL $X $multiclass $n_classes $RESULT/trained_model.obj
diff --git a/uniq-ips/run.sh b/uniq-ips/run.sh
old mode 100755
new mode 100644
index a04f9584..ac96fa12
--- a/uniq-ips/run.sh
+++ b/uniq-ips/run.sh
@@ -1 +1,10 @@
-cat "logs-popcount-org.txt" | sort | uniq > "out.txt"
+#!/bin/bash
+
+REPO_TOP=$(git rev-parse --show-toplevel)
+eval_dir="${REPO_TOP}/uniq-ips"
+scripts_dir="${eval_dir}/scripts"
+
+BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash}
+cd "$eval_dir" || exit 1 # scripts/run.sh puts files in its current directory
+$BENCHMARK_SHELL "$scripts_dir/run.sh" "$@" # quoted so each argument is forwarded as one word
+
diff --git a/uniq-ips/scripts/run.sh b/uniq-ips/scripts/run.sh
new file mode 100755
index 00000000..a04f9584
--- /dev/null
+++ b/uniq-ips/scripts/run.sh
@@ -0,0 +1 @@
+cat "logs-popcount-org.txt" | sort | uniq > "out.txt"
diff --git a/unix50/run.sh b/unix50/run.sh
index a8f920a9..26b23002 100755
--- a/unix50/run.sh
+++ b/unix50/run.sh
@@ -54,6 +54,7 @@ fi
 echo executing unix50 $(date)
 
 mkdir -p "outputs"
+BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash}
 
 for script_input in ${scripts_inputs[@]};
 do
@@ -67,6 +68,6 @@ do
     output_file="./outputs/$script.out"
 
     echo "$script"
-    time $SHELL $script_file $input_file > $output_file
+    $BENCHMARK_SHELL $script_file $input_file > $output_file
     echo $?
 done
diff --git a/web-index/run.sh b/web-index/run.sh
index 197f8000..5b6fd32d 100755
--- a/web-index/run.sh
+++ b/web-index/run.sh
@@ -2,6 +2,7 @@
 
 cd "$(dirname "$0")"
 
+BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash}
 directory_path="inputs/articles"
 
 if [ ! -d "$directory_path" ]; then
@@ -25,5 +26,5 @@ fi
 mkdir -p "$OUTPUT_BASE"
 
 echo "web-index"
-time $SHELL ./scripts/ngrams.sh "$OUTPUT_BASE"
+$BENCHMARK_SHELL ./scripts/ngrams.sh "$OUTPUT_BASE"
 echo $?