Skip to content

Commit

Permalink
Add in comparison script to check for performance differences with pr…
Browse files Browse the repository at this point in the history
…evious version
  • Loading branch information
pflooky committed Jul 4, 2024
1 parent 67e7d66 commit 8dccb6a
Show file tree
Hide file tree
Showing 10 changed files with 263 additions and 215 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ jobs:
- name: Get Spark query engine jars
run: bash benchmark/setup_query_engine_jars.sh
- name: Run benchmark script
run: bash benchmark/run_benchmark.sh
run: |
version=$(grep dataCatererVersion gradle.properties | cut -d= -f2)
bash benchmark/run_benchmark.sh
bash benchmark/compare_benchmark_results.sh "$version"
- name: Create pull request
uses: peter-evans/create-pull-request@v6
with:
Expand Down
48 changes: 48 additions & 0 deletions benchmark/compare_benchmark_results.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/usr/bin/env bash
#
# Compare average benchmark run times between two versions.
#
# Usage: compare_benchmark_results.sh [latest_version] [previous_version]
#   latest_version    version whose results are reported as "latest"
#                     (default: 0.11.5)
#   previous_version  version to compare against; when omitted, the closest
#                     lower version that has a results file is used
#
# Reads results/benchmark_results_<version>.txt next to this script. Rows are
# comma separated and the 4th column is the time taken in seconds.
# Exits 1 when a results file is missing or no previous version can be found.

set -euo pipefail

SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
RESULTS_DIR="$SCRIPT_DIR/results"
LATEST_VERSION=${1:-0.11.5}
RESULT_FILE_REGEX='^benchmark_results_([0-9.]+)\.txt$'
PLAN_PACKAGE_PREFIX="io.github.datacatering.plan.benchmark."

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print the version that sorts immediately before $1 among the result files
# in RESULTS_DIR. Prints an empty line when there is no such version.
find_previous_version() {
  local latest=$1
  local path candidate previous="" seen_latest="no"

  # Glob instead of parsing `ls`, so unrelated files in the directory are
  # ignored. The whole stream is consumed (no early break) so `sort` never
  # writes into a closed pipe.
  while IFS= read -r candidate; do
    if [[ "$candidate" == "$latest" ]]; then
      seen_latest="yes"
    fi
    if [[ "$seen_latest" == "no" ]]; then
      previous=$candidate
    fi
  done < <(
    for path in "$RESULTS_DIR"/benchmark_results_*.txt; do
      if [[ "${path##*/}" =~ $RESULT_FILE_REGEX ]]; then
        printf '%s\n' "${BASH_REMATCH[1]}"
      fi
    done | sort --version-sort -u
  )

  printf '%s\n' "$previous"
}

# Print the mean of column 4 over every row of file $1 containing the literal
# string $2. Returns non-zero when no row matches.
average_time() {
  local results_file=$1 plan=$2

  # index() is a fixed-string match, so the dots in the class name are not
  # treated as regex wildcards. Dividing by the matched row count (rather than
  # a fixed 3) keeps the mean right when a plan has more or fewer runs.
  awk -F ',' -v plan="$plan" '
    index($0, plan) { total += $4; runs++ }
    END {
      if (runs == 0) exit 1
      print total / runs
    }
  ' "$results_file"
}

LATEST_RESULT_FILE="$RESULTS_DIR/benchmark_results_${LATEST_VERSION}.txt"
[[ -f "$LATEST_RESULT_FILE" ]] \
  || die "Results file for latest version not found: $LATEST_RESULT_FILE"

if [[ -z "${2:-}" ]]; then
  echo "No second version to compare against passed into arguments, defaulting to previous version in results"
  PREVIOUS_VERSION=$(find_previous_version "$LATEST_VERSION")
  [[ -n "$PREVIOUS_VERSION" ]] \
    || die "No results for a version before $LATEST_VERSION found in $RESULTS_DIR"
else
  PREVIOUS_VERSION=$2
fi

PREVIOUS_RESULT_FILE="$RESULTS_DIR/benchmark_results_${PREVIOUS_VERSION}.txt"
[[ -f "$PREVIOUS_RESULT_FILE" ]] \
  || die "Results file for previous version not found: $PREVIOUS_RESULT_FILE"

echo "Latest version: $LATEST_VERSION"
echo "Previous version: $PREVIOUS_VERSION"
echo

# Row prefixes to compare: "<class>:<query engine>,<num records>,". The
# trailing comma stops 100000 from also matching 1000000.
plans=(
  "io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,10000,"
  "io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,100000,"
  "io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,1000000,"
  "io.github.datacatering.plan.benchmark.BenchmarkForeignKeyPlanRun:,500000,"
  "io.github.datacatering.plan.benchmark.BenchmarkJsonPlanRun:,100000,"
)

for plan in "${plans[@]}"; do
  plan_name=${plan#"$PLAN_PACKAGE_PREFIX"}
  echo "Comparing performance for plan: $plan_name"

  # A plan missing from either file is reported and skipped instead of
  # producing a bogus average or a division by zero.
  if ! latest_version_average_time=$(average_time "$LATEST_RESULT_FILE" "$plan"); then
    echo "No results for plan $plan_name in $LATEST_RESULT_FILE, skipping" >&2
    echo
    continue
  fi
  if ! previous_version_average_time=$(average_time "$PREVIOUS_RESULT_FILE" "$plan"); then
    echo "No results for plan $plan_name in $PREVIOUS_RESULT_FILE, skipping" >&2
    echo
    continue
  fi

  difference=$(awk -v t1="$previous_version_average_time" -v t2="$latest_version_average_time" \
    'BEGIN { printf "%.3f", t2 - t1 }')
  # Guard the relative change: awk aborts on division by zero.
  percent_difference=$(awk -v t1="$previous_version_average_time" -v t2="$latest_version_average_time" \
    'BEGIN { if (t1 == 0) printf "N/A"; else printf "%.3f", (t2 - t1) / t1 * 100 }')

  echo "Version: $PREVIOUS_VERSION, Average time (s): $previous_version_average_time"
  echo "Version: $LATEST_VERSION, Average time (s): $latest_version_average_time, Difference (s): $difference, Percent: $percent_difference%"
  echo
done
60 changes: 30 additions & 30 deletions benchmark/results/benchmark_results_0.10.6.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,33 +8,33 @@ System info:
Driver memory: DRIVER_MEMORY=2g
Executor memory: EXECUTOR_MEMORY=2g
Class name, Num records, Num run, Time taken (s)
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:default, 100000, 1, 18.10
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:default, 100000, 2, 17.98
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:default, 100000, 3, 18.75
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:blaze, 100000, 1, 18.45
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:blaze, 100000, 2, 18.55
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:blaze, 100000, 3, 18.54
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:comet, 100000, 1, 18.97
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:comet, 100000, 2, 18.61
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:comet, 100000, 3, 18.74
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:gluten, 100000, 1, 3.22
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:gluten, 100000, 2, 3.11
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:gluten, 100000, 3, 3.29
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 10000, 1, 13.15
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 10000, 2, 13.01
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 10000, 3, 13.17
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 100000, 1, 18.55
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 100000, 2, 18.76
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 100000, 3, 18.72
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 1000000, 1, 65.31
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 1000000, 2, 66.68
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 1000000, 3, 65.88
io.github.datacatering.plan.benchmark.BenchmarkForeignKeyPlanRun:, 500000, 1, 37.88
io.github.datacatering.plan.benchmark.BenchmarkForeignKeyPlanRun:, 500000, 2, 37.10
io.github.datacatering.plan.benchmark.BenchmarkForeignKeyPlanRun:, 500000, 3, 38.23
io.github.datacatering.plan.benchmark.BenchmarkJsonPlanRun:, 100000, 1, 18.32
io.github.datacatering.plan.benchmark.BenchmarkJsonPlanRun:, 100000, 2, 18.64
io.github.datacatering.plan.benchmark.BenchmarkJsonPlanRun:, 100000, 3, 18.17
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 100000, 1, 18.36
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 100000, 2, 18.39
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 100000, 3, 18.85
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:default,100000,1,18.10
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:default,100000,2,17.98
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:default,100000,3,18.75
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:blaze,100000,1,18.45
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:blaze,100000,2,18.55
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:blaze,100000,3,18.54
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:comet,100000,1,18.97
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:comet,100000,2,18.61
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:comet,100000,3,18.74
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:gluten,100000,1,3.22
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:gluten,100000,2,3.11
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:gluten,100000,3,3.29
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,10000,1,13.15
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,10000,2,13.01
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,10000,3,13.17
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,100000,1,18.55
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,100000,2,18.76
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,100000,3,18.72
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,1000000,1,65.31
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,1000000,2,66.68
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,1000000,3,65.88
io.github.datacatering.plan.benchmark.BenchmarkForeignKeyPlanRun:,500000,1,37.88
io.github.datacatering.plan.benchmark.BenchmarkForeignKeyPlanRun:,500000,2,37.10
io.github.datacatering.plan.benchmark.BenchmarkForeignKeyPlanRun:,500000,3,38.23
io.github.datacatering.plan.benchmark.BenchmarkJsonPlanRun:,100000,1,18.32
io.github.datacatering.plan.benchmark.BenchmarkJsonPlanRun:,100000,2,18.64
io.github.datacatering.plan.benchmark.BenchmarkJsonPlanRun:,100000,3,18.17
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,100000,1,18.36
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,100000,2,18.39
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,100000,3,18.85
60 changes: 30 additions & 30 deletions benchmark/results/benchmark_results_0.10.7.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,33 +9,33 @@ System info:
Driver memory: DRIVER_MEMORY=2g
Executor memory: EXECUTOR_MEMORY=2g
Class name, Num records, Num run, Time taken (s)
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:default, 100000, 1, 19.26
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:default, 100000, 2, 19.02
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:default, 100000, 3, 18.47
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:blaze, 100000, 1, 19.11
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:blaze, 100000, 2, 18.66
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:blaze, 100000, 3, 19.17
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:comet, 100000, 1, 18.96
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:comet, 100000, 2, 18.58
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:comet, 100000, 3, 18.41
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:gluten, 100000, 1, 3.31
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:gluten, 100000, 2, 3.31
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:gluten, 100000, 3, 3.27
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 10000, 1, 13.47
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 10000, 2, 13.51
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 10000, 3, 13.52
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 100000, 1, 18.91
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 100000, 2, 19.11
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 100000, 3, 18.93
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 1000000, 1, 67.12
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 1000000, 2, 67.25
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 1000000, 3, 67.23
io.github.datacatering.plan.benchmark.BenchmarkForeignKeyPlanRun:, 500000, 1, 37.41
io.github.datacatering.plan.benchmark.BenchmarkForeignKeyPlanRun:, 500000, 2, 38.44
io.github.datacatering.plan.benchmark.BenchmarkForeignKeyPlanRun:, 500000, 3, 38.26
io.github.datacatering.plan.benchmark.BenchmarkJsonPlanRun:, 100000, 1, 19.30
io.github.datacatering.plan.benchmark.BenchmarkJsonPlanRun:, 100000, 2, 18.30
io.github.datacatering.plan.benchmark.BenchmarkJsonPlanRun:, 100000, 3, 18.88
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 100000, 1, 18.98
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 100000, 2, 18.42
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 100000, 3, 18.96
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:default,100000,1,19.26
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:default,100000,2,19.02
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:default,100000,3,18.47
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:blaze,100000,1,19.11
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:blaze,100000,2,18.66
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:blaze,100000,3,19.17
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:comet,100000,1,18.96
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:comet,100000,2,18.58
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:comet,100000,3,18.41
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:gluten,100000,1,3.31
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:gluten,100000,2,3.31
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:gluten,100000,3,3.27
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,10000,1,13.47
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,10000,2,13.51
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,10000,3,13.52
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,100000,1,18.91
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,100000,2,19.11
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,100000,3,18.93
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,1000000,1,67.12
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,1000000,2,67.25
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,1000000,3,67.23
io.github.datacatering.plan.benchmark.BenchmarkForeignKeyPlanRun:,500000,1,37.41
io.github.datacatering.plan.benchmark.BenchmarkForeignKeyPlanRun:,500000,2,38.44
io.github.datacatering.plan.benchmark.BenchmarkForeignKeyPlanRun:,500000,3,38.26
io.github.datacatering.plan.benchmark.BenchmarkJsonPlanRun:,100000,1,19.30
io.github.datacatering.plan.benchmark.BenchmarkJsonPlanRun:,100000,2,18.30
io.github.datacatering.plan.benchmark.BenchmarkJsonPlanRun:,100000,3,18.88
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,100000,1,18.98
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,100000,2,18.42
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,100000,3,18.96
60 changes: 30 additions & 30 deletions benchmark/results/benchmark_results_0.10.9.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,33 +9,33 @@ System info:
Driver memory: DRIVER_MEMORY=2g
Executor memory: EXECUTOR_MEMORY=2g
Class name, Num records, Num run, Time taken (s)
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:default, 100000, 1, 18.62
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:default, 100000, 2, 18.50
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:default, 100000, 3, 18.22
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:blaze, 100000, 1, 19.03
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:blaze, 100000, 2, 18.51
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:blaze, 100000, 3, 18.98
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:comet, 100000, 1, 18.43
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:comet, 100000, 2, 19.14
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:comet, 100000, 3, 18.70
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:gluten, 100000, 1, 3.37
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:gluten, 100000, 2, 3.20
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:gluten, 100000, 3, 3.30
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 10000, 1, 13.30
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 10000, 2, 12.91
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 10000, 3, 13.26
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 100000, 1, 18.33
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 100000, 2, 18.41
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 100000, 3, 18.49
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 1000000, 1, 66.02
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 1000000, 2, 63.98
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 1000000, 3, 66.96
io.github.datacatering.plan.benchmark.BenchmarkForeignKeyPlanRun:, 500000, 1, 37.58
io.github.datacatering.plan.benchmark.BenchmarkForeignKeyPlanRun:, 500000, 2, 37.86
io.github.datacatering.plan.benchmark.BenchmarkForeignKeyPlanRun:, 500000, 3, 37.33
io.github.datacatering.plan.benchmark.BenchmarkJsonPlanRun:, 100000, 1, 18.70
io.github.datacatering.plan.benchmark.BenchmarkJsonPlanRun:, 100000, 2, 18.41
io.github.datacatering.plan.benchmark.BenchmarkJsonPlanRun:, 100000, 3, 18.39
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 100000, 1, 18.66
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 100000, 2, 18.57
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:, 100000, 3, 18.35
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:default,100000,1,18.62
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:default,100000,2,18.50
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:default,100000,3,18.22
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:blaze,100000,1,19.03
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:blaze,100000,2,18.51
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:blaze,100000,3,18.98
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:comet,100000,1,18.43
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:comet,100000,2,19.14
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:comet,100000,3,18.70
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:gluten,100000,1,3.37
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:gluten,100000,2,3.20
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:gluten,100000,3,3.30
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,10000,1,13.30
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,10000,2,12.91
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,10000,3,13.26
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,100000,1,18.33
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,100000,2,18.41
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,100000,3,18.49
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,1000000,1,66.02
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,1000000,2,63.98
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,1000000,3,66.96
io.github.datacatering.plan.benchmark.BenchmarkForeignKeyPlanRun:,500000,1,37.58
io.github.datacatering.plan.benchmark.BenchmarkForeignKeyPlanRun:,500000,2,37.86
io.github.datacatering.plan.benchmark.BenchmarkForeignKeyPlanRun:,500000,3,37.33
io.github.datacatering.plan.benchmark.BenchmarkJsonPlanRun:,100000,1,18.70
io.github.datacatering.plan.benchmark.BenchmarkJsonPlanRun:,100000,2,18.41
io.github.datacatering.plan.benchmark.BenchmarkJsonPlanRun:,100000,3,18.39
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,100000,1,18.66
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,100000,2,18.57
io.github.datacatering.plan.benchmark.BenchmarkParquetPlanRun:,100000,3,18.35
Loading

0 comments on commit 8dccb6a

Please sign in to comment.