From 3577708b84f6f121c2dab8d4d5fcdef445989a7e Mon Sep 17 00:00:00 2001 From: gnrunge Date: Tue, 20 Jul 2021 15:40:43 -0700 Subject: [PATCH 1/2] Binary size benchmarking: Rust script to measure size of the ICU4X examples compiled into wasm binaries. Set up GHA to build wasm binaries, measure file sizes, push results into benchmark dashboard . Resolves ticket #140. Factor in review feedback. Fix tidy clippy findings. --- .github/workflows/build-test.yml | 106 +++++++++++++++++++++++++++- Cargo.lock | 7 ++ Cargo.toml | 1 + tools/benchmark/README.md | 26 +++++++ tools/benchmark/binsize/Cargo.toml | 12 ++++ tools/benchmark/binsize/README.md | 7 ++ tools/benchmark/binsize/src/main.rs | 47 ++++++++++++ 7 files changed, 205 insertions(+), 1 deletion(-) create mode 100644 tools/benchmark/binsize/Cargo.toml create mode 100644 tools/benchmark/binsize/README.md create mode 100644 tools/benchmark/binsize/src/main.rs diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index 2c08e77db3f..57f04d7a128 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -563,6 +563,100 @@ jobs: path: ./benchmarks/memory/** name: benchmark-memory + # Binary size benchmark: build and size wasm binaries; creates ndjson output data format + + binsize: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Get cargo-make version + id: cargo-make-version + run: | + echo "::set-output name=hash::$(cargo search cargo-make | grep '^cargo-make =' | md5sum)" + shell: bash + + - name: Attempt to load cached cargo-make + uses: actions/cache@v2 + id: cargo-make-cache + with: + path: | + ~/.cargo/bin/cargo-make + ~/.cargo/bin/cargo-make.exe + key: ${{ runner.os }}-${{ steps.cargo-make-version.outputs.hash }} + + - name: Install cargo-make + if: steps.cargo-make-cache.outputs.cache-hit != 'true' + uses: actions-rs/install@v0.1.2 + with: + crate: cargo-make + version: latest + + - name: Install prerequisites for wasm build + run: | 
+ rustup component add rust-src + rustup toolchain list + rustup toolchain install nightly-2021-02-28 + sudo npm install -g wasm-opt --unsafe-perm + sudo npm install -g wabt + cargo install twiggy + + - name: Setup output data directory + run: mkdir -p benchmarks/binsize + + - name: Build wasm executables and measure size + run: | + cargo make wasm-examples + cargo run --package icu_benchmark_binsize -- wasmpkg/wasm-opt| tee benchmarks/binsize/output.txt + + - name: Store benchmark result (non-merge action only) + # Data from anything that is not a merge to mainline goes to a preliminary branch + if: github.event_name != 'push' || github.ref != 'refs/heads/main' + # Use gregtatum special feature to process ndjson-formatted benchmark data + uses: gregtatum/github-action-benchmark@d3f06f738e9612988d575db23fae5ca0008d3d12 + with: + tool: 'ndjson' + output-file-path: benchmarks/binsize/output.txt + benchmark-data-dir-path: ./benchmarks/binsize + # Tentative setting, optimized value to be determined + alert-threshold: '200%' + fail-on-alert: true + gh-pages-branch: unmerged-pr-bench-data + auto-push: false + github-token: ${{ secrets.GITHUB_TOKEN }} + comment-on-alert: true + + - name: Store benchmark result (merge to main only) + # Only for PRs that merge into the ICU4X mainline. 
+ if: github.event_name == 'push' && github.ref == 'refs/heads/main' && github.repository == 'unicode-org/icu4x' + # Use gregtatum special feature to process ndjson-formatted benchmark data + uses: gregtatum/github-action-benchmark@d3f06f738e9612988d575db23fae5ca0008d3d12 + with: + tool: 'ndjson' + output-file-path: benchmarks/binsize/output.txt + benchmark-data-dir-path: ./benchmarks/binsize + # Tentative setting, optimized value to be determined + alert-threshold: '200%' + fail-on-alert: true + gh-pages-branch: gh-pages + auto-push: true + github-token: ${{ secrets.GITHUB_TOKEN }} + comment-on-alert: true + alert-comment-cc-users: '@gnrunge,@sffc,@zbraniecki,@echeran,@gregtatum' + + - name: Switch to branch gh-pages for benchmark data storage (merge to main only) + # Only for PRs that merge into the ICU4X mainline + if: github.event_name == 'push' && github.ref == 'refs/heads/main' && github.repository == 'unicode-org/icu4x' + run: git checkout gh-pages + + - name: Upload binsize with benchmark data in ndjson format (merge to main only) + # Only for PRs that merge into the ICU4X mainline. + if: github.event_name == 'push' && github.ref == 'refs/heads/main' && github.repository == 'unicode-org/icu4x' + uses: actions/upload-artifact@v2 + with: + path: ./benchmarks/binsize/** + name: benchmark-binsize + # Doc-GH-Pages job doc_gh_pages: @@ -570,7 +664,7 @@ jobs: runs-on: ubuntu-latest - needs: [check, tidy, benchmark, memory] + needs: [check, tidy, benchmark, memory, binsize] ## Only create docs for merges/pushes to main (skip PRs). ## Multiple unfinished PRs should not clobber docs from approved code. 
@@ -593,6 +687,9 @@ jobs: - name: Create (ensure existence of) folder for memory benchmark data to copy run: mkdir -p copy-to-ext-repo/benchmarks/memory + - name: Create (ensure existence of) folder for binary size benchmark data to copy + run: mkdir -p copy-to-ext-repo/benchmarks/binsize + # Doc-GH-Pages job > Download benchmark dashboard files from previous jobs into folder of files to copy to remote repo - name: Download previous content destined for GH pages @@ -608,6 +705,13 @@ jobs: path: ./copy-to-ext-repo/benchmarks/memory name: benchmark-memory + # Doc-GH-Pages job > Download benchmark dashboard files from previous jobs into folder of files to copy to remote repo + - name: Download previous content destined for GH pages + uses: actions/download-artifact@v2 + with: + path: ./copy-to-ext-repo/benchmarks/binsize + name: benchmark-binsize + # Doc-GH-Pages job > Generate `cargo doc` step - name: Cargo doc diff --git a/Cargo.lock b/Cargo.lock index da3ebabc86d..a51cb950f93 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -838,6 +838,13 @@ dependencies = [ "serde_json", ] +[[package]] +name = "icu_benchmark_binsize" +version = "0.1.0" +dependencies = [ + "cargo_metadata", +] + [[package]] name = "icu_benchmark_macros" version = "0.2.0" diff --git a/Cargo.toml b/Cargo.toml index 2ee9c52806a..fd517afe731 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ members = [ "provider/testdata", "tools/benchmark/macros", "tools/benchmark/memory", + "tools/benchmark/binsize", "tools/datagen", "utils/fixed_decimal", "utils/litemap", diff --git a/tools/benchmark/README.md b/tools/benchmark/README.md index 70aedd06928..7dbb6b0feea 100644 --- a/tools/benchmark/README.md +++ b/tools/benchmark/README.md @@ -100,3 +100,29 @@ dhat: The data in dhat-heap.json is viewable with dhat/dh_view.html This feature replaces the default system allocator with `dlmalloc`, the same Rust-based allocator used in the `wasm32-unknown-unknown` build target. 
This results in more reliable benchmarks by removing the platform-dependent allocator. This feature is used by the `cargo make valgrind` build task. + +## icu_benchmark_binsize + +This is a continuous integration tool to monitor the size of ICU4X binaries. +It is invoked by GitHub Action for each PR, measures the size of the ICU4X demo +files compiled to wasm (WebAssembly) format, and stores the result in a +branch of the repository for subsequent display. + +### Usage and Output + +```sh +# Prerequisite: build wasm binaries. +cargo make wasm-examples + +# Execute binsize benchmark +cargo run --package icu_benchmark_binsize -- wasmpkg/wasm-opt +``` + +Benchmark output is written in ndjson format, e.g. +`{"biggerIsBetter":false,"name":"simple","unit":"bytes","value":909161}` +for binary `simple.wasm`. + +### Packages used + +[benchmarking action](https://github.com/gregtatum/github-action-benchmark) – This is a fork that allows collecting +[ndjson](http://ndjson.org/) benchmark data. diff --git a/tools/benchmark/binsize/Cargo.toml b/tools/benchmark/binsize/Cargo.toml new file mode 100644 index 00000000000..c42f32ff755 --- /dev/null +++ b/tools/benchmark/binsize/Cargo.toml @@ -0,0 +1,12 @@ +# This file is part of ICU4X. For terms of use, please see the file +# called LICENSE at the top level of the ICU4X source tree +# (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +[package] +name = "icu_benchmark_binsize" +version = "0.1.0" +authors = ["The ICU4X Project Developers"] +edition = "2018" + +[dependencies] +cargo_metadata = "0.13" diff --git a/tools/benchmark/binsize/README.md b/tools/benchmark/binsize/README.md new file mode 100644 index 00000000000..90ae5948235 --- /dev/null +++ b/tools/benchmark/binsize/README.md @@ -0,0 +1,7 @@ +# icu_benchmark_binsize [![crates.io](http://meritbadge.herokuapp.com/icu_benchmark_binsize)](https://crates.io/crates/icu_benchmark_binsize) + + + +## More Information + +For more information on development, authorship, contributing etc. please visit [`ICU4X home page`](https://github.com/unicode-org/icu4x). diff --git a/tools/benchmark/binsize/src/main.rs b/tools/benchmark/binsize/src/main.rs new file mode 100644 index 00000000000..c5f6c01c42b --- /dev/null +++ b/tools/benchmark/binsize/src/main.rs @@ -0,0 +1,47 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +// This module takes as argument a directory path, looks for wasm +// executables in the directory (i.e., extension .wasm), takes the size in +// bytes of each, and writes the result in ndjson format to stdout. +// Note that the ICU4X wasm executables have to be built prior to +// execution of this module. Use 'cargo make wasm-examples'. +use std::env; +use std::fs; +use std::process; + +fn wasm_filesize(dir: &str) -> Result { + let paths = fs::read_dir(dir).expect("Directory wih wasm binaries not found!"); + let mut count: u64 = 0; + for path in paths { + let p = path.unwrap().path(); + if let Some(suffix) = p.extension() { + if suffix == "wasm" { + count += 1; + println!( + // Write the file name and size in bytes to stdout in ndjson format. 
+ "{{\"biggerIsBetter\":false,\"name\":{:?},\"unit\":\"bytes\",\"value\":{}}}", + p.file_stem().unwrap(), + p.metadata()?.len() + ); + } + } + } + Ok(count) +} + +fn main() { + let args: Vec = env::args().collect(); + if args.len() != 2 { + eprintln!("Usage: cargo run --package icu_benchmark_binsize -- "); + process::exit(1); + } + + let wasmdir = &args[1]; + let count = wasm_filesize(wasmdir); + if count.unwrap() == 0 { + eprintln!("No wasm binaries found in directory {}", wasmdir); + process::exit(1); + } +} From ab013513ef61b69600f8e0afa252445382205f56 Mon Sep 17 00:00:00 2001 From: Norbert Runge <41129501+gnrunge@users.noreply.github.com> Date: Tue, 27 Jul 2021 16:00:31 -0700 Subject: [PATCH 2/2] Update tools/benchmark/binsize/src/main.rs Co-authored-by: Greg Tatum --- tools/benchmark/binsize/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/benchmark/binsize/src/main.rs b/tools/benchmark/binsize/src/main.rs index c5f6c01c42b..46655a8a1f6 100644 --- a/tools/benchmark/binsize/src/main.rs +++ b/tools/benchmark/binsize/src/main.rs @@ -12,7 +12,7 @@ use std::fs; use std::process; fn wasm_filesize(dir: &str) -> Result { - let paths = fs::read_dir(dir).expect("Directory wih wasm binaries not found!"); + let paths = fs::read_dir(dir).expect("Directory with wasm binaries not found!"); let mut count: u64 = 0; for path in paths { let p = path.unwrap().path();