Skip to content

Commit

Permalink
Binary size benchmarking: Rust script to measure size of the ICU4X ex…
Browse files Browse the repository at this point in the history
…amples

compiled into wasm binaries. Set up GHA to build wasm binaries, measure
file sizes, push results into benchmark dashboard .

Resolves ticket unicode-org#140.

Factor in review feedback.
  • Loading branch information
gnrunge committed Jul 23, 2021
1 parent e5f7a5e commit 397c112
Show file tree
Hide file tree
Showing 7 changed files with 208 additions and 1 deletion.
106 changes: 105 additions & 1 deletion .github/workflows/build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -563,14 +563,108 @@ jobs:
path: ./benchmarks/memory/**
name: benchmark-memory

# Binary size benchmark: build and size wasm binaries; creates ndjson output data format

binsize:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2

- name: Get cargo-make version
id: cargo-make-version
run: |
echo "::set-output name=hash::$(cargo search cargo-make | grep '^cargo-make =' | md5sum)"
shell: bash

- name: Attempt to load cached cargo-make
uses: actions/cache@v2
id: cargo-make-cache
with:
path: |
~/.cargo/bin/cargo-make
~/.cargo/bin/cargo-make.exe
key: ${{ runner.os }}-${{ steps.cargo-make-version.outputs.hash }}

- name: Install cargo-make
if: steps.cargo-make-cache.outputs.cache-hit != 'true'
uses: actions-rs/[email protected]
with:
crate: cargo-make
version: latest

- name: Install prerequisites for wasm build
run: |
rustup component add rust-src
rustup toolchain list
rustup toolchain install nightly-2021-02-28
sudo npm install -g wasm-opt --unsafe-perm
sudo npm install -g wabt
cargo install twiggy
- name: Setup output data directory
run: mkdir -p benchmarks/binsize

- name: Build wasm executables and measure size
run: |
cargo make wasm-examples
cargo run --package icu_benchmark_binsize -- wasmpkg/wasm-opt| tee benchmarks/binsize/output.txt
- name: Store benchmark result (non-merge action only)
# Data from anything that is not a merge to mainline goes to a preliminary branch
if: github.event_name != 'push' || github.ref != 'refs/heads/main'
# Use gregtatum special feature to process ndjson-formatted benchmark data
uses: gregtatum/github-action-benchmark@d3f06f738e9612988d575db23fae5ca0008d3d12
with:
tool: 'ndjson'
output-file-path: benchmarks/binsize/output.txt
benchmark-data-dir-path: ./benchmarks/binsize
# Tentative setting, optimized value to be determined
alert-threshold: '200%'
fail-on-alert: true
gh-pages-branch: unmerged-pr-bench-data
auto-push: false
github-token: ${{ secrets.GITHUB_TOKEN }}
comment-on-alert: true

- name: Store benchmark result (merge to main only)
# Only for PRs that merge into the ICU4X mainline.
if: github.event_name == 'push' && github.ref == 'refs/heads/main' && github.repository == 'unicode-org/icu4x'
# Use gregtatum special feature to process ndjson-formatted benchmark data
uses: gregtatum/github-action-benchmark@d3f06f738e9612988d575db23fae5ca0008d3d12
with:
tool: 'ndjson'
output-file-path: benchmarks/binsize/output.txt
benchmark-data-dir-path: ./benchmarks/binsize
# Tentative setting, optimized value to be determined
alert-threshold: '200%'
fail-on-alert: true
gh-pages-branch: gh-pages
auto-push: true
github-token: ${{ secrets.GITHUB_TOKEN }}
comment-on-alert: true
alert-comment-cc-users: '@gnrunge,@sffc,@zbraniecki,@echeran,@gregtatum'

- name: Switch to branch gh-pages for benchmark data storage (merge to main only)
# Only for PRs that merge into the ICU4X mainline
if: github.event_name == 'push' && github.ref == 'refs/heads/main' && github.repository == 'unicode-org/icu4x'
run: git checkout gh-pages

- name: Upload binsize with benchmark data in ndjson format (merge to main only)
# Only for PRs that merge into the ICU4X mainline.
if: github.event_name == 'push' && github.ref == 'refs/heads/main' && github.repository == 'unicode-org/icu4x'
uses: actions/upload-artifact@v2
with:
path: ./benchmarks/binsize/**
name: benchmark-binsize

# Doc-GH-Pages job

doc_gh_pages:
name: Copy GH pages to docs repo (merge to main only)

runs-on: ubuntu-latest

needs: [check, tidy, benchmark, memory]
needs: [check, tidy, benchmark, memory, binsize]

## Only create docs for merges/pushes to main (skip PRs).
## Multiple unfinished PRs should not clobber docs from approved code.
Expand All @@ -593,6 +687,9 @@ jobs:
- name: Create (ensure existence of) folder for memory benchmark data to copy
run: mkdir -p copy-to-ext-repo/benchmarks/memory

- name: Create (ensure existence of) folder for binary size benchmark data to copy
run: mkdir -p copy-to-ext-repo/benchmarks/binsize

# Doc-GH-Pages job > Download benchmark dashboard files from previous jobs into folder of files to copy to remote repo

- name: Download previous content destined for GH pages
Expand All @@ -608,6 +705,13 @@ jobs:
path: ./copy-to-ext-repo/benchmarks/memory
name: benchmark-memory

# Doc-GH-Pages job > Download benchmark dashboard files from previous jobs into folder of files to copy to remote repo
- name: Download previous content destined for GH pages
uses: actions/download-artifact@v2
with:
path: ./copy-to-ext-repo/benchmarks/binsize
name: benchmark-binsize

# Doc-GH-Pages job > Generate `cargo doc` step

- name: Cargo doc
Expand Down
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ members = [
"provider/testdata",
"tools/benchmark/macros",
"tools/benchmark/memory",
"tools/benchmark/binsize",
"tools/datagen",
"utils/fixed_decimal",
"utils/litemap",
Expand Down
26 changes: 26 additions & 0 deletions tools/benchmark/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,29 @@ dhat: The data in dhat-heap.json is viewable with dhat/dh_view.html
This feature replaces the default system allocator with `dlmalloc`, the same Rust-based allocator used in the `wasm32-unknown-unknown` build target. This results in more reliable benchmarks by removing the platform-dependent allocator.

This feature is used by the `cargo make valgrind` build task.

## icu_benchmark_binsize

This is a continuous integration tool to monitor the size of ICU4X binaries.
It is invoked by GitHub Action for each PR, measures the size of the ICU4X demo
files compiled to wasm (WebAssembly) format, and stores the result in a
branch of the repository for subsequent display.

### Usage and Output

```sh
# Prerequisite: build wasm binaries.
cargo make wasm-examples

# Execute binsize benchmark
cargo run --package icu_benchmark_binsize -- wasmpkg/wasm-opt
```

Benchmark output is written in ndjson format, e.g.
`{"biggerIsBetter":false,"name":"simple","unit":"bytes","value":909161}`
for binary `simple.wasm`.

### Packages used

[benchmarking action](https://github.com/gregtatum/github-action-benchmark) – This is a fork that allows collecting
[ndjson](http://ndjson.org/) benchmark data.
12 changes: 12 additions & 0 deletions tools/benchmark/binsize/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# This file is part of ICU4X. For terms of use, please see the file
# called LICENSE at the top level of the ICU4X source tree
# (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

[package]
name = "icu_benchmark_binsize"
version = "0.1.0"
authors = ["The ICU4X Project Developers"]
edition = "2018"

[dependencies]
cargo_metadata = "0.13"
10 changes: 10 additions & 0 deletions tools/benchmark/binsize/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# icu_benchmark_binsize [![crates.io](http://meritbadge.herokuapp.com/icu_benchmark_binsize)](https://crates.io/crates/icu_benchmark_binsize)



## More Information

For a description of the binsize benchmark please see
[tools/benchmark/README.md](https://github.com/unicode-org/icu4x/blob/main/tools/benchmark/README.md#icu_benchmark_binsize).

For more information on development, authorship, contributing etc. please visit [`ICU4X home page`](https://github.com/unicode-org/icu4x).
47 changes: 47 additions & 0 deletions tools/benchmark/binsize/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

// This module takes as argument a directory path, looks for wasm
// executables in the directory (i.e., extension .wasm), takes the size in
// bytes of each, and writes the result in ndjson format to stdout.
// Note that the ICU4X wasm executables have to be build prior to
// execution of this module. Use 'cargo make wasm-example'.
use std::env;
use std::fs;
use std::process;

fn wasm_filesize(dir: &str) -> Result<u64, std::io::Error> {
let paths = fs::read_dir(dir).expect("Directory wih wasm binaries not found!");
let mut count :u64 = 0;
for path in paths {
let p = path.unwrap().path();
if let Some(suffix) = p.extension() {
if suffix == "wasm" {
count = count + 1;
println!(
// Write the file name and size in bytes to stdout in ndjson format.
"{{\"biggerIsBetter\":false,\"name\":{:?},\"unit\":\"bytes\",\"value\":{}}}",
p.file_stem().unwrap(),
p.metadata()?.len()
);
}
}
}
Ok(count)
}

fn main() {
let args: Vec<String> = env::args().collect();
if args.len() != 2 {
eprintln!("Usage: cargo run --package icu_benchmark_binsize -- <WASM BINARY DIRECTORY>");
process::exit(1);
}

let wasmdir = &args[1];
let count = wasm_filesize(wasmdir);
if count.unwrap() == 0 {
eprintln!("No wasm binaries found in directory {}", wasmdir);
process::exit(1);
}
}

0 comments on commit 397c112

Please sign in to comment.