Skip to content

Commit

Permalink
packer: add --tuning-info-scuba-log-file argument
Browse files Browse the repository at this point in the history
Summary:
Same as D66544989, but for the packer's `--tuning-info-*` arguments: add a `--tuning-info-scuba-log-file` argument so that tuning information can be logged to a file.

We also add a `--tuning-info-scuba-dataset` alias for `--tuning-info-scuba-table` so we can standardize on `scuba-dataset` for the dataset argument.

Reviewed By: clara-9

Differential Revision: D66546324

fbshipit-source-id: 41cb1cccf97515ecb388da5ab65669edbb5cbfb1
  • Loading branch information
markbt authored and facebook-github-bot committed Nov 28, 2024
1 parent 309384f commit 418b651
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 21 deletions.
27 changes: 16 additions & 11 deletions eden/mononoke/cmds/packer/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use std::io;
use std::io::prelude::*;
use std::io::BufReader;
use std::path::Path;
use std::path::PathBuf;
use std::time::Instant;

use anyhow::bail;
Expand Down Expand Up @@ -64,14 +65,13 @@ struct MononokePackerArgs {
#[clap(long, help = "If true, print the progress of the packing")]
print_progress: bool,

/// The scuba table that contains the tuning debug information,
/// for example, the time used for finding the best packing strategy
#[clap(
long,
default_value_t = String::from("file:///tmp/packer_tuning_log.json"),
help = "The scuba table that contains the tuning debug information"
)]
tuning_info_scuba_table: String,
/// Scuba table to log tuning information to
#[clap(long, alias = "tuning-info-scuba-dataset")]
tuning_info_scuba_table: Option<String>,

/// File to log tuning information to
#[clap(long)]
tuning_info_scuba_log_file: Option<PathBuf>,
}

const PACK_PREFIX: &str = "multiblob-";
Expand Down Expand Up @@ -145,7 +145,6 @@ fn main(fb: FacebookInit) -> Result<()> {
let max_parallelism = args.scheduled_max;
let keys_dir = args.keys_dir;
let print_progress = args.print_progress;
let tuning_info_scuba_table = args.tuning_info_scuba_table;

let env = app.environment();
let logger = app.logger();
Expand All @@ -161,8 +160,14 @@ fn main(fb: FacebookInit) -> Result<()> {
.collect::<Result<Vec<_>, io::Error>>()?;
keys_file_entries.shuffle(&mut thread_rng());

// prepare the tuning info scuba table
let tuning_info_scuba_builder = MononokeScubaSampleBuilder::new(fb, &tuning_info_scuba_table)?;
let mut tuning_info_scuba_builder = match args.tuning_info_scuba_table {
Some(table) => MononokeScubaSampleBuilder::new(fb, &table)?,
None => MononokeScubaSampleBuilder::with_discard(),
};

if let Some(file) = args.tuning_info_scuba_log_file {
tuning_info_scuba_builder = tuning_info_scuba_builder.with_log_file(file)?;
}

let total_file_count = keys_file_entries.len();
for (cur, entry) in keys_file_entries.iter().enumerate() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
$ echo 'repo0000.content.blake2.7f4c8284eea7351488400d6fdf82e1c262a81e20d4abd8ee469841d19b60c94a' >> $TESTTMP/pack_key_files4/reporepo.store0.part0.keys.txt

# Pack content into a pack
$ packer --zstd-level 19 --scuba-log-file packed.json --keys-dir $TESTTMP/pack_key_files4/ --print-progress --tuning-info-scuba-table "file://${TESTTMP}/tuning_scuba.json" 2>&1 | strip_glog
$ packer --zstd-level 19 --scuba-log-file packed.json --keys-dir $TESTTMP/pack_key_files4/ --print-progress --tuning-info-scuba-log-file "${TESTTMP}/tuning_scuba.json" 2>&1 | strip_glog
File *reporepo.store0.part0.keys.txt, which has 3 lines (glob)
Progress: 100.000% processing took * (glob)

Expand Down
12 changes: 6 additions & 6 deletions eden/mononoke/tests/integration/test-packer.t
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@
$ echo 'repo0000.content.blake2.7f4c8284eea7351488400d6fdf82e1c262a81e20d4abd8ee469841d19b60c94a' >> $TESTTMP/pack_key_files3/reporepo.store0.part0.keys.txt

# Pack content individually, to show recompression effect
$ packer --zstd-level 10 --scuba-log-file pack-individually.json --keys-dir $TESTTMP/pack_key_files1/ --tuning-info-scuba-table "file://${TESTTMP}/tuning_scuba.json"
$ packer --zstd-level 10 --scuba-log-file pack-individually.json --keys-dir $TESTTMP/pack_key_files2/ --tuning-info-scuba-table "file://${TESTTMP}/tuning_scuba.json"
$ packer --zstd-level 10 --scuba-log-file pack-individually.json --keys-dir $TESTTMP/pack_key_files3/ --tuning-info-scuba-table "file://${TESTTMP}/tuning_scuba.json"
$ packer --zstd-level 10 --scuba-log-file pack-individually.json --keys-dir $TESTTMP/pack_key_files1/ --tuning-info-scuba-log-file "${TESTTMP}/tuning_scuba.json"
$ packer --zstd-level 10 --scuba-log-file pack-individually.json --keys-dir $TESTTMP/pack_key_files2/ --tuning-info-scuba-log-file "${TESTTMP}/tuning_scuba.json"
$ packer --zstd-level 10 --scuba-log-file pack-individually.json --keys-dir $TESTTMP/pack_key_files3/ --tuning-info-scuba-log-file "${TESTTMP}/tuning_scuba.json"

# Check logging for individually packed keys (last 3 digits of the compressed size are matched by glob because they can change on zstd crate updates)
$ jq -r '.int * .normal | [ .blobstore_id, .blobstore_key, .uncompressed_size, .compressed_size ] | @csv' < pack-individually.json | sort | uniq
Expand All @@ -78,7 +78,7 @@
$ echo 'repo0000.content.blake2.7f4c8284eea7351488400d6fdf82e1c262a81e20d4abd8ee469841d19b60c94a' >> $TESTTMP/pack_key_files4/reporepo.store0.part0.keys.txt

# Pack content into a pack
$ packer --zstd-level 19 --scuba-log-file packed.json --keys-dir $TESTTMP/pack_key_files4/ --tuning-info-scuba-table "file://${TESTTMP}/tuning_scuba.json"
$ packer --zstd-level 19 --scuba-log-file packed.json --keys-dir $TESTTMP/pack_key_files4/ --tuning-info-scuba-log-file "${TESTTMP}/tuning_scuba.json"

# Check logging for packed keys (last 3 digits of the compressed size are matched by glob because they can change on zstd crate updates)
$ jq -r '.int * .normal | [ .blobstore_id, .blobstore_key, .pack_key, .uncompressed_size, .compressed_size ] | @csv' < packed.json | sort | uniq
Expand Down Expand Up @@ -125,7 +125,7 @@
$ echo 'repo0000.alias.sha256.85b856bc2313fcddec8464984ab2d384f61625890ee19e4f909dd80ac36e8fd7' >> $TESTTMP/pack_key_files_aliases/reporepo.store0.part0.keys.txt
$ echo 'repo0000.alias.sha256.9b798d4eb3901972c1311a3c6a21480e3f29c8c64cd6bbb81a977ecab56452e3' >> $TESTTMP/pack_key_files_aliases/reporepo.store0.part0.keys.txt

$ packer --zstd-level 19 --keys-dir $TESTTMP/pack_key_files_aliases/ --tuning-info-scuba-table "file://${TESTTMP}/tuning_scuba.json"
$ packer --zstd-level 19 --keys-dir $TESTTMP/pack_key_files_aliases/ --tuning-info-scuba-log-file "${TESTTMP}/tuning_scuba.json"

# Show that they're not packed (hardlink count of 1)
$ stat -c '%s %h %N' $TESTTMP/blobstore/0/blobs/blob-repo0000.alias.* | sort -n
Expand All @@ -152,7 +152,7 @@
$ echo '' >> $TESTTMP/pack_key_files5/reporepo.store0.part0.keys.txt
$ echo 'repo0000.content.blake2.7f4c8284eea7351488400d6fdf82e1c262a81e20d4abd8ee469841d19b60c94a' >> $TESTTMP/pack_key_files5/reporepo.store0.part0.keys.txt

$ packer --zstd-level 19 --keys-dir $TESTTMP/pack_key_files5/ --tuning-info-scuba-table "file://${TESTTMP}/tuning_scuba.json"
$ packer --zstd-level 19 --keys-dir $TESTTMP/pack_key_files5/ --tuning-info-scuba-log-file "${TESTTMP}/tuning_scuba.json"
$ stat -c '%s %h %N' $TESTTMP/blobstore/0/blobs/blob-repo0000.content.blake2.* | sort -n
* 1 '$TESTTMP/blobstore/0/blobs/blob-repo0000.content.blake2.7f4c8284eea7351488400d6fdf82e1c262a81e20d4abd8ee469841d19b60c94a.pack' (glob)
* 2 '$TESTTMP/blobstore/0/blobs/blob-repo0000.content.blake2.4caa3d2f7430890df6f5deb3b652fcc88769e3323c0b7676e9771d172a521bbd.pack' (glob)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ Set up the key file for packing
$ (cd blobstore/0/blobs; ls) | sed -e 's/^blob-//' -e 's/.pack$//' >> $TESTTMP/pack_key_files_1/reporepo.store1.part1.keys.txt

Pack the blobs in the two packed stores differently
$ packer --zstd-level=3 --keys-dir $TESTTMP/pack_key_files_0/ --tuning-info-scuba-table "file://${TESTTMP}/tuning_scuba.json"
$ packer --zstd-level=19 --keys-dir $TESTTMP/pack_key_files_1/ --tuning-info-scuba-table "file://${TESTTMP}/tuning_scuba.json"
$ packer --zstd-level=3 --keys-dir $TESTTMP/pack_key_files_0/ --tuning-info-scuba-log-file "${TESTTMP}/tuning_scuba.json"
$ packer --zstd-level=19 --keys-dir $TESTTMP/pack_key_files_1/ --tuning-info-scuba-log-file "${TESTTMP}/tuning_scuba.json"

Run a scrub. We need a scrub action to put ScrubBlobstore in the stack, which is necessary to make sure all the inner stores of the multiplex are read
$ mononoke_walker -l loaded --blobstore-scrub-action=ReportOnly scrub -q -I deep -i bonsai -i FileContent -b master_bookmark -a all --pack-log-scuba-file pack-info-packed.json 2>&1 | strip_glog
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ Check logged pack info. Commit time is forced to zero in tests, hence mtime is 0
$ (cd blobstore/blobs; ls) | sed -e 's/^blob-//' -e 's/.pack$//' >> $TESTTMP/pack_key_files/reporepo.store.part0.keys.txt

Now pack the blobs
$ packer --zstd-level=3 --keys-dir $TESTTMP/pack_key_files/ --tuning-info-scuba-table "file://${TESTTMP}/tuning_scuba.json"
$ packer --zstd-level=3 --keys-dir $TESTTMP/pack_key_files/ --tuning-info-scuba-log-file "${TESTTMP}/tuning_scuba.json"

Run a scrub again now that the storage is packed
$ mononoke_walker -l loaded scrub -q -I deep -i bonsai -i FileContent -p Changeset --checkpoint-name=bonsai_packed --checkpoint-path=test_sqlite -a all --pack-log-scuba-file pack-info-packed.json 2>&1 | strip_glog
Expand Down

0 comments on commit 418b651

Please sign in to comment.