From 263911e7fd5143e23ff2f8e3808d84d87cba037d Mon Sep 17 00:00:00 2001 From: "Jeff Washington (jwash)" Date: Tue, 19 Jul 2022 09:55:52 -0500 Subject: [PATCH] save off what we find when calculating hash (#26663) --- core/src/accounts_hash_verifier.rs | 3 ++ runtime/src/accounts_background_service.rs | 1 + runtime/src/accounts_db.rs | 33 +++++++++++++++++++--- runtime/src/accounts_hash.rs | 2 ++ 4 files changed, 35 insertions(+), 4 deletions(-) diff --git a/core/src/accounts_hash_verifier.rs b/core/src/accounts_hash_verifier.rs index 8b557202fa..9ca5b88eb8 100644 --- a/core/src/accounts_hash_verifier.rs +++ b/core/src/accounts_hash_verifier.rs @@ -141,6 +141,7 @@ impl AccountsHashVerifier { use_write_cache: false, epoch_schedule: &accounts_package.epoch_schedule, rent_collector: &accounts_package.rent_collector, + store_detailed_debug_info_on_failure: false, }, &sorted_storages, timings, @@ -161,6 +162,8 @@ impl AccountsHashVerifier { use_write_cache: false, epoch_schedule: &accounts_package.epoch_schedule, rent_collector: &accounts_package.rent_collector, + // now that we've failed, store off the failing contents that produced a bad capitalization + store_detailed_debug_info_on_failure: true, }, &sorted_storages, HashStats::default(), diff --git a/runtime/src/accounts_background_service.rs b/runtime/src/accounts_background_service.rs index 06f18f9f7f..5f5a429708 100644 --- a/runtime/src/accounts_background_service.rs +++ b/runtime/src/accounts_background_service.rs @@ -240,6 +240,7 @@ impl SnapshotRequestHandler { use_write_cache: false, epoch_schedule: snapshot_root_bank.epoch_schedule(), rent_collector: snapshot_root_bank.rent_collector(), + store_detailed_debug_info_on_failure: false, }, ).unwrap(); assert_eq!(previous_hash, this_hash); diff --git a/runtime/src/accounts_db.rs b/runtime/src/accounts_db.rs index 99e5baef23..abde334048 100644 --- a/runtime/src/accounts_db.rs +++ b/runtime/src/accounts_db.rs @@ -6462,7 +6462,7 @@ impl AccountsDb { let eligible_for_caching = !config.use_write_cache && end.saturating_sub(start) == MAX_ITEMS_PER_CHUNK; - if eligible_for_caching { + if eligible_for_caching || config.store_detailed_debug_info_on_failure { let range = bin_range.end - bin_range.start; scanner.init_accum(range); } @@ -6478,7 +6478,9 @@ impl AccountsDb { let mut file_name = String::default(); // if we're using the write cache, we can't cache the hash calc results because not all accounts are in append vecs. - if should_cache_hash_data && eligible_for_caching { + if (should_cache_hash_data && eligible_for_caching) + || config.store_detailed_debug_info_on_failure + { let mut load_from_cache = true; let mut hasher = std::collections::hash_map::DefaultHasher::new(); // wrong one? @@ -6515,7 +6517,7 @@ impl AccountsDb { amod.hash(&mut hasher); } } - if load_from_cache { + if load_from_cache && eligible_for_caching { // we have a hash value for all the storages in this slot // so, build a file name: let hash = hasher.finish(); @@ -6696,6 +6698,7 @@ impl AccountsDb { use_write_cache: can_cached_slot_be_unflushed, epoch_schedule, rent_collector, + store_detailed_debug_info_on_failure: false, }, expected_capitalization, ) @@ -6809,6 +6812,20 @@ impl AccountsDb { ); } + /// normal code path returns the common cache path + /// when called after a failure has been detected, redirect the cache storage to a separate folder for debugging later + fn get_cache_hash_data(&self, config: &CalcAccountsHashConfig<'_>) -> CacheHashData { + if !config.store_detailed_debug_info_on_failure { + CacheHashData::new(&self.accounts_hash_cache_path) + } else { + // this path executes when we are failing with a hash mismatch + let mut new = self.accounts_hash_cache_path.clone(); + new.push("failed_calculate_accounts_hash_cache"); + let _ = std::fs::remove_dir_all(&new); + CacheHashData::new(&new) + } + } + // modeled after get_accounts_delta_hash // intended to be faster than calculate_accounts_hash pub fn calculate_accounts_hash_without_index( @@ -6827,7 +6844,7 @@ impl AccountsDb { let mut previous_pass = PreviousPass::default(); let mut final_result = (Hash::default(), 0); - let cache_hash_data = CacheHashData::new(&self.accounts_hash_cache_path); + let cache_hash_data = self.get_cache_hash_data(config); for pass in 0..num_hash_scan_passes { let bounds = Range { @@ -6962,6 +6979,7 @@ impl AccountsDb { use_write_cache: can_cached_slot_be_unflushed, epoch_schedule, rent_collector, + store_detailed_debug_info_on_failure: false, }, None, )?; @@ -9001,6 +9019,7 @@ pub mod tests { use_write_cache: false, epoch_schedule: &EpochSchedule::default(), rent_collector: &RentCollector::default(), + store_detailed_debug_info_on_failure: false, }, None, ) @@ -9390,6 +9409,7 @@ pub mod tests { use_write_cache: false, epoch_schedule: &EpochSchedule::default(), rent_collector: &RentCollector::default(), + store_detailed_debug_info_on_failure: false, }, &get_storage_refs(&storages), HashStats::default(), @@ -9419,6 +9439,7 @@ pub mod tests { use_write_cache: false, epoch_schedule: &EpochSchedule::default(), rent_collector: &RentCollector::default(), + store_detailed_debug_info_on_failure: false, }, &get_storage_refs(&storages), HashStats::default(), @@ -9529,6 +9550,7 @@ pub mod tests { use_write_cache: false, epoch_schedule: &EpochSchedule::default(), rent_collector: &RentCollector::default(), + store_detailed_debug_info_on_failure: false, }, &get_storage_refs(&storages), test_scan, @@ -11570,6 +11592,7 @@ pub mod tests { use_write_cache: false, epoch_schedule: &EpochSchedule::default(), rent_collector: &RentCollector::default(), + store_detailed_debug_info_on_failure: false, }, ) .is_err()); @@ -11602,6 +11625,7 @@ pub mod tests { use_write_cache: false, epoch_schedule: &EpochSchedule::default(), rent_collector: &RentCollector::default(), + store_detailed_debug_info_on_failure: false, }, ) .unwrap(), @@ -11615,6 +11639,7 @@ pub mod tests { use_write_cache: false, epoch_schedule: &EpochSchedule::default(), rent_collector: &RentCollector::default(), + store_detailed_debug_info_on_failure: false, }, ) .unwrap(), diff --git a/runtime/src/accounts_hash.rs b/runtime/src/accounts_hash.rs index 65b4fb3117..a0ce69a553 100644 --- a/runtime/src/accounts_hash.rs +++ b/runtime/src/accounts_hash.rs @@ -43,6 +43,8 @@ pub struct CalcAccountsHashConfig<'a> { pub use_write_cache: bool, pub epoch_schedule: &'a EpochSchedule, pub rent_collector: &'a RentCollector, + /// used for tracking down hash mismatches after the fact + pub store_detailed_debug_info_on_failure: bool, } impl<'a> CalcAccountsHashConfig<'a> {