From 5c6b05f11299f9b372ab88450f7391c09f1b683a Mon Sep 17 00:00:00 2001 From: Herman Venter Date: Mon, 9 Dec 2024 09:50:28 -0800 Subject: [PATCH] Cleanup function summary caches, take 2. (#19) --- binaries/summary_store.tar | Bin 3527168 -> 3527168 bytes checker/src/body_visitor.rs | 2 +- checker/src/call_visitor.rs | 18 ++--- checker/src/callbacks.rs | 4 +- checker/src/constant_domain.rs | 10 +-- checker/src/crate_visitor.rs | 7 +- checker/src/summaries.rs | 121 +++++++++++++++------------------ 7 files changed, 72 insertions(+), 90 deletions(-) diff --git a/binaries/summary_store.tar b/binaries/summary_store.tar index 60c1ec4aaf78f45f25784d40db12ff42f9dff11e..968b52263fa1d53414fe814f4344a847ca6376d1 100644 GIT binary patch delta 1634 zcmZwHeM}Q~7zc2!y~4FnsBNhtZRz`6-_E=Cu2+YfC<;CulS!wQU#i8C>^=ImO=+=OB4rLmHa;kiJZX<4i!Q(`GTPT+$y z>*E=Y4RZb_LwuPe*3Sk4JnH9T{qpr#tmI=@A3Go=aX<*S z!ooSlbBiiU7fV4_Z<(ehD&|G>G7QI{;J9VzIAAwfF5{4=v{tF1C}Me}g5@@Ac0Md5 ztl#4>KWyElCMIe}d`LWO#+zzC-it$aqcxX>)^h6s4FoS+&#EDLiW zdO!xNy|&wG=>66DnGO`wY^PlCn!}cp0LANUN)5b|Wot->-*(tW4TFmzTRjftmu;*D z7HzXFkwW?(wwWWr*I6fs8u)`%;)O!;$QTm0kA5eyBU)Xwg8Ja$Mozb18Wy6-w7RC>Cbq zlbsqE*X}?)XSb5VUCgJM~Sc_cAy`Y%z%eRRIUWNM3qXy2T^S8+dr!_~K`trGdf+DY??Xu=H~DyLHI zs^sz&IopY-dJA41j*Byj>=Je~G?r3h(uoP<&x9q>p^0$kF{!a;=o?m5^!8rqMc1nO ztn2^XXK&w^Ohm~iGQCmEFj7u&W{)-#7BrC|2F4$xG+Mb@Q%Vp~&o!C2BVJYlYmC%f z9=_?Jnlqs*Of4W`W{FeKLeVK|ksqEOcD7`~I-j$Xha>l#Q?($jbQTApCFuH6iPxU) zJ|@1G<0=ss9--HYRRt6-wztvwa5~rZml7f&SEUwIGFQY8)vd1P6lg7Qbu*yH++i(H zTU}Euur0@ZAPF)ry1M+}Uh8hqCLX<>C?%qlMCPEx4JVT*@z8437)aB(*D%m?$(@-5 zZ@%Te!Gkx+)2fBDjqYJAv`zM$Nd)!>_frP;HG00&!flfW^?Y&FGnIf1vpo7tcskg7 zUz_;a@Gf*d#<4OF-S>5`xRKBWAUs{!;Q(>6S3d|0Kli=4zuudY3U@u;y-xV(rdMu& zrg`4K8K}vm-KlV5zxNssEfeSj>ye%Gomb#wIX!$dUKcUn6*nBC^EMx*b>jAiGyy%w zXc;CRY@vUIvCXuOz}?H9bVK1u+Ae$cv@J@P#qa63K$G~=EL%QwKA?v@D}_-B+M>f= z*U61H)Uzbfgr8*4{g}L^3K^M@kqsGrA)`NJn^7@xAf?0^^%7u>b%7 delta 1615 zcma*neM}Q~90u@SdxaK()?PsnT1#KJ>y;jN*B%9#x+qgIPFy0%BEDtnoJ@Qnq8pK^ zaVdKV48rEAGjYqD1eTzV=`-BiByJ+=hDIIRA|lb~GH?RwCTPZfY(vX2nENK#J#FZ5HlQ7ko-o*-^Z0DNe(@3tD&f&BstSjv~hLO z$|>aefbv&$W~L%%1ViL_*QuJ{ld49}A%)2MdR(!CvXxUdiqH}|f+qAt1QAIX2qO_i zj3c6n7$TN15oTgMVIdg8O2iTI!~`ONNF*i_NklR++-WB!5e^~+l+F2itZJ%pCgKgB zGuL7)Syi8`r)Wxpi&LCW2J<$798B} zI!xovMAwWw)WX%kAbOqa0}GO`x)dFjgfS{|deI%j7`nHLOAL3*Xf-xW&BNp_w?juCjC}Hh zS~b%hhr7GoTsn@qJacqt{KhRYcy_X9vknVyxR*`C(oLQNI?$|V0fW*CPrepyQ#@6f z*#CoPJ`H`Tr-Z?qeV*5JXnV_ZITtC;)Qth$>+q-y=B-X`SHRr&#QM?wNh&Ado&3}$ z20sl9XFU5-PsO1ohaJw`OufvYpptz%6N~(8XoAsMeQ1JVfPF~AX6JfD6qd4mjN$8W zLY;QN7LD73EaR#cga7;TTHCf< zN4qve&C;uTPqVWl7n~`}8NTTzHwN39p=(k7b$VpCvU{|K>RQdAsQ!9AI?l51YK`Xi z@`?=uk)3*M|CT*(j+N&uZZZs5_ULz%YczJXc^Nky?{~5}N%-gzSH!}Y$1zsqnZ3mU z+}*`}nvK$}-pdLS9&sTn>RY`lGtoT5+mwyGBi;flssem{0A?+JGKOBaINheUoA?*h zhNHq2B#q}E#Gro@e_I4Ii7&Ha<>!2X6IrYHCKJ|n^IZXS*7D_6^aOklMVz?GH$=m^ z!PluEZ<24X6<>VjOP6qKg)ciAjSqZ}L{PQ9<5rYUO(U7xJ-$=p(7YurIU5JPY2RDX zb}nrii|03{6&S$l{UpUdWnG7s}Uj)k+Ats@NyQtNZ!{1MWlUqL6b$h&RwD)~?-+ zCC3GiekX+2)k7^TtvWsy=Aq?Rp*W)Ue5=6H=WpF;!KGdy;a{0@k|DYD+<0TzcNcoK z12p-cei>wM#)R!+*e-?ba@ZaS+k;2#O6C61Z50ZPE<{i#m9c9#m_hdbe=c_^D~l|6 V8Mf2PnC%`?Z~#p@dtwbFEs!F diff --git a/checker/src/body_visitor.rs b/checker/src/body_visitor.rs index a7023fa1..24d2df74 100644 --- a/checker/src/body_visitor.rs +++ b/checker/src/body_visitor.rs @@ -106,7 +106,7 @@ impl<'analysis, 'compilation, 'tcx> BodyVisitor<'analysis, 'compilation, 'tcx> { let tcx = crate_visitor.tcx; let function_name = crate_visitor .summary_cache - .get_summary_key_for(def_id) + .get_summary_key_for(def_id, tcx) .clone(); let mir = if tcx.is_const_fn(def_id) { tcx.mir_for_ctfe(def_id) diff --git a/checker/src/call_visitor.rs b/checker/src/call_visitor.rs index 46bacd44..e459e814 100644 --- a/checker/src/call_visitor.rs +++ b/checker/src/call_visitor.rs @@ -106,7 +106,7 @@ impl<'call, 'block, 'analysis, 'compilation, 'tcx> pub fn create_and_cache_function_summary( &mut self, func_args: &Option>>>, - initial_type_cache: &Option, Ty<'tcx>>>>, + type_args: &Option, Ty<'tcx>>>>, ) -> Summary { let func_type = self .block_visitor @@ -149,7 +149,7 @@ impl<'call, 'block, 'analysis, 'compilation, 'tcx> .bv .cv .summary_cache - .get_summary_for_call_site(func_ref, func_args, initial_type_cache); + .get_summary_for_call_site(func_ref, func_args, type_args); if previous_summary.is_computed { summary.join_side_effects(previous_summary) } @@ -162,12 +162,7 @@ impl<'call, 'block, 'analysis, 'compilation, 'tcx> .bv .cv .summary_cache - .set_summary_for_call_site( - func_ref, - func_args, - initial_type_cache, - summary.clone(), - ); + .set_summary_for_call_site(func_ref, func_args, type_args, summary.clone()); } return summary; } @@ -361,7 +356,7 @@ impl<'call, 'block, 'analysis, 'compilation, 'tcx> .is_none()) && func_args.is_none()), ); - let initial_type_cache = self.initial_type_cache.clone(); + let type_args = self.initial_type_cache.clone(); let call_depth = *self .block_visitor .bv @@ -373,14 +368,13 @@ impl<'call, 'block, 'analysis, 'compilation, 'tcx> .bv .cv .summary_cache - .get_summary_for_call_site(func_ref, &func_args, &initial_type_cache) + .get_summary_for_call_site(func_ref, &func_args, &type_args) .clone(); if result.is_computed || func_ref.def_id.is_none() { return Some(result); } if call_depth < 4 { - let mut summary = - self.create_and_cache_function_summary(&func_args, &initial_type_cache); + let mut summary = self.create_and_cache_function_summary(&func_args, &type_args); if call_depth >= 1 { summary.post_condition = None; diff --git a/checker/src/callbacks.rs b/checker/src/callbacks.rs index 9d0e9c26..51eec854 100644 --- a/checker/src/callbacks.rs +++ b/checker/src/callbacks.rs @@ -9,7 +9,7 @@ use crate::constant_domain::ConstantValueCache; use crate::crate_visitor::CrateVisitor; use crate::known_names::KnownNamesCache; use crate::options::Options; -use crate::summaries::PersistentSummaryCache; +use crate::summaries::SummaryCache; use crate::type_visitor::TypeCache; use crate::utils; @@ -162,7 +162,7 @@ impl MiraiCallbacks { options: &std::mem::take(&mut self.options), session: &compiler.sess, generic_args_cache: HashMap::new(), - summary_cache: PersistentSummaryCache::new(tcx, summary_store_path), + summary_cache: SummaryCache::new(summary_store_path), tcx, test_run: self.test_run, type_cache: Rc::new(RefCell::new(TypeCache::new())), diff --git a/checker/src/constant_domain.rs b/checker/src/constant_domain.rs index 3f80c8d3..c9c3d123 100644 --- a/checker/src/constant_domain.rs +++ b/checker/src/constant_domain.rs @@ -18,7 +18,7 @@ use std::{f16, f64}; use crate::expression::{Expression, ExpressionType}; use crate::known_names::{KnownNames, KnownNamesCache}; -use crate::summaries::PersistentSummaryCache; +use crate::summaries::SummaryCache; use crate::utils; /// Abstracts over constant values referenced in MIR and adds information @@ -135,9 +135,9 @@ impl ConstantDomain { generic_args: Option>, tcx: TyCtxt<'tcx>, known_names_cache: &mut KnownNamesCache, - summary_cache: &mut PersistentSummaryCache<'tcx>, + summary_cache: &mut SummaryCache<'tcx>, ) -> ConstantDomain { - let summary_cache_key = summary_cache.get_summary_key_for(def_id).to_owned(); + let summary_cache_key = summary_cache.get_summary_key_for(def_id, tcx).to_owned(); let argument_type_key = utils::argument_types_key_str(tcx, generic_args); let generic_arguments = if let Some(generic_args) = generic_args { generic_args @@ -1606,11 +1606,11 @@ impl<'tcx> ConstantValueCache<'tcx> { tcx: TyCtxt<'tcx>, // A cache of known names to updated with this function if its name is known. known_names_cache: &mut KnownNamesCache, - // A cache of function summaries. It is not provided with a summary for def_id + // A collection of caches for function summaries. It is not provided with a summary for def_id // at this point, but it does provide a place to cache the key string that is used // to cache the summary when it is created. We do this so that we can use DefIds // for lookups while also making the summary cache persistable. - summary_cache: &mut PersistentSummaryCache<'tcx>, + summary_cache: &mut SummaryCache<'tcx>, ) -> &ConstantDomain { let function_id = self.function_cache.len(); // Distinct instances of def_id will have distinct ty values, but diff --git a/checker/src/crate_visitor.rs b/checker/src/crate_visitor.rs index 84d92370..2be69341 100644 --- a/checker/src/crate_visitor.rs +++ b/checker/src/crate_visitor.rs @@ -30,7 +30,7 @@ use crate::constant_domain::ConstantValueCache; use crate::expected_errors; use crate::known_names::KnownNamesCache; use crate::options::Options; -use crate::summaries::PersistentSummaryCache; +use crate::summaries::SummaryCache; use crate::tag_domain::Tag; use crate::type_visitor::TypeCache; use crate::utils; @@ -52,7 +52,7 @@ pub struct CrateVisitor<'compilation, 'tcx> { pub known_names_cache: KnownNamesCache, pub options: &'compilation Options, pub session: &'compilation Session, - pub summary_cache: PersistentSummaryCache<'tcx>, + pub summary_cache: SummaryCache<'tcx>, pub tcx: TyCtxt<'tcx>, pub type_cache: Rc>>, pub test_run: bool, @@ -186,7 +186,8 @@ impl<'compilation> CrateVisitor<'compilation, '_> { if matches!(kind, rustc_hir::def::DefKind::Static { .. }) || utils::is_foreign_contract(self.tcx, def_id) { - self.summary_cache.set_summary_for(def_id, summary); + self.summary_cache + .set_summary_for(def_id, self.tcx, summary); } let old_diags = self.diagnostics_for.insert(def_id, diagnostics); checked_assume!(old_diags.is_none()); diff --git a/checker/src/summaries.rs b/checker/src/summaries.rs index 09ee48d9..32b5d27f 100644 --- a/checker/src/summaries.rs +++ b/checker/src/summaries.rs @@ -362,16 +362,20 @@ pub struct CallSiteKey<'tcx> { func_args: Option>>>, /// If this is None, func_args must not be None. type_args: Option, Ty<'tcx>>>>, + /// Uniquely identifies the function reference used at the call site. + function_id: usize, } impl<'tcx> CallSiteKey<'tcx> { pub fn new( func_args: Option>>>, type_args: Option, Ty<'tcx>>>>, + function_id: usize, ) -> CallSiteKey<'tcx> { CallSiteKey { func_args, type_args, + function_id, } } } @@ -387,54 +391,53 @@ impl Hash for CallSiteKey<'_> { ty.kind().hash(state); } } + self.function_id.hash(state); } } -/// A persistent map from summary key to Summary, along with a transient cache from DefId to -/// Summary. The latter is cleared after every outer fixed point loop iteration. -/// Also tracks which definitions depend on (use) any particular Summary. -pub struct PersistentSummaryCache<'tcx> { - /// The sled database that stores the summaries. Can be persisted between runs. +/// A database and collection of in-memory caches for function summaries. +pub struct SummaryCache<'tcx> { + /// The sled database that stores the summaries when persisted between runs. + /// Chiefly used to store summaries for rust standard library functions that have no MIR. db: Db, - /// Functions that are not generic instances are uniquely identified by their def_id and their - /// summaries are cached here. + /// Functions that are entry points have def_ids but no function_id, because they are not + /// derived from function references, have their summaries cached here. def_id_cache: HashMap, - /// Functions that are generic instances are identified by their function_id and their summaries - /// are cached here. - typed_cache: HashMap, - /// Maps call sites to specialized summary of the referenced function. - /// Call site specialization involves using the actual generic arguments supplied by the call + /// Functions that are summarized because they are called via function references, have their + /// summaries cached here. These summaries will be specialized using the generic arguments (if any) + /// supplied by the function reference. + function_id_cache: HashMap, + /// Maps call sites to specialized summaries of the referenced functions. + /// Call site specialization involves using the actual generic type arguments supplied by the call /// site, along with the values of any constant functions that are supplied as actual arguments. - typed_cache_table: HashMap, HashMap>, + /// This cache is only used if the call site supplies generic type arguments or constant functions. + call_site_cache: HashMap, Summary>, /// Functions that have no def_id (and hence no function_id) and no type signature are /// cached here. Such functions are either entry points or dummy functions that provide /// summaries for functions that have no MIR and are shadowed by definitions in a contracts crate. reference_cache: HashMap, Summary>, + /// A cache of summary keys for each def_id. This is used to avoid recomputing the summary key, + /// which is expensive to do and can be done more than once per def_id if there are more than + /// one call site that references the def_id. key_cache: HashMap>, - type_context: TyCtxt<'tcx>, } -impl Debug for PersistentSummaryCache<'_> { +impl Debug for SummaryCache<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> Result { - "PersistentSummaryCache".fmt(f) + "SummaryCache".fmt(f) } } -impl<'tcx> PersistentSummaryCache<'tcx> { - /// Creates a new persistent summary cache, using (or creating) a Sled database at the given - /// directory path. +impl<'tcx> SummaryCache<'tcx> { + /// Creates a new summary cache, using (or creating) a Sled database at the given directory path. #[logfn(TRACE)] - pub fn new( - type_context: TyCtxt<'tcx>, - summary_store_directory_str: String, - ) -> PersistentSummaryCache<'tcx> { + pub fn new(summary_store_directory_str: String) -> SummaryCache<'tcx> { use rand::{thread_rng, Rng}; use std::thread; use std::time::Duration; let mut rng = thread_rng(); - let summary_store_path = - Self::create_default_summary_store_if_needed(&summary_store_directory_str); + let summary_store_path = Self::create_summary_store_if_needed(&summary_store_directory_str); let config = Config::default().path(summary_store_path); let mut result; loop { @@ -450,14 +453,13 @@ impl<'tcx> PersistentSummaryCache<'tcx> { debug!("{} ", err); assume_unreachable!(); }); - PersistentSummaryCache { + SummaryCache { db, def_id_cache: HashMap::new(), - typed_cache: HashMap::new(), - typed_cache_table: HashMap::new(), + function_id_cache: HashMap::new(), + call_site_cache: HashMap::new(), reference_cache: HashMap::new(), key_cache: HashMap::new(), - type_context, } } @@ -465,9 +467,7 @@ impl<'tcx> PersistentSummaryCache<'tcx> { /// The initial value of the database contains summaries of standard library functions. /// The code used to create these summaries are mirai/standard_contracts. #[logfn_inputs(TRACE)] - fn create_default_summary_store_if_needed( - summary_store_directory_str: &str, - ) -> std::path::PathBuf { + fn create_summary_store_if_needed(summary_store_directory_str: &str) -> std::path::PathBuf { use std::env; use std::fs::File; use std::io::Write; @@ -498,9 +498,7 @@ impl<'tcx> PersistentSummaryCache<'tcx> { /// the summary cache, which is a key value store. The string will always be the same as /// long as the definition does not change its name or location, so it can be used to /// transfer information from one compilation to the next, making incremental analysis possible. - #[logfn_inputs(TRACE)] - pub fn get_summary_key_for(&mut self, def_id: DefId) -> &Rc { - let tcx = self.type_context; + pub fn get_summary_key_for(&mut self, def_id: DefId, tcx: TyCtxt<'tcx>) -> &Rc { self.key_cache .entry(def_id) .or_insert_with(|| utils::summary_key_str(tcx, def_id)) @@ -521,47 +519,35 @@ impl<'tcx> PersistentSummaryCache<'tcx> { // Use the ids as keys if they are available, since they make much better keys. (Some(def_id), Some(function_id)) => { if func_args.is_some() || type_args.is_some() { - let typed_cache_key = CallSiteKey::new(func_args.clone(), type_args.clone()); + let typed_cache_key = + CallSiteKey::new(func_args.clone(), type_args.clone(), function_id); // Need the double lookup in order to allow the recursive call to get_summary_for_function_constant. - let summary_is_cached = - if let Some(type_table) = self.typed_cache_table.get(&typed_cache_key) { - type_table.get(&function_id).is_some() - } else { - false - }; + let summary_is_cached = self.call_site_cache.contains_key(&typed_cache_key); return if summary_is_cached { - self.typed_cache_table - .get(&typed_cache_key) - .unwrap() - .get(&function_id) - .unwrap() + self.call_site_cache.get(&typed_cache_key).unwrap() } else { // can't have self borrowed at this point. let summary = self .get_summary_for_call_site(func_ref, &None, &None) .clone(); - self.typed_cache_table + self.call_site_cache .entry(typed_cache_key) - .or_default() - .entry(function_id) .or_insert(summary) }; } - if self.typed_cache.contains_key(&function_id) { - let result = self.typed_cache.get(&function_id); + if self.function_id_cache.contains_key(&function_id) { + let result = self.function_id_cache.get(&function_id); result.expect("value disappeared from typed_cache") } else { if let Some(summary) = self.get_persistent_summary_using_arg_types_if_possible( &func_ref.summary_cache_key, &func_ref.argument_type_key, ) { - return self.typed_cache.entry(function_id).or_insert(summary); + return self.function_id_cache.entry(function_id).or_insert(summary); } - // In this case we default to the summary that is not argument type specific, but - // we need to take care not to cache this summary in self.typed_cache because updating - // self.cache will not also update self.typed_cache. + // In this case we default to the summary that is not argument type specific. let db = &self.db; self.def_id_cache.entry(def_id).or_insert_with(|| { let summary = @@ -655,19 +641,16 @@ impl<'tcx> PersistentSummaryCache<'tcx> { &mut self, func_ref: &Rc, func_args: &Option>>>, - initial_type_cache: &Option, Ty<'tcx>>>>, + type_args: &Option, Ty<'tcx>>>>, summary: Summary, ) { if let Some(func_id) = func_ref.function_id { - if func_args.is_some() || initial_type_cache.is_some() { + if func_args.is_some() || type_args.is_some() { let typed_cache_key = - CallSiteKey::new(func_args.clone(), initial_type_cache.clone()); - self.typed_cache_table - .entry(typed_cache_key) - .or_default() - .insert(func_id, summary); + CallSiteKey::new(func_args.clone(), type_args.clone(), func_id); + self.call_site_cache.insert(typed_cache_key, summary); } else { - self.typed_cache.insert(func_id, summary); + self.function_id_cache.insert(func_id, summary); } } else { //todo: change param to function id @@ -676,9 +659,13 @@ impl<'tcx> PersistentSummaryCache<'tcx> { } /// Sets or updates the DefId cache so that from now on def_id maps to the given summary. - #[logfn_inputs(TRACE)] - pub fn set_summary_for(&mut self, def_id: DefId, summary: Summary) -> Option { - let persistent_key = utils::summary_key_str(self.type_context, def_id); + pub fn set_summary_for( + &mut self, + def_id: DefId, + tcx: TyCtxt<'tcx>, + summary: Summary, + ) -> Option { + let persistent_key = utils::summary_key_str(tcx, def_id); let serialized_summary = bincode::serialize(&summary).unwrap(); let result = self .db