From 53346637abadec8187fd2d9adcc95126f82c17eb Mon Sep 17 00:00:00 2001 From: hermanventer Date: Mon, 9 Dec 2024 11:43:47 -0800 Subject: [PATCH] Option to print summaries --- checker/src/call_graph.rs | 18 +++++- checker/src/callbacks.rs | 4 ++ checker/src/crate_visitor.rs | 12 +++- checker/src/options.rs | 2 +- checker/src/summaries.rs | 114 ++++++++++++++++++++++++++++++++++- 5 files changed, 144 insertions(+), 6 deletions(-) diff --git a/checker/src/call_graph.rs b/checker/src/call_graph.rs index 6245abe1..1122593c 100644 --- a/checker/src/call_graph.rs +++ b/checker/src/call_graph.rs @@ -20,6 +20,7 @@ use serde::{Deserialize, Serialize}; use mirai_annotations::*; use rustc_hir::def_id::DefId; use rustc_middle::ty::TyCtxt; +use rustc_span::Span; // An unique identifier for a Rust type string. type TypeId = u32; @@ -116,6 +117,8 @@ pub struct CallGraphConfig { included_crates: Vec>, /// Datalog output configuration datalog_config: Option, + /// If true, collect all call sites. + pub include_calls_in_summaries: bool, } impl CallGraphConfig { @@ -132,6 +135,7 @@ impl CallGraphConfig { reductions, included_crates, datalog_config, + include_calls_in_summaries: false, } } @@ -314,7 +318,7 @@ impl CallGraphEdge { #[derive(Clone)] pub struct CallGraph<'tcx> { /// Configuration for the call graph - config: CallGraphConfig, + pub config: CallGraphConfig, /// A context to use for getting the information about DefIds that is needed /// to generate fully qualified names for them. tcx: TyCtxt<'tcx>, @@ -368,7 +372,7 @@ impl<'tcx> CallGraph<'tcx> { self.config.dot_output_path.is_some() || self.config.datalog_config.is_some() } - /// Produce an updated call graph structure that preserves all of the + /// Produce an updated call graph structure that preserves all the /// fields except `graph`, which is replaced. fn update(&self, graph: Graph) -> CallGraph<'tcx> { CallGraph { @@ -433,7 +437,7 @@ impl<'tcx> CallGraph<'tcx> { callee: DefId, external_callee: bool, ) { - if self.config.call_sites_output_path.is_some() && !self.non_local_defs.contains(&caller) { + if self.config.include_calls_in_summaries || (self.config.call_sites_output_path.is_some() && !self.non_local_defs.contains(&caller)) { self.call_sites.insert(loc, (caller, callee)); if external_callee { self.non_local_defs.insert(callee); @@ -1082,6 +1086,14 @@ impl<'tcx> CallGraph<'tcx> { call_graph.to_call_sites(Path::new(call_path.as_ref())); } } + + pub fn get_calls_for_def_ids(&self) -> HashMap> { + let mut calls = HashMap::>::new(); + for (span, (caller, callee)) in self.call_sites.iter() { + calls.entry(*caller).or_default().push((*span, *callee)); + } + calls + } } /// Supported Datalog output formats diff --git a/checker/src/callbacks.rs b/checker/src/callbacks.rs index 51eec854..fe95c5b3 100644 --- a/checker/src/callbacks.rs +++ b/checker/src/callbacks.rs @@ -168,7 +168,11 @@ impl MiraiCallbacks { type_cache: Rc::new(RefCell::new(TypeCache::new())), call_graph: CallGraph::new(call_graph_config, tcx), }; + if crate_visitor.options.print_summaries { + crate_visitor.call_graph.config.include_calls_in_summaries = true; + } crate_visitor.analyze_some_bodies(); crate_visitor.call_graph.output(); + crate_visitor.print_summaries(); } } diff --git a/checker/src/crate_visitor.rs b/checker/src/crate_visitor.rs index 2be69341..1d7a4654 100644 --- a/checker/src/crate_visitor.rs +++ b/checker/src/crate_visitor.rs @@ -80,7 +80,7 @@ impl<'compilation> CrateVisitor<'compilation, '_> { DefId::local(DefIndex::from_u32(0)) }; - // Analyze all functions that are white listed or public + // Analyze all functions that are whitelisted or public let building_standard_summaries = std::env::var("MIRAI_START_FRESH").is_ok(); for local_def_id in self.tcx.hir().body_owners() { let def_id = local_def_id.to_def_id(); @@ -185,6 +185,7 @@ impl<'compilation> CrateVisitor<'compilation, '_> { let kind = self.tcx.def_kind(def_id); if matches!(kind, rustc_hir::def::DefKind::Static { .. }) || utils::is_foreign_contract(self.tcx, def_id) + || self.options.print_summaries { self.summary_cache .set_summary_for(def_id, self.tcx, summary); @@ -290,4 +291,13 @@ impl<'compilation> CrateVisitor<'compilation, '_> { } } } + + pub fn print_summaries(&mut self) { + if !self.options.print_summaries { + return; + } + let calls_for_def_ids = self.call_graph.get_calls_for_def_ids(); + let summaries_for_llm = self.summary_cache.get_summaries_for_llm(self.tcx, calls_for_def_ids); + print!("{}", summaries_for_llm.to_json()); + } } diff --git a/checker/src/options.rs b/checker/src/options.rs index 91c4e253..641acb3e 100644 --- a/checker/src/options.rs +++ b/checker/src/options.rs @@ -243,7 +243,7 @@ impl Options { matches.value_source("print_summaries"), Some(ValueSource::DefaultValue) ) { - self.print_summaries = false; + self.print_summaries = true; } args[rustc_args_start..].to_vec() } diff --git a/checker/src/summaries.rs b/checker/src/summaries.rs index 32b5d27f..986e220e 100644 --- a/checker/src/summaries.rs +++ b/checker/src/summaries.rs @@ -3,8 +3,10 @@ // This source code is licensed under the MIT license found in the // LICENSE file in the root directory of this source tree. +use std::path::PathBuf; use std::cmp::Ordering; use std::collections::{HashMap, HashSet}; +use std::env::current_dir; use std::fmt::{Debug, Formatter, Result}; use std::hash::{Hash, Hasher}; use std::ops::Deref; @@ -18,6 +20,7 @@ use sled::{Config, Db}; use mirai_annotations::*; use rustc_hir::def_id::DefId; use rustc_middle::ty::{Ty, TyCtxt}; +use rustc_span::Span; use crate::abstract_value::AbstractValue; use crate::abstract_value::AbstractValueTrait; @@ -275,7 +278,7 @@ fn add_provenance(preconditions: &[Precondition], tcx: TyCtxt<'_>) -> Vec SummaryCache<'tcx> { store_path } + pub fn get_summaries_for_llm(&self, tcx: TyCtxt, call_site_per_def_id: HashMap>) -> SummariesForLLM { + let source_map = tcx.sess.source_map(); + let mut entries = Vec::new(); + for (key, value) in self.def_id_cache.iter() { + let fully_qualified_name = self.key_cache.get(key).unwrap().to_string(); + let file_name; + let source; + if tcx.is_mir_available(*key) { + let mir = if tcx.is_const_fn(*key) { + tcx.mir_for_ctfe(*key) + } else { + let instance = rustc_middle::ty::InstanceKind::Item(*key); + tcx.instance_mir(instance) + }; + file_name = source_map.span_to_filename(mir.span).into_local_path(); + source = source_map.span_to_snippet(mir.span).ok().unwrap_or_default(); + } else { + let span = tcx.def_span(*key); + file_name = source_map.span_to_filename(span).into_local_path(); + source = source_map.span_to_snippet(span).ok().unwrap_or_default(); + } + let mut path = None; + if let Some(mut p) = file_name { + if p.is_absolute() { + p = p.strip_prefix(current_dir().unwrap_or_default()).unwrap_or(&p).to_path_buf(); + if p.is_absolute() { + let sysroot = utils::find_sysroot(); + let rel = p.strip_prefix(sysroot).unwrap_or(&p); + p = PathBuf::from("/").join(rel).to_path_buf(); + } + } + path = Some(p.to_string_lossy().to_string()); + } + let mut calls = vec![]; + if let Some(call_vec) = call_site_per_def_id.get(key) { + for (span, def_id) in call_vec.iter() { + let call_snippet = source_map.span_to_snippet(*span).ok().unwrap_or_default(); + let callee_name = self.key_cache.get(def_id).unwrap().to_string(); + calls.push((call_snippet, callee_name)); + } + }; + entries.push(( + path.unwrap_or_default(), + fully_qualified_name, + source, + LLMSummary::from_summary(&value, calls), + )); + } + SummariesForLLM { entries } + } + /// Returns (and caches) a string that uniquely identifies a definition to serve as a key to /// the summary cache, which is a key value store. The string will always be the same as /// long as the definition does not change its name or location, so it can be used to @@ -645,6 +699,11 @@ impl<'tcx> SummaryCache<'tcx> { summary: Summary, ) { if let Some(func_id) = func_ref.function_id { + if let Some(def_id) = func_ref.def_id { + if func_args.is_none() && type_args.is_none() { + self.def_id_cache.insert(def_id, summary.clone()); + } + } if func_args.is_some() || type_args.is_some() { let typed_cache_key = CallSiteKey::new(func_args.clone(), type_args.clone(), func_id); @@ -676,3 +735,56 @@ impl<'tcx> SummaryCache<'tcx> { self.def_id_cache.insert(def_id, summary) } } + +#[derive(Serialize)] +pub struct SummariesForLLM { + // (source path, fully qualified function name, function source, summary) + entries: Vec<(String, String, String, LLMSummary)>, +} + +impl SummariesForLLM { + pub fn to_json(&self) -> String { + serde_json::to_string_pretty(&self).unwrap() + } +} + +#[derive(Serialize)] +pub struct LLMSummary { + // Conditions that should hold prior to the call. + // Callers should substitute parameter values with argument values and simplify the results + // under the current path condition. Any values that do not simplify to true will require the + // caller to either generate an error message or to add a precondition to its own summary that + // will be sufficient to ensure that all the preconditions in this summary are met. + // The string value bundled with the condition is the message that details what would go + // wrong at runtime if the precondition is not satisfied by the caller. + //pub preconditions: Vec, + + // Modifications the function makes to mutable state external to the function. + // Every path will be rooted in a static or in a mutable parameter. + // No two paths in this collection will lead to the same place in memory. + // Callers should substitute parameter values with argument values and simplify the results + // under the current path condition. They should then update their current state to reflect the + // side effects of the call. + //pub side_effects: Vec<(Rc, Rc)>, + + // A condition that should hold after a call that completes normally. + // Callers should substitute parameter values with argument values and simplify the results + // under the current path condition. + // The resulting value should be conjoined to the current path condition. + //pub post_condition: Option>, + + // The set of function calls made by this function. The first element is the source snippet of + // the call and the second is the fully qualified name of the function being called. + calls: Vec<(String, String)>, +} + +impl LLMSummary { + pub fn from_summary(_summary: &Summary, calls: Vec<(String, String)>) -> LLMSummary { + LLMSummary { + // preconditions: vec![], + // side_effects: vec![], + // post_condition: vec![], + calls, + } + } +}