From cbcfdf09456372800510db84e66f0d0d79252062 Mon Sep 17 00:00:00 2001 From: andrewjcg Date: Fri, 10 Nov 2023 17:23:32 -0500 Subject: [PATCH 1/3] Add chrome trace format for recording samples (#627) This adds "chrometrace" as a new format for the "record" command, which serializes samples using chrome trace events: https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU --- src/chrometrace.rs | 122 +++++++++++++++++++++++++++++++++++++++++++++ src/config.rs | 1 + src/main.rs | 21 ++++++++ 3 files changed, 144 insertions(+) create mode 100644 src/chrometrace.rs diff --git a/src/chrometrace.rs b/src/chrometrace.rs new file mode 100644 index 00000000..24463800 --- /dev/null +++ b/src/chrometrace.rs @@ -0,0 +1,122 @@ +use std::cmp::min; +use std::collections::HashMap; +use std::io::Write; +use std::time::Instant; + +use anyhow::Error; +use serde_derive::Serialize; + +use crate::stack_trace::Frame; +use crate::stack_trace::StackTrace; + +#[derive(Clone, Debug, Serialize)] +struct Args { + pub filename: String, + pub line: Option, +} + +#[derive(Clone, Debug, Serialize)] +struct Event { + pub args: Args, + pub cat: String, + pub name: String, + pub ph: String, + pub pid: u64, + pub tid: u64, + pub ts: u64, +} + +pub struct Chrometrace { + events: Vec, + start_ts: Instant, + prev_traces: HashMap, + show_linenumbers: bool, +} + +impl Chrometrace { + pub fn new(show_linenumbers: bool) -> Chrometrace { + Chrometrace { + events: Vec::new(), + start_ts: Instant::now(), + prev_traces: HashMap::new(), + show_linenumbers, + } + } + + // Return whether these frames are similar enough such that we should merge + // them, instead of creating separate events for them. + fn should_merge_frames(&self, a: &Frame, b: &Frame) -> bool { + a.name == b.name && a.filename == b.filename && (!self.show_linenumbers || a.line == b.line) + } + + fn event(&self, trace: &StackTrace, frame: &Frame, phase: &str, ts: u64) -> Event { + Event { + tid: trace.thread_id, + pid: trace.pid as u64, + name: frame.name.to_string(), + cat: "py-spy".to_owned(), + ph: phase.to_owned(), + ts, + args: Args { + filename: frame.filename.to_string(), + line: if self.show_linenumbers { + Some(frame.line as u32) + } else { + None + }, + }, + } + } + + pub fn increment(&mut self, trace: &StackTrace) -> std::io::Result<()> { + let now = self.start_ts.elapsed().as_micros() as u64; + + // Load the previous frames for this thread. + let prev_frames = self + .prev_traces + .remove(&trace.thread_id) + .map(|t| t.frames) + .unwrap_or_default(); + + // Find the index where we first see new frames. + let new_idx = prev_frames + .iter() + .rev() + .zip(trace.frames.iter().rev()) + .position(|(a, b)| !self.should_merge_frames(a, b)) + .unwrap_or(min(prev_frames.len(), trace.frames.len())); + + // Publish end events for the previous frames that got dropped in the + // most recent trace. + for frame in prev_frames.iter().rev().skip(new_idx).rev() { + self.events.push(self.event(trace, frame, "E", now)); + } + + // Publish start events for frames that got added in the most recent + // trace. + for frame in trace.frames.iter().rev().skip(new_idx) { + self.events.push(self.event(trace, frame, "B", now)); + } + + // Save this stack trace for the next iteration. + self.prev_traces.insert(trace.thread_id, trace.clone()); + + Ok(()) + } + + pub fn write(&self, w: &mut dyn Write) -> Result<(), Error> { + let mut events = Vec::new(); + events.extend(self.events.to_vec()); + + // Add end events for any unfinished slices. + let now = self.start_ts.elapsed().as_micros() as u64; + for trace in self.prev_traces.values() { + for frame in &trace.frames { + events.push(self.event(trace, frame, "E", now)); + } + } + + writeln!(w, "{}", serde_json::to_string(&events)?)?; + Ok(()) + } +} diff --git a/src/config.rs b/src/config.rs index 55c2151b..d8c936f9 100644 --- a/src/config.rs +++ b/src/config.rs @@ -67,6 +67,7 @@ pub enum FileFormat { flamegraph, raw, speedscope, + chrometrace, } impl FileFormat { diff --git a/src/main.rs b/src/main.rs index 4b200c21..d2745c79 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,6 +4,7 @@ extern crate anyhow; extern crate log; mod binary_parser; +mod chrometrace; mod config; mod console_viewer; #[cfg(target_os = "linux")] @@ -108,6 +109,15 @@ impl Recorder for flamegraph::Flamegraph { } } +impl Recorder for chrometrace::Chrometrace { + fn increment(&mut self, trace: &StackTrace) -> Result<(), Error> { + Ok(self.increment(trace)?) + } + fn write(&self, w: &mut dyn Write) -> Result<(), Error> { + self.write(w) + } +} + pub struct RawFlamegraph(flamegraph::Flamegraph); impl Recorder for RawFlamegraph { @@ -129,6 +139,9 @@ fn record_samples(pid: remoteprocess::Pid, config: &Config) -> Result<(), Error> Some(FileFormat::raw) => Box::new(RawFlamegraph(flamegraph::Flamegraph::new( config.show_line_numbers, ))), + Some(FileFormat::chrometrace) => { + Box::new(chrometrace::Chrometrace::new(config.show_line_numbers)) + } None => return Err(format_err!("A file format is required to record samples")), }; @@ -139,6 +152,7 @@ fn record_samples(pid: remoteprocess::Pid, config: &Config) -> Result<(), Error> Some(FileFormat::flamegraph) => "svg", Some(FileFormat::speedscope) => "json", Some(FileFormat::raw) => "txt", + Some(FileFormat::chrometrace) => "json", None => return Err(format_err!("A file format is required to record samples")), }; let local_time = Local::now().to_rfc3339_opts(SecondsFormat::Secs, true); @@ -342,6 +356,13 @@ fn record_samples(pid: remoteprocess::Pid, config: &Config) -> Result<(), Error> ); println!("{}You can use the flamegraph.pl script from https://github.com/brendangregg/flamegraph to generate a SVG", lede); } + FileFormat::chrometrace => { + println!( + "{}Wrote chrome trace to '{}'. Samples: {} Errors: {}", + lede, filename, samples, errors + ); + println!("{}Visit chrome://tracing to view", lede); + } }; Ok(()) From 4e69e8300f32a299c80de5cd938bd1b4694cbe91 Mon Sep 17 00:00:00 2001 From: William Manley Date: Fri, 10 Nov 2023 22:25:46 +0000 Subject: [PATCH 2/3] Speed up --gil by skipping work for non-GIL threads (#630) When the user has specified --gil we needn't collect stack traces for the non-GIL threads. This is a significant speedup on heavily threaded Python code. My server has 271 threads (most of which are idle). With the previous code (sampling at 10Hz) I get: $ ./py-spy record -o /var/opt/stbt/profile.svg --pid $(pgrep -f http_service.service) -r 10 -d 10 --gil py-spy> Sampling process 10 times a second for 10 seconds. Press Control-C to exit. py-spy> 1.29s behind in sampling, results may be inaccurate. Try reducing the sampling rate py-spy> 1.36s behind in sampling, results may be inaccurate. Try reducing the sampling rate py-spy> 1.32s behind in sampling, results may be inaccurate. Try reducing the sampling rate py-spy> 1.22s behind in sampling, results may be inaccurate. Try reducing the sampling rate py-spy> 1.00s behind in sampling, results may be inaccurate. Try reducing the sampling rate py-spy> Wrote flamegraph data to '/var/opt/stbt/profile.svg'. Samples: 3 Errors: 51 With this commit I can sample at 80Hz, and the error rate is way lower: $ ./py-spy2 record -o /var/opt/stbt/profile.svg --pid $(pgrep -f http_service.service) -r 80 -d 10 --gil py-spy> Sampling process 80 times a second for 10 seconds. Press Control-C to exit. py-spy> Wrote flamegraph data to '/var/opt/stbt/profile.svg'. Samples: 65 Errors: 0 I suspect the difference in error rate is due to the error handling in `_get_stack_traces` - an error getting information about any thread means that all the traces are thrown away. --- src/python_spy.rs | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/src/python_spy.rs b/src/python_spy.rs index f674cc74..f5e36d68 100644 --- a/src/python_spy.rs +++ b/src/python_spy.rs @@ -189,9 +189,14 @@ impl PythonSpy { fn _get_stack_traces(&mut self) -> Result, Error> { // Query the OS to get if each thread in the process is running or not let mut thread_activity = HashMap::new(); - for thread in self.process.threads()?.iter() { - let threadid: Tid = thread.id()?; - thread_activity.insert(threadid, thread.active()?); + if self.config.gil_only { + // Don't need to collect thread activity if we're only getting the + // GIL thread: If we're holding the GIL we're by definition active. + } else { + for thread in self.process.threads()?.iter() { + let threadid: Tid = thread.id()?; + thread_activity.insert(threadid, thread.active()?); + } } // Lock the process if appropriate. Note we have to lock AFTER getting the thread @@ -224,6 +229,15 @@ impl PythonSpy { .process .copy_pointer(threads) .context("Failed to copy PyThreadState")?; + threads = thread.next(); + + let python_thread_id = thread.thread_id(); + let owns_gil = python_thread_id == gil_thread_id; + + if self.config.gil_only && !owns_gil { + continue; + } + let mut trace = get_stack_trace( &thread, &self.process, @@ -232,7 +246,6 @@ impl PythonSpy { )?; // Try getting the native thread id - let python_thread_id = thread.thread_id(); // python 3.11+ has the native thread id directly on the PyThreadState object, // for older versions of python, try using OS specific code to get the native @@ -255,7 +268,7 @@ impl PythonSpy { } trace.thread_name = self._get_python_thread_name(python_thread_id); - trace.owns_gil = trace.thread_id == gil_thread_id; + trace.owns_gil = owns_gil; trace.pid = self.process.pid; // Figure out if the thread is sleeping from the OS if possible @@ -314,7 +327,11 @@ impl PythonSpy { return Err(format_err!("Max thread recursion depth reached")); } - threads = thread.next(); + if self.config.gil_only { + // There's only one GIL thread and we've captured it, so we can + // stop now + break; + } } Ok(traces) } From bb0e7e8929c9e23c0bb6ebebb5536ff376894a0d Mon Sep 17 00:00:00 2001 From: andrewjcg Date: Fri, 10 Nov 2023 17:26:32 -0500 Subject: [PATCH 3/3] Support facebook's cinder.3.8 Python fork (#631) * Capture build metadata in version struct Also include the build metada in the version struct. * Support facebook's cinder.3.8 Python fork Facebook's cinder.3.8 Python fork uses a different offset for the current tstate. Use the buildmetadata to detect and special case this (https://github.com/facebookincubator/cinder/blob/cinder/3.8/Include/patchlevel.h#L26). --- src/coredump.rs | 1 + src/python_bindings/mod.rs | 5 ++++- src/python_process_info.rs | 1 + src/version.rs | 46 +++++++++++++++++++++++++++++++------- 4 files changed, 44 insertions(+), 9 deletions(-) diff --git a/src/coredump.rs b/src/coredump.rs index 51a66361..53940bdb 100644 --- a/src/coredump.rs +++ b/src/coredump.rs @@ -441,6 +441,7 @@ mod test { minor: 9, patch: 13, release_flags: "".to_owned(), + build_metadata: None, }; let python_core = PythonCoreDump { core, diff --git a/src/python_bindings/mod.rs b/src/python_bindings/mod.rs index 74a6ce89..420e3c9a 100644 --- a/src/python_bindings/mod.rs +++ b/src/python_bindings/mod.rs @@ -222,7 +222,10 @@ pub mod pyruntime { }, Version { major: 3, minor: 8, .. - } => Some(1368), + } => match version.build_metadata.as_deref() { + Some("cinder") => Some(1384), + _ => Some(1368), + }, Version { major: 3, minor: 9..=10, diff --git a/src/python_process_info.rs b/src/python_process_info.rs index 4c6210cb..dd802c67 100644 --- a/src/python_process_info.rs +++ b/src/python_process_info.rs @@ -318,6 +318,7 @@ where minor, patch: 0, release_flags: "".to_owned(), + build_metadata: None, }); } } diff --git a/src/version.rs b/src/version.rs index 27d8538b..946febc7 100644 --- a/src/version.rs +++ b/src/version.rs @@ -9,6 +9,7 @@ pub struct Version { pub minor: u64, pub patch: u64, pub release_flags: String, + pub build_metadata: Option, } impl Version { @@ -28,6 +29,11 @@ impl Version { let major = std::str::from_utf8(&cap[2])?.parse::()?; let minor = std::str::from_utf8(&cap[3])?.parse::()?; let patch = std::str::from_utf8(&cap[4])?.parse::()?; + let build_metadata = if let Some(s) = cap.get(7) { + Some(std::str::from_utf8(&s.as_bytes()[1..])?.to_owned()) + } else { + None + }; let version = std::str::from_utf8(&cap[0])?; info!("Found matching version string '{}'", version); @@ -47,6 +53,7 @@ impl Version { minor, patch, release_flags: release.to_owned(), + build_metadata, }); } Err(format_err!("failed to find version string")) @@ -59,7 +66,11 @@ impl std::fmt::Display for Version { f, "{}.{}.{}{}", self.major, self.minor, self.patch, self.release_flags - ) + )?; + if let Some(build_metadata) = &self.build_metadata { + write!(f, "+{}", build_metadata,)? + } + Ok(()) } } @@ -75,7 +86,8 @@ mod tests { major: 2, minor: 7, patch: 10, - release_flags: "".to_owned() + release_flags: "".to_owned(), + build_metadata: None, } ); @@ -89,7 +101,8 @@ mod tests { major: 3, minor: 6, patch: 3, - release_flags: "".to_owned() + release_flags: "".to_owned(), + build_metadata: None, } ); @@ -102,7 +115,8 @@ mod tests { major: 3, minor: 7, patch: 0, - release_flags: "rc1".to_owned() + release_flags: "rc1".to_owned(), + build_metadata: None, } ); @@ -115,7 +129,8 @@ mod tests { major: 3, minor: 10, patch: 0, - release_flags: "rc1".to_owned() + release_flags: "rc1".to_owned(), + build_metadata: None, } ); @@ -137,7 +152,8 @@ mod tests { major: 2, minor: 7, patch: 15, - release_flags: "".to_owned() + release_flags: "".to_owned(), + build_metadata: Some("".to_owned()), } ); @@ -148,7 +164,8 @@ mod tests { major: 2, minor: 7, patch: 10, - release_flags: "".to_owned() + release_flags: "".to_owned(), + build_metadata: Some("dcba".to_owned()), } ); @@ -159,7 +176,20 @@ mod tests { major: 2, minor: 7, patch: 10, - release_flags: "".to_owned() + release_flags: "".to_owned(), + build_metadata: Some("5-4.abcd".to_owned()), + } + ); + + let version = Version::scan_bytes(b"2.8.5+cinder (default)").unwrap(); + assert_eq!( + version, + Version { + major: 2, + minor: 8, + patch: 5, + release_flags: "".to_owned(), + build_metadata: Some("cinder".to_owned()), } ); }