Skip to content

Commit

Permalink
Fix profiling when python symbols aren't available.
Browse files Browse the repository at this point in the history
Since Python 3.10, we haven't been able to profile Python interpreters that
have been compiled without symbols. This is because CPython 3.10 changed where
the 'PyRuntime' global is stored: it moved out of the BSS section and into its
own named section in the binary.

This especially affected profiling on Windows, where you'd have to install
Python symbols to be able to use py-spy.

Fix by reading in the address/size of the PyRuntime section from the
ELF/Mach-O/PE binaries and using that to scan Python interpreters when symbols
aren't available.
  • Loading branch information
benfred committed Oct 31, 2024
1 parent 969c4df commit 22acdff
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 24 deletions.
96 changes: 72 additions & 24 deletions src/binary_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ pub struct BinaryInfo {
pub symbols: HashMap<String, u64>,
pub bss_addr: u64,
pub bss_size: u64,
pub pyruntime_addr: u64,
pub pyruntime_size: u64,
#[allow(dead_code)]
pub addr: u64,
#[allow(dead_code)]
Expand Down Expand Up @@ -65,11 +67,23 @@ pub fn parse_binary(filename: &Path, addr: u64, size: u64) -> Result<BinaryInfo,
}
};

let mut pyruntime_addr = 0;
let mut pyruntime_size = 0;
let mut bss_addr = 0;
let mut bss_size = 0;
for segment in mach.segments.iter() {
for (section, _) in &segment.sections()? {
if section.name()? == "__bss" {
let name = section.name()?;
if name == "PyRuntime" {
if let Some(addr) = section.addr.checked_add(offset) {
if addr.checked_add(section.size).is_some() {
pyruntime_addr = addr;
pyruntime_size = section.size;
}
}
}

if name == "__bss" {
if let Some(addr) = section.addr.checked_add(offset) {
if addr.checked_add(section.size).is_some() {
bss_addr = addr;
Expand All @@ -94,6 +108,8 @@ pub fn parse_binary(filename: &Path, addr: u64, size: u64) -> Result<BinaryInfo,
symbols,
bss_addr,
bss_size,
pyruntime_addr,
pyruntime_size,
addr,
size,
})
Expand Down Expand Up @@ -153,6 +169,21 @@ pub fn parse_binary(filename: &Path, addr: u64, size: u64) -> Result<BinaryInfo,
bss_end = bss_header.sh_addr + bss_header.sh_size;
}

let pyruntime_header = elf.section_headers.iter().find(|header| {
strtab
.get_at(header.sh_name)
.map_or(false, |name| name == ".PyRuntime")
});

let mut pyruntime_addr = 0;
let mut pyruntime_size = 0;
if let Some(header) = pyruntime_header {
if let Some(addr) = header.sh_addr.checked_add(offset) {
pyruntime_addr = addr;
pyruntime_size = header.sh_size;
}
}

for sym in elf.syms.iter() {
// Skip imported symbols
if sym.is_import()
Expand Down Expand Up @@ -194,6 +225,8 @@ pub fn parse_binary(filename: &Path, addr: u64, size: u64) -> Result<BinaryInfo,
symbols,
bss_addr,
bss_size,
pyruntime_addr,
pyruntime_size,
addr,
size,
})
Expand All @@ -209,33 +242,48 @@ pub fn parse_binary(filename: &Path, addr: u64, size: u64) -> Result<BinaryInfo,
}
}

pe.sections
.iter()
.find(|section| section.name.starts_with(b".data"))
.ok_or_else(|| {
format_err!(
"Failed to find .data section in PE binary of {}",
filename.display()
)
})
.map(|data_section| {
let mut bss_addr = 0;
let mut bss_size = 0;
if let Some(addr) = offset.checked_add(data_section.virtual_address as u64) {
if addr.checked_add(data_section.virtual_size as u64).is_some() {
let mut bss_addr = 0;
let mut bss_size = 0;
let mut pyruntime_addr = 0;
let mut pyruntime_size = 0;
let mut found_data = false;
for section in pe.sections.iter() {
if section.name.starts_with(b".data") {
found_data = true;
if let Some(addr) = offset.checked_add(section.virtual_address as u64) {
if addr.checked_add(section.virtual_size as u64).is_some() {
bss_addr = addr;
bss_size = u64::from(data_section.virtual_size);
bss_size = u64::from(section.virtual_size);
}
}

BinaryInfo {
symbols,
bss_addr,
bss_size,
addr,
size,
} else if section.name.starts_with(b"PyRuntim") {
// note that the name is only 8 chars here, so we don't check for
// trailing 'e' in PyRuntime
if let Some(addr) = offset.checked_add(section.virtual_address as u64) {
if addr.checked_add(section.virtual_size as u64).is_some() {
pyruntime_addr = addr;
pyruntime_size = u64::from(section.virtual_size);
}
}
})
}
}

if !found_data {
return Err(format_err!(
"Failed to find .data section in PE binary of {}",
filename.display()
));
}

Ok(BinaryInfo {
symbols,
bss_addr,
bss_size,
pyruntime_size,
pyruntime_addr,
addr,
size,
})
}
_ => Err(format_err!("Unhandled binary type")),
}
Expand Down
15 changes: 15 additions & 0 deletions src/python_process_info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,21 @@ fn get_interpreter_address_from_binary<P>(
where
P: ProcessMemory,
{
// First check the pyruntime section, if it was found
if binary.pyruntime_addr != 0 {
let bss = process.copy(
binary.pyruntime_addr as usize,
binary.pyruntime_size as usize,
)?;
#[allow(clippy::cast_ptr_alignment)]
let addrs = unsafe {
slice::from_raw_parts(bss.as_ptr() as *const usize, bss.len() / size_of::<usize>())
};
if let Ok(addr) = check_interpreter_addresses(addrs, maps, process, version) {
return Ok(addr);
}
}

// We're going to scan the BSS/data section for things, and try to narrowly scan things that
// look like pointers to PyinterpreterState
let bss = process.copy(binary.bss_addr as usize, binary.bss_size as usize)?;
Expand Down

0 comments on commit 22acdff

Please sign in to comment.