Skip to content

Commit

Permalink
gpu: support amdgpu tracking
Browse files Browse the repository at this point in the history
Co-authored-by: lvxnull2 <[email protected]>
  • Loading branch information
yretenai and lvxnull2 committed Dec 2, 2024
1 parent 1fe17dd commit a4c83d7
Show file tree
Hide file tree
Showing 9 changed files with 222 additions and 13 deletions.
20 changes: 20 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ required-features = ["generate_schema"]
# Used for general builds.
battery = ["starship-battery"]
nvidia = ["nvml-wrapper"]
gpu = ["nvidia"]
amd = ["libamdgpu_top"]
gpu = ["nvidia", "amd"]
zfs = []
deploy = ["battery", "gpu", "zfs"]
default = ["deploy"]
Expand Down Expand Up @@ -112,6 +113,7 @@ libc = "0.2.162"

[target.'cfg(target_os = "linux")'.dependencies]
rustix = { version = "0.38.40", features = ["fs", "param"] }
libamdgpu_top = { version = "0.10.0", optional = true }

[target.'cfg(target_os = "macos")'.dependencies]
core-foundation = "0.10.0"
Expand Down
6 changes: 3 additions & 3 deletions docs/content/configuration/command-line-options.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ see information on these options by running `btm -h`, or run `btm --help` to dis

## GPU Options

| Option | Behaviour |
| --------------- | --------------------------------------------------------- |
| `--disable_gpu` | Disable collecting and displaying NVIDIA GPU information. |
| Option | Behaviour |
| --------------- | ----------------------------------------------------------------- |
| `--disable_gpu` | Disable collecting and displaying NVIDIA and AMD GPU information. |

## Style Options

Expand Down
2 changes: 1 addition & 1 deletion docs/content/configuration/config-file/flags.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ each time:
| `network_use_binary_prefix` | Boolean | Displays the network widget with binary prefixes. |
| `network_use_bytes` | Boolean | Displays the network widget using bytes. |
| `network_use_log` | Boolean | Displays the network widget with a log scale. |
| `disable_gpu` | Boolean | Disable NVIDIA GPU data collection. |
| `disable_gpu` | Boolean | Disable NVIDIA and AMD GPU data collection. |
| `retention` | String (human readable time, such as "10m", "1h", etc.) | How much data is stored at once in terms of time. |
| `unnormalized_cpu` | Boolean | Show process CPU% without normalizing over the number of cores. |
| `expanded` | Boolean | Expand the default widget upon starting the app. |
Expand Down
2 changes: 1 addition & 1 deletion docs/content/usage/widgets/memory.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ If the total RAM or swap available is 0, then it is automatically hidden from th

One can also adjust the displayed time range through either the keyboard or mouse, with a range of 30s to 600s.

This widget can also be configured to display Nvidia GPU memory usage (`--disable_gpu` on Linux/Windows to disable) or cache memory usage (`--enable_cache_memory`).
This widget can also be configured to display NVIDIA and AMD GPU memory usage (`--disable_gpu` on Linux/Windows to disable; AMD GPUs are only supported on Linux) or cache memory usage (`--enable_cache_memory`).

## Key bindings

Expand Down
2 changes: 1 addition & 1 deletion docs/content/usage/widgets/temperature.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ The temperature widget provides a table of temperature sensors and their current

The temperature widget provides the sensor name as well as its current temperature.

This widget can also be configured to display Nvidia GPU temperatures (`--disable_gpu` on Linux/Windows to disable).
This widget can also be configured to display NVIDIA and AMD GPU temperatures (`--disable_gpu` on Linux/Windows to disable; AMD GPUs are only supported on Linux).

## Key bindings

Expand Down
43 changes: 38 additions & 5 deletions src/data_collection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
#[cfg(feature = "nvidia")]
pub mod nvidia;

#[cfg(all(feature = "amd", target_os = "linux"))]
pub mod amd;

#[cfg(feature = "battery")]
pub mod batteries;

Expand Down Expand Up @@ -347,6 +350,10 @@ impl DataCollector {
#[inline]
fn update_gpus(&mut self) {
if self.widgets_to_harvest.use_gpu {
let mut local_gpu: Vec<(String, memory::MemHarvest)> = Vec::new();
let mut local_gpu_pids: Vec<HashMap<u32, (u64, u32)>> = Vec::new();
let mut local_gpu_total_mem: u64 = 0;

#[cfg(feature = "nvidia")]
if let Some(data) = nvidia::get_nvidia_vecs(
&self.temperature_type,
Expand All @@ -360,14 +367,40 @@ impl DataCollector {
self.data.temperature_sensors = Some(temp);
}
}
if let Some(mem) = data.memory {
self.data.gpu = Some(mem);
if let Some(mut mem) = data.memory {
local_gpu.append(&mut mem);
}
if let Some(mut proc) = data.procs {
local_gpu_pids.append(&mut proc.1);
local_gpu_total_mem += proc.0;
}
}

#[cfg(all(feature = "amd", target_os = "linux"))]
if let Some(data) = amd::get_amd_vecs(
&self.temperature_type,
&self.filters.temp_filter,
&self.widgets_to_harvest,
) {
if let Some(mut temp) = data.temperature {
if let Some(sensors) = &mut self.data.temperature_sensors {
sensors.append(&mut temp);
} else {
self.data.temperature_sensors = Some(temp);
}
}
if let Some(mut mem) = data.memory {
local_gpu.append(&mut mem);
}
if let Some(proc) = data.procs {
self.gpu_pids = Some(proc.1);
self.gpus_total_mem = Some(proc.0);
if let Some(mut proc) = data.procs {
local_gpu_pids.append(&mut proc.1);
local_gpu_total_mem += proc.0;
}
}

self.data.gpu = Some(local_gpu);
self.gpu_pids = Some(local_gpu_pids);
self.gpus_total_mem = Some(local_gpu_total_mem);
}
}

Expand Down
154 changes: 154 additions & 0 deletions src/data_collection/amd.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
use libamdgpu_top::{
has_vcn, has_vcn_unified, has_vpe,
stat::{self, FdInfoSortType, FdInfoStat, ProcInfo, Sensors},
DevicePath,
PCI::BUS_INFO,
};

use crate::{
app::{filter::Filter, layout_manager::UsedWidgets},
data_collection::{
memory::MemHarvest,
temperature::{TempHarvest, TemperatureType},
},
};
use hashbrown::HashMap;
use std::sync::{LazyLock, Mutex};
use std::time::Duration;

/// Data harvested from AMD GPUs by a single call to `get_amd_vecs`.
///
/// Each field is `None` when the corresponding collection ended up empty
/// (for example because that widget is not being harvested).
pub struct AMDGPUData {
/// One `(device name, VRAM usage)` entry per GPU whose memory info could be read.
pub memory: Option<Vec<(String, MemHarvest)>>,
/// Temperature readings; each present edge/junction/memory sensor of a GPU
/// contributes one entry named `"<device name> <sensor type>"`.
pub temperature: Option<Vec<TempHarvest>>,
/// `(total VRAM in bytes summed over GPUs, per-GPU process maps)`.
///
/// Each map goes from PID to `(VRAM usage in bytes, GPU utilization)`; GPUs
/// with no reported processes do not contribute a map.
pub procs: Option<(u64, Vec<HashMap<u32, (u64, u32)>>)>,
}

// Per-device `FdInfoStat`, keyed by PCI bus info. It lives in a static so the
// stats updated by one `get_amd_vecs` call are still available to the next one.
// NOTE(review): presumably libamdgpu_top derives per-process usage from the
// difference against the previously stored sample -- confirm against the crate.
static PROC_DATA: LazyLock<Mutex<HashMap<BUS_INFO, FdInfoStat>>> =
LazyLock::new(|| Mutex::new(HashMap::new()));

/// Collects temperature, VRAM and per-process usage data from every AMD GPU
/// that can be opened.
///
/// * `temp_type` - unit that sensor readings are converted to.
/// * `filter` - optional filter applied to the device name; devices it rejects
///   contribute no temperature entries.
/// * `widgets_to_harvest` - selects which kinds of data (`use_temp`, `use_mem`,
///   `use_proc`) are gathered.
///
/// Devices that fail to initialise or to report device info are skipped.
/// The function currently always returns `Some`; each field of the returned
/// [`AMDGPUData`] is `None` when nothing was collected for it.
#[inline]
pub fn get_amd_vecs(
    temp_type: &TemperatureType, filter: &Option<Filter>, widgets_to_harvest: &UsedWidgets,
) -> Option<AMDGPUData> {
    // Enumerate the devices once and iterate that same list, so the capacity
    // hint and the loop cannot disagree and the lookup is not done twice.
    let device_path_list = DevicePath::get_device_path_list();
    let num_gpu = device_path_list.len();
    let mut temp_vec = Vec::with_capacity(num_gpu);
    let mut mem_vec = Vec::with_capacity(num_gpu);
    let mut proc_vec = Vec::with_capacity(num_gpu);
    let mut total_mem = 0;
    // A poisoned lock only means an earlier holder panicked; the cached stats
    // are still usable, so recover the guard instead of panicking again.
    let mut proc_map = PROC_DATA
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());

    for device_path in device_path_list {
        let Ok(amdgpu_dev) = device_path.init() else {
            continue;
        };
        let pci_bus = device_path.pci;
        let Ok(ext_info) = amdgpu_dev.device_info() else {
            continue;
        };
        let name = amdgpu_dev.get_marketing_name_or_default();

        if widgets_to_harvest.use_temp && Filter::optional_should_keep(filter, &name) {
            if let Some(sensors) = Sensors::new(&amdgpu_dev, &pci_bus, &ext_info) {
                for temp in [
                    &sensors.edge_temp,
                    &sensors.junction_temp,
                    &sensors.memory_temp,
                ] {
                    // Not every device exposes all three sensors.
                    let Some(temp) = temp else { continue };
                    let temperature = temp_type.convert_temp_unit(temp.current as f32);
                    temp_vec.push(TempHarvest {
                        name: format!("{} {}", name, temp.type_),
                        temperature: Some(temperature),
                    });
                }
            }
        }

        // Query VRAM info at most once per device; both the memory and the
        // process branches below read it.
        let memory_info = if widgets_to_harvest.use_mem || widgets_to_harvest.use_proc {
            amdgpu_dev.memory_info().ok()
        } else {
            None
        };

        if widgets_to_harvest.use_proc {
            // Reuse the stats stored for this device by a previous call; the
            // capability probes only run the first time a device is seen.
            // NOTE(review): the interval is fixed at 1 s regardless of the
            // collector's actual update rate -- confirm this is intended.
            let fdinfo = proc_map.entry(pci_bus).or_insert_with(|| FdInfoStat {
                has_vcn: has_vcn(&amdgpu_dev),
                has_vcn_unified: has_vcn_unified(&amdgpu_dev),
                has_vpe: has_vpe(&amdgpu_dev),
                interval: Duration::from_secs(1),
                ..Default::default()
            });

            let mut proc_index: Vec<ProcInfo> = Vec::new();
            stat::update_index(&mut proc_index, &device_path);
            fdinfo.get_all_proc_usage(&proc_index);
            fdinfo.sort_proc_usage(FdInfoSortType::default(), false);

            let mut procs = HashMap::new();

            for pu in &fdinfo.proc_usage {
                let usage_vram = pu.usage.vram_usage << 10; // KiB -> B
                let pid: u32 = pu.pid.try_into().unwrap_or(0);
                let mut gpu_util_wide = pu.usage.gfx;

                // Add the media engines this device has to the graphics usage.
                if fdinfo.has_vcn_unified {
                    gpu_util_wide += pu.usage.media;
                } else if fdinfo.has_vcn {
                    gpu_util_wide += pu.usage.enc + pu.usage.dec;
                }

                if fdinfo.has_vpe {
                    gpu_util_wide += pu.usage.vpe;
                }

                // Values that do not fit in a u32 are reported as 0.
                let gpu_util: u32 = gpu_util_wide.try_into().unwrap_or(0);

                procs.insert(pid, (usage_vram, gpu_util));
            }

            if !procs.is_empty() {
                proc_vec.push(procs);
            }

            if let Some(memory_info) = &memory_info {
                total_mem += memory_info.vram.total_heap_size;
            }
        }

        if widgets_to_harvest.use_mem {
            if let Some(memory_info) = &memory_info {
                let total_bytes = memory_info.vram.total_heap_size;
                let used_bytes = memory_info.vram.heap_usage;
                mem_vec.push((
                    // `name` is not needed past this point, so move it.
                    name,
                    MemHarvest {
                        total_bytes,
                        used_bytes,
                        // Avoid dividing by zero for a device reporting no VRAM.
                        use_percent: if total_bytes == 0 {
                            None
                        } else {
                            Some(used_bytes as f64 / total_bytes as f64 * 100.0)
                        },
                    },
                ));
            }
        }
    }

    Some(AMDGPUData {
        memory: if !mem_vec.is_empty() {
            Some(mem_vec)
        } else {
            None
        },
        temperature: if !temp_vec.is_empty() {
            Some(temp_vec)
        } else {
            None
        },
        procs: if !proc_vec.is_empty() {
            Some((total_mem, proc_vec))
        } else {
            None
        },
    })
}
2 changes: 1 addition & 1 deletion src/options/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,7 @@ pub struct BatteryArgs {
#[derive(Args, Clone, Debug, Default)]
#[command(next_help_heading = "GPU Options", rename_all = "snake_case")]
pub struct GpuArgs {
#[arg(long, action = ArgAction::SetTrue, help = "Disable collecting and displaying NVIDIA GPU information.")]
#[arg(long, action = ArgAction::SetTrue, help = "Disable collecting and displaying NVIDIA and AMD GPU information.")]
pub disable_gpu: bool,
}

Expand Down

0 comments on commit a4c83d7

Please sign in to comment.