From bfd5c0ea5dab312fd3bbfd3de1a469277b3cb9ff Mon Sep 17 00:00:00 2001 From: Nick-Eagles Date: Tue, 3 Oct 2023 12:13:16 -0400 Subject: [PATCH] Add an example output from 'job_report'; add a vignette section showing how to use 'job_report' --- NAMESPACE | 1 + inst/extdata/job_report_df.rds | Bin 0 -> 472 bytes man/job_report.Rd | 35 ++++++++++++++++++++++++++++ vignettes/slurmjobs.Rmd | 41 +++++++++++++++++++++++++++++++++ 4 files changed, 77 insertions(+) create mode 100644 inst/extdata/job_report_df.rds create mode 100644 man/job_report.Rd diff --git a/NAMESPACE b/NAMESPACE index c65b9e5..f070575 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ export(job_info) export(job_loop) +export(job_report) export(job_single) export(with_wd) import(dplyr) diff --git a/inst/extdata/job_report_df.rds b/inst/extdata/job_report_df.rds new file mode 100644 index 0000000000000000000000000000000000000000..41fcc78f467fb4731788954e3046d396b9e67b93 GIT binary patch literal 472 zcmV;}0Vn<+iwFP!000001JzSKPXa*@UG5I`K#Ymh);1ai6sXXs7!!<|XkuYeKR)~4{Nv4%glXi`{{bJm3f2@WF7vc=vwA2>JlB^ zA77gLQ?GZ;U);C%3PoRZa6`0QbQpXXd<5L|bf0xRqrUbX?~h&6d+$$~a<>eV zoRzp$aBR|fKpb=Up?V;bI;S)8P@>h^6@1$5_M5Lay4;56E0$gKkPb+eYGaOPid38! O8utN%8UrK}1^@t&1K-I2 literal 0 HcmV?d00001 diff --git a/man/job_report.Rd b/man/job_report.Rd new file mode 100644 index 0000000..87245ba --- /dev/null +++ b/man/job_report.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/job_report.R +\name{job_report} +\alias{job_report} +\title{Given the ID for a completed or currently running SLURM job, return a tibble +containing outputs from 'sacct'. In particular, returns information about +job name, ID, user, status, and allocated and used memory and CPUs.} +\usage{ +job_report(job_id) +} +\arguments{ +\item{job_id}{The ID associated with a finished or currently running SLURM +job (character(1) or integer(1))} +} +\value{ +A tibble with information about the requested job. 
+} +\description{ +Given the ID for a completed or currently running SLURM job, return a tibble +containing outputs from 'sacct'. In particular, returns information about +job name, ID, user, status, and allocated and used memory and CPUs. +} +\examples{ + +# Must be run in a SLURM environment +if (system("which sbatch") == 0) { + job_df = job_report('234904') + + # Check max virtual memory reached by this job + print(job_df$max_vmem_gb) +} +} +\author{ +Nicholas J. Eagles +} diff --git a/vignettes/slurmjobs.Rmd b/vignettes/slurmjobs.Rmd index cedd827..05ac1e5 100644 --- a/vignettes/slurmjobs.Rmd +++ b/vignettes/slurmjobs.Rmd @@ -135,6 +135,47 @@ job_df |> print() ``` +# Analyzing Finished Jobs + +The `job_report()` function returns in-depth information about a single queued, running, or finished job. Note that through SLURM, an array job can be referenced as a whole, or by one of its tasks. + +Suppose you have a workflow that operates as an array job, and you'd like to profile memory usage across the many tasks. Suppose we've done an initial trial, setting memory relatively high just to get the jobs running without issues. One use of `job_report` could be to determine a better memory request in a data-driven way; the better settings can then be run on the larger dataset after the initial test. + +On an actual system with SLURM installed, you'd normally run something like `job_df = job_report(slurm_job_id)` for the `slurm_job_id` (character or integer) representing the small test. For convenience, we'll start from the output of `job_report` as available in the `slurmjobs` package. + +```{r "job_report_quick_look"} +job_df = readRDS( + system.file("extdata", "job_report_df.rds", package = "slurmjobs") +) +print(job_df) +``` + +Now let's choose a better memory request: + +```{r "job_report_adjust_mem"} +stat_df = job_df |> + # This example includes tasks that fail. 
We're only interested in memory + # for successfully completed tasks + filter(status != 'FAILED') |> + summarize( + mean_mem = mean(max_vmem_gb), + std_mem = sd(max_vmem_gb), + max_mem = max(max_vmem_gb) + ) + +# We could choose a new memory request as 3 standard deviations above the mean +# of actual memory usage +new_limit = stat_df$mean_mem + 3 * stat_df$std_mem + +print( + sprintf( + "%.02fG is a better memory request than %.02fG, which was used before", + new_limit, + job_df$requested_mem_gb[1] + ) +) +``` + # Reproducibility The `r Biocpkg("slurmjobs")` package `r Citep(bib[["slurmjobs"]])` was made possible thanks to: