From bfd5c0ea5dab312fd3bbfd3de1a469277b3cb9ff Mon Sep 17 00:00:00 2001
From: Nick-Eagles <nick.eagles@libd.org>
Date: Tue, 3 Oct 2023 12:13:16 -0400
Subject: [PATCH] Add an example output from 'job_report'; add a vignette
 section showing how to use 'job_report'

---
 NAMESPACE                      |   1 +
 inst/extdata/job_report_df.rds | Bin 0 -> 472 bytes
 man/job_report.Rd              |  35 ++++++++++++++++++++++++++++
 vignettes/slurmjobs.Rmd        |  41 +++++++++++++++++++++++++++++++++
 4 files changed, 77 insertions(+)
 create mode 100644 inst/extdata/job_report_df.rds
 create mode 100644 man/job_report.Rd

diff --git a/NAMESPACE b/NAMESPACE
index c65b9e5..f070575 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,6 +2,7 @@
 
 export(job_info)
 export(job_loop)
+export(job_report)
 export(job_single)
 export(with_wd)
 import(dplyr)
diff --git a/inst/extdata/job_report_df.rds b/inst/extdata/job_report_df.rds
new file mode 100644
index 0000000000000000000000000000000000000000..41fcc78f467fb4731788954e3046d396b9e67b93
GIT binary patch
literal 472
zcmV;}0Vn<+iwFP!000001JzSKPXa*@UG5I`K#Ymh);1ai6sXXs7!!<|XkuY<S-3@!
zkHg)?&&I}@KVfZ6?GG_AmNv#jOL6wR1@<J7LSvj{Zsxsx*_)l6-3uXvbY$oOqz55I
z!QGwgbPBi$0AcWP@DMVP32`WvPR8Rg1;rJVP|&=B78I0JP)b4RUX++s*5-UQ1sjae
zxxWEwC8K%jlu6CPl;ZP$$ss-Bo(G}8U~Xt`^zNrZPDq7$UFopw5L06Nw826JvoF#j
zE^voJ_6{yf(FXJ}p<j$5*M2I>eKR)~4{Nv4%glXi`{{bJm3f2@WF7vc=vwA2>JlB^
zA77gLQ?GZ;U);C%3PoRZa6`0QbQpXXd<5L|bf0xRqrUbX?~h&6d+$$<j6c`9oEVQl
z8+}fM=1qjTyt%QJTisb*kt32_TF(J;<(icD9UyhyYPd?|yoIIw$jMtJ+e3zU(=r!w
zSwkIDUg9OECKn6!2J`Z9N{$<ZabjClQne299v`aWGmA3TWx|3vQA82<)ma>~a<>eV
zoRzp$aBR|fKpb=Up?V;bI;S)8P@>h^6@1$5_M5Lay4;56E0$gKkPb+eYGaOPid38!
O8utN%8UrK}1^@t&1K-I2

literal 0
HcmV?d00001

diff --git a/man/job_report.Rd b/man/job_report.Rd
new file mode 100644
index 0000000..87245ba
--- /dev/null
+++ b/man/job_report.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/job_report.R
+\name{job_report}
+\alias{job_report}
+\title{Given the ID for a completed or currently running SLURM job, return a tibble
+containing outputs from 'sacct'. In particular, returns information about
+job name, ID, user, status, and allocated and used memory and CPUs.}
+\usage{
+job_report(job_id)
+}
+\arguments{
+\item{job_id}{The ID associated with a finished or currently running SLURM
+job (character(1) or integer(1))}
+}
+\value{
+A tibble with information about the requested job.
+}
+\description{
+Given the ID for a completed or currently running SLURM job, return a tibble
+containing outputs from 'sacct'. In particular, returns information about
+job name, ID, user, status, and allocated and used memory and CPUs.
+}
+\examples{
+
+#    Requires SLURM: the body only runs if 'which sbatch' exits with status 0
+if (system("which sbatch") == 0) {
+    job_df = job_report('234904')
+    
+    #    Print the 'max_vmem_gb' column (max virtual memory for this job)
+    print(job_df$max_vmem_gb)
+}
+}
+\author{
+Nicholas J. Eagles
+}
diff --git a/vignettes/slurmjobs.Rmd b/vignettes/slurmjobs.Rmd
index cedd827..05ac1e5 100644
--- a/vignettes/slurmjobs.Rmd
+++ b/vignettes/slurmjobs.Rmd
@@ -135,6 +135,47 @@ job_df |>
     print()
 ```
 
+# Analyzing Finished Jobs
+
+The `job_report()` function returns in-depth information about a single queued, running, or finished job. Note that through SLURM, an array job can be referenced as a whole, or by one of its tasks.
+
+Suppose you have a workflow that operates as an array job, and you'd like to profile memory usage across its many tasks. Suppose also that you've done an initial trial, setting the memory request relatively high just to get the jobs running without issues. One use of `job_report()` is to determine a better memory request in a data-driven way; the improved settings can then be used on the larger dataset after the initial test.
+
+On an actual system with SLURM installed, you'd normally run something like `job_df = job_report(slurm_job_id)`, where `slurm_job_id` (character or integer) is the ID of the small test job. For convenience, we'll instead load example output from `job_report()` that is included in the `slurmjobs` package.
+
+```{r "job_report_quick_look"}
+#   mustWork = TRUE: error out right away if the example file is missing
+job_df = readRDS(system.file(
+    "extdata", "job_report_df.rds", package = "slurmjobs", mustWork = TRUE))
+print(job_df)
+```
+
+Now let's choose a better memory request:
+
+```{r "job_report_adjust_mem"}
+stat_df = job_df |>
+    #   This example includes tasks that fail. Rows with status 'FAILED' are
+    #   dropped before summarizing memory; every other status value is kept
+    filter(status != 'FAILED') |>
+    summarize(
+        mean_mem = mean(max_vmem_gb),
+        std_mem = sd(max_vmem_gb),
+        max_mem = max(max_vmem_gb)
+    )
+
+#   One option for the new memory request: 3 standard deviations above the
+#   mean of 'max_vmem_gb' among the rows kept above
+new_limit = stat_df$mean_mem + 3 * stat_df$std_mem
+
+print(
+    sprintf(
+        "%.02fG is a better memory request than %.02fG, which was used before",
+        new_limit,
+        job_df$requested_mem_gb[1]
+    )
+)
+```
+
 # Reproducibility
 
 The `r Biocpkg("slurmjobs")` package `r Citep(bib[["slurmjobs"]])` was made possible thanks to: