Skip to content

Commit

Permalink
Overall util/mem multiple functions
Browse files Browse the repository at this point in the history
  • Loading branch information
jdh4 committed Nov 2, 2024
1 parent e4452a8 commit 1fc095d
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 60 deletions.
145 changes: 92 additions & 53 deletions output_formatters.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,22 @@ def output(self, no_color: bool=True) -> str:
def output_metadata(self) -> str:
pass

@abstractmethod
def output_overall_cpu_util(self) -> str:
    """Return the overall CPU utilization as text (abstract; no default)."""

@abstractmethod
def output_overall_cpu_memory_usage(self) -> str:
    """Return the overall CPU memory usage as text (abstract; no default)."""

@abstractmethod
def output_overall_gpu_util(self) -> str:
    """Return the overall GPU utilization as text (abstract; no default)."""

@abstractmethod
def output_overall_gpu_memory_usage(self) -> str:
    """Return the overall GPU memory usage as text (abstract; no default)."""

@staticmethod
def human_bytes(size: int, decimal_places=1) -> str:
size = float(size)
Expand Down Expand Up @@ -258,6 +274,7 @@ def __init__(self, js: Jobstats, width: int=80) -> None:
self.width = width

def output_metadata(self) -> str:
"""Return the job metadata."""
meta = f" Job ID: {self.txt_bold}{self.js.jobid}{self.txt_normal}\n"
meta += f" NetID/Account: {self.js.user}/{self.js.account}\n"
meta += f" Job Name: {self.js.jobname}\n"
Expand All @@ -278,6 +295,70 @@ def output_metadata(self) -> str:
meta += self.time_limit_formatted() + "\n"
return meta

def output_overall_cpu_util(self) -> str:
    """Build the "CPU utilization" line of the report.

    When ``self.js.cpu_util_error_code`` is 0, the first two elements of
    ``self.js.cpu_util_total__used_alloc_cores`` (used, total) are turned
    into a rounded percentage, which is stored on ``self.js.cpu_efficiency``
    and rendered through ``self.draw_meter``. For any other code the line
    carries a parenthesized explanation instead of a meter.
    """
    code = self.js.cpu_util_error_code
    if code == 0:
        used, allotted, _ = self.js.cpu_util_total__used_alloc_cores
        # the percentage is saved on the job object as a side effect
        self.js.cpu_efficiency = round(100 * used / allotted)
        bar = self.draw_meter(self.js.cpu_efficiency, "cpu", util=True)
        return f" CPU utilization {bar}\n"
    # known failure codes; anything else falls back to the generic message
    failures = {
        1: " CPU utilization (JSON is malformed)\n",
        2: " CPU utilization (Value was erroneously found to be >100%)\n",
        3: " CPU utilization (Total CPU time was found to be zero)\n",
    }
    return failures.get(code, " CPU utilization (Something went wrong)\n")

def output_overall_cpu_memory_usage(self) -> str:
    """Build the "CPU memory usage" line of the report.

    When ``self.js.cpu_mem_error_code`` is 0, the first two elements of
    ``self.js.cpu_mem_total__used_alloc_cores`` (used, total) are turned
    into a rounded percentage, which is stored on
    ``self.js.cpu_memory_efficiency`` and rendered through
    ``self.draw_meter``. For any other code the line carries a
    parenthesized explanation instead of a meter.
    """
    code = self.js.cpu_mem_error_code
    if code == 0:
        used, allotted, _ = self.js.cpu_mem_total__used_alloc_cores
        # the percentage is saved on the job object as a side effect
        self.js.cpu_memory_efficiency = round(100 * used / allotted)
        bar = self.draw_meter(self.js.cpu_memory_efficiency, "cpu")
        return f" CPU memory usage {bar}\n"
    # known failure codes; anything else falls back to the generic message
    failures = {
        1: " CPU memory usage (JSON is malformed)\n",
        2: " CPU memory usage (Value was erroneously found to be >100%)\n",
        3: " CPU memory usage (Allocated memory was found to be zero)\n",
    }
    return failures.get(code, " CPU memory usage (Something went wrong)\n")

def output_overall_gpu_util(self) -> str:
    """Build the "GPU utilization" line of the report.

    When ``self.js.gpu_util_error_code`` is 0, the first element of
    ``self.js.gpu_util_total__util_gpus`` is divided by the second, the
    quotient is stored (unrounded) on ``self.js.gpu_utilization`` and its
    rounded value is rendered through ``self.draw_meter``. Code 1 reports
    an unknown value; every other code reports a generic failure.
    """
    code = self.js.gpu_util_error_code
    if code == 0:
        numerator, gpu_count = self.js.gpu_util_total__util_gpus
        # keep the unrounded value on the job object; round only for display
        self.js.gpu_utilization = numerator / gpu_count
        bar = self.draw_meter(round(self.js.gpu_utilization), "gpu", util=True)
        return f" GPU utilization {bar}\n"
    if code == 1:
        return " GPU utilization (Value is unknown)\n"
    return " GPU utilization (Something went wrong)\n"

def output_overall_gpu_memory_usage(self) -> str:
    """Build the "GPU memory usage" line of the report.

    When ``self.js.gpu_mem_error_code`` is 0, the two elements of
    ``self.js.gpu_mem_total__used_alloc`` are turned into a rounded
    percentage (first over second) and rendered through
    ``self.draw_meter``. For any other code the line carries a
    parenthesized explanation instead of a meter.
    """
    code = self.js.gpu_mem_error_code
    if code == 0:
        used, allotted = self.js.gpu_mem_total__used_alloc
        percent = round(100 * used / allotted)
        bar = self.draw_meter(percent, "gpu")
        return f" GPU memory usage {bar}\n"
    # known failure codes; anything else falls back to the generic message
    failures = {
        1: " GPU memory usage (JSON is malformed)\n",
        2: " GPU memory usage (Value was erroneously found to be >100%)\n",
        3: " GPU memory usage (Allocated memory was found to be zero)\n",
    }
    return failures.get(code, " GPU memory usage (Something went wrong)\n")

def output(self, no_color: bool=True) -> str:
if blessed_is_available and not no_color:
term = Terminal()
Expand All @@ -293,71 +374,28 @@ def output(self, no_color: bool=True) -> str:
report += self.width * "=" + "\n"
report += self.output_metadata()
report += "\n"

########################################################################
# OVERALL UTILIZATION #
########################################################################
report += f" {self.txt_bold}Overall Utilization{self.txt_normal}\n"
heading = f"{self.txt_bold}Overall Utilization{self.txt_normal}"
report += heading.center(self.width) + "\n"
report += self.width * "=" + "\n"
# overall CPU time utilization
if self.js.cpu_util_error_code == 0:
total_used, total, _ = self.js.cpu_util_total__used_alloc_cores
self.js.cpu_efficiency = round(100 * total_used / total)
meter = self.draw_meter(self.js.cpu_efficiency, "cpu", util=True)
report += " CPU utilization " + meter + "\n"
elif self.js.cpu_util_error_code == 1:
report += " CPU utilization (JSON is malformed)\n"
elif self.js.cpu_util_error_code == 2:
report += " CPU utilization (Value was erroneously found to be >100%)\n"
elif self.js.cpu_util_error_code == 3:
report += " CPU utilization (Total CPU time was found to be zero)\n"
else:
report += " CPU utilization (Something went wrong)\n"
report += self.output_overall_cpu_util()
# overall CPU memory utilization
if self.js.cpu_mem_error_code == 0:
total_used, total, _ = self.js.cpu_mem_total__used_alloc_cores
self.js.cpu_memory_efficiency = round(100 * total_used / total)
meter = self.draw_meter(self.js.cpu_memory_efficiency, "cpu")
report += " CPU memory usage " + meter + "\n"
elif self.js.cpu_mem_error_code == 1:
report += " CPU memory usage (JSON is malformed)\n"
elif self.js.cpu_mem_error_code == 2:
report += " CPU memory usage (Value was erroneously found to be >100%)\n"
elif self.js.cpu_mem_error_code == 3:
report += " CPU memory usage (Allocated memory was found to be zero)\n"
else:
report += " CPU memory usage (Something went wrong)\n"
report += self.output_overall_cpu_memory_usage()
# GPUs
if self.js.gpus:
# overall GPU utilization
if self.js.gpu_util_error_code == 0:
overall, overall_gpu_count = self.js.gpu_util_total__util_gpus
self.js.gpu_utilization = overall / overall_gpu_count
meter = self.draw_meter(round(self.js.gpu_utilization), "gpu", util=True)
report += " GPU utilization " + meter + "\n"
elif self.js.gpu_util_error_code == 1:
report += " GPU utilization (Value is unknown)\n"
else:
report += " GPU utilization (Something went wrong)\n"
report += self.output_overall_gpu_util()
# overall GPU memory usage
if self.js.gpu_mem_error_code == 0:
overall, overall_total = self.js.gpu_mem_total__used_alloc
gpu_memory_usage = round(100 * overall / overall_total)
report += " GPU memory usage " + self.draw_meter(gpu_memory_usage, "gpu") + "\n"
elif self.js.gpu_mem_error_code == 1:
report += " GPU memory usage (JSON is malformed)\n"
elif self.js.gpu_mem_error_code == 2:
report += " GPU memory usage (Value was erroneously found to be >100%)\n"
elif self.js.gpu_mem_error_code == 3:
report += " GPU memory usage (Allocated memory was found to be zero)\n"
else:
report += " GPU memory usage (Something went wrong)\n"
report += self.output_overall_gpu_memory_usage()
report += "\n"

########################################################################
# DETAILED UTILIZATION #
########################################################################
report += f" {self.txt_bold}Detailed Utilization{self.txt_normal}\n"
heading = f"{self.txt_bold}Detailed Utilization{self.txt_normal}"
report += heading.center(self.width) + "\n"
report += self.width * "=" + "\n"
gutter = " "
# CPU time utilization
Expand Down Expand Up @@ -419,17 +457,18 @@ def output(self, no_color: bool=True) -> str:
gpu_errors = False
if self.js.gpus:
gpu_errors = bool(self.js.gpu_util_error_code > 0 or self.js.gpu_mem_error_code > 0)
heading = f"{self.txt_bold}Notes{self.txt_normal}"
if self.js.cpu_util_error_code == 0 and self.js.cpu_mem_error_code == 0 and not gpu_errors:
report += "\n"
notes = self.job_notes()
if notes:
report += f" {self.txt_bold}Notes{self.txt_normal}\n"
report += heading.center(self.width) + "\n"
report += self.width * "=" + "\n"
report += notes
return report
else:
report += "\n"
report += f" {self.txt_bold}Notes{self.txt_normal}\n"
report += heading.center(self.width) + "\n"
report += self.width * "=" + "\n"
if self.js.cpu_util_error_code:
report += f"{gutter}* The CPU utilization could not be determined.\n"
Expand Down
30 changes: 23 additions & 7 deletions tests/test_output_formatters.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@
def simple_stats(mocker):
cols = ('JobIDRaw|Start|End|Cluster|AllocTRES|AdminComment|User|Account|'
'State|NNodes|NCPUS|ReqMem|QOS|Partition|TimelimitRaw|JobName\n')
ss64 = ('JS1:H4sIADelIWcC/1WNQQqDMBBF7zLrtEzG0ZhcphQzqGBM0bgQyd0bUii4fe8'
'//gVr9LKDuyDNo2yPibpBr00FMb2XV5AQtxOcRtMY1j0xKjh28X/TdsRMhRfxa9'
'IcBJyxbPsnKRg+R3lgzPk+ICSrYKwW8xcnjeJ8iwAAAA==')
ss64 = ('JS1:H4sIAPdcJmcC/1WNQQqDMBBF7zLrtEzG0ZhcphQzqGBM0bgQyd0bUii4fe8'
'//gVr9LKDuyDNo2yPibpBr00FMb2XV5AQtxOcRtMY1j0xKjh28X/TdsRMhRfxa9'
'IcBJyxbPsnKRg+R3lgzPk+ICSrYKwW8xcnjeJ8iwAAAA==')
Expand Down Expand Up @@ -111,6 +108,7 @@ def test_format_note(simple_stats):
expected = f" * {note}\n {url}\n\n"
assert formatter.format_note(note, url) == expected


def test_output_metadata(simple_stats):
formatter = ClassicOutput(simple_stats)
expected = """
Expand All @@ -128,7 +126,25 @@ def test_output_metadata(simple_stats):
Time Limit: 1-00:00:00
"""
actual = formatter.output_metadata()
for e, a in zip(expected.split("\n"), [""] + actual.split("\n")):
# avoid timezone complications
if "Start Time" not in e:
assert e.strip() == a.strip()
for a, e in zip([""] + actual.split("\n"), expected.split("\n")):
# avoid time zone complications
if "Start Time" not in a:
assert a.strip() == e.strip()


def test_output_overall_cpu_util(simple_stats):
    """Check the overall CPU utilization line produced by ClassicOutput."""
    classic = ClassicOutput(simple_stats)
    # the comparison below only makes sense for an error-free job
    assert classic.js.cpu_util_error_code == 0
    produced = classic.output_overall_cpu_util()
    wanted = (" CPU utilization "
              "[|||||||||||||||||||||||||||||||||||||||||||||||98%]\n")
    assert produced == wanted


def test_output_overall_cpu_memory_usage(simple_stats):
    """Check the overall CPU memory usage line produced by ClassicOutput."""
    classic = ClassicOutput(simple_stats)
    # the comparison below only makes sense for an error-free job
    assert classic.js.cpu_mem_error_code == 0
    produced = classic.output_overall_cpu_memory_usage()
    wanted = (" CPU memory usage "
              "[|||||||||||||||||||||||||| 52%]\n")
    assert produced == wanted

0 comments on commit 1fc095d

Please sign in to comment.