From 05fe3e0ae94645e895201032b295331996228fe1 Mon Sep 17 00:00:00 2001 From: JPRichings Date: Wed, 25 Sep 2024 13:18:11 +0100 Subject: [PATCH 1/5] New tests to check pm counters are accessible and reporting --- tests/env/counters_check.py | 153 ++++++++++++++++++++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 tests/env/counters_check.py diff --git a/tests/env/counters_check.py b/tests/env/counters_check.py new file mode 100644 index 0000000..dfb73eb --- /dev/null +++ b/tests/env/counters_check.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python3 +"""ReFrame tests to check that the Cray pm counters are accessible and reporting""" + +# Based on work from: +# Copyright 2016-2020 Swiss National Supercomputing Centre (CSCS/ETH Zurich) +# ReFrame Project Developers. See the top-level LICENSE file for details. +# SPDX-License-Identifier: BSD-3-Clause + +import reframe as rfm +import reframe.utility.sanity as sn + + +@rfm.simple_test +class CrayCountersEnergyTest(rfm.RunOnlyRegressionTest): + """Checks that the Node Energy counter is reporting""" + + descr = "Checks whether the node energy pm counter is accessible and reporting" + valid_systems = ["archer2:compute"] + valid_prog_environs = ["PrgEnv-cray", "PrgEnv-gnu", "PrgEnv-aocc"] + sourcesdir = None + executable = "cat /sys/cray/pm_counters/energy" + + tags = {"production", "maintenance", "craype"} + + @sanity_function + def assert_finished(self): + """Sanity check that Energy is reported""" + return sn.assert_found(r"\S+ J \S+ us", self.stdout) + + +@rfm.simple_test +class CrayCountersPowerTest(rfm.RunOnlyRegressionTest): + """Checks that the Node Power counter is reporting""" + + descr = "Checks whether the node power pm counter is accessible and reporting" + valid_systems = ["archer2:compute"] + valid_prog_environs = ["PrgEnv-cray", "PrgEnv-gnu", "PrgEnv-aocc"] + sourcesdir = None + executable = "cat /sys/cray/pm_counters/power" + + tags = {"production", "maintenance", "craype"} + + @sanity_function +
def assert_finished(self): + """Sanity check that Power is reporting""" + return sn.assert_found(r"\S+ W \S+ us", self.stdout) + + +@rfm.simple_test +class CrayCountersCPUEnergyTest(rfm.RunOnlyRegressionTest): + """Checks that the CPU Energy counter is reporting""" + + descr = "Checks whether the cpu energy pm counter is accessible and reporting" + valid_systems = ["archer2:compute"] + valid_prog_environs = ["PrgEnv-cray", "PrgEnv-gnu", "PrgEnv-aocc"] + sourcesdir = None + executable = "cat /sys/cray/pm_counters/cpu_energy" + + tags = {"production", "maintenance", "craype"} + + @sanity_function + def assert_finished(self): + """Sanity check that CPU Energy is reporting""" + return sn.assert_found(r"\S+ J \S+ us", self.stdout) + + +@rfm.simple_test +class CrayCountersCPUPowerTest(rfm.RunOnlyRegressionTest): + """Checks that the CPU Power counter is reporting""" + + descr = "Checks whether the cpu power pm counter is accessible and reporting" + valid_systems = ["archer2:compute"] + valid_prog_environs = ["PrgEnv-cray", "PrgEnv-gnu", "PrgEnv-aocc"] + sourcesdir = None + executable = "cat /sys/cray/pm_counters/cpu_power" + + tags = {"production", "maintenance", "craype"} + + @sanity_function + def assert_finished(self): + """Sanity check that CPU Power is reporting""" + return sn.assert_found(r"\S+ W \S+ us", self.stdout) + + +@rfm.simple_test +class CrayCountersMemoryEnergyTest(rfm.RunOnlyRegressionTest): + """Checks that the Memory Energy counter is reporting""" + + descr = "Checks whether the memory energy pm counter is accessible and reporting" + valid_systems = ["archer2:compute"] + valid_prog_environs = ["PrgEnv-cray", "PrgEnv-gnu", "PrgEnv-aocc"] + sourcesdir = None + executable = "cat /sys/cray/pm_counters/memory_energy" + + tags = {"production", "maintenance", "craype"} + + @sanity_function + def assert_finished(self): + """Sanity check that Memory Energy is reporting""" + return sn.assert_found(r"\S+ J \S+ us", self.stdout) + + +@rfm.simple_test +class 
CrayCountersMemPowerTest(rfm.RunOnlyRegressionTest): + """Checks that the Memory Power counter is reporting""" + + descr = "Checks whether the memory power pm counter is accessible and reporting" + valid_systems = ["archer2:compute"] + valid_prog_environs = ["PrgEnv-cray", "PrgEnv-gnu", "PrgEnv-aocc"] + sourcesdir = None + executable = "cat /sys/cray/pm_counters/memory_power" + + tags = {"production", "maintenance", "craype"} + + @sanity_function + def assert_finished(self): + """Sanity check that Memory Power is reporting""" + return sn.assert_found(r"\S+ W \S+ us", self.stdout) + + +@rfm.simple_test +class CrayCountersCPU0TempTest(rfm.RunOnlyRegressionTest): + """Checks that the CPU 0 Tempreture counter is reporting""" + + descr = "Checks whether the cpu 0 tempreture pm counter is accessible and reporting" + valid_systems = ["archer2:compute"] + valid_prog_environs = ["PrgEnv-cray", "PrgEnv-gnu", "PrgEnv-aocc"] + sourcesdir = None + executable = "cat /sys/cray/pm_counters/cpu0_temp" + + tags = {"production", "maintenance", "craype"} + + @sanity_function + def assert_finished(self): + """Sanity check that CPU 0 tempreture is reporting""" + return sn.assert_found(r"\S+ C \S+ us", self.stdout) + + +@rfm.simple_test +class CrayCountersCPU1TempTest(rfm.RunOnlyRegressionTest): + """Checks that the CPU 1 Tempreture counter is reporting""" + descr = "Checks whether the cpu 1 tempreture pm counter is accessible and reporting" + valid_systems = ["archer2:compute"] + valid_prog_environs = ["PrgEnv-cray", "PrgEnv-gnu", "PrgEnv-aocc"] + sourcesdir = None + executable = "cat /sys/cray/pm_counters/cpu1_temp" + + tags = {"production", "maintenance", "craype"} + + @sanity_function + def assert_finished(self): + """Sanity check that CPU 1 tempreture is reporting""" + return sn.assert_found(r"\S+ C \S+ us", self.stdout) From 87ae0482e28ea2c0dcfaf738b58d7627c714ee7a Mon Sep 17 00:00:00 2001 From: JPRichings Date: Wed, 25 Sep 2024 13:25:23 +0100 Subject: [PATCH 2/5] spelling --- 
tests/env/counters_check.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/env/counters_check.py b/tests/env/counters_check.py index dfb73eb..b70799f 100644 --- a/tests/env/counters_check.py +++ b/tests/env/counters_check.py @@ -120,9 +120,9 @@ def assert_finished(self): @rfm.simple_test class CrayCountersCPU0TempTest(rfm.RunOnlyRegressionTest): - """Checks that the CPU 0 Tempreture counter is reporting""" + """Checks that the CPU 0 Temperature counter is reporting""" - descr = "Checks whether the cpu 0 tempreture pm counter is accessible and reporting" + descr = "Checks whether the cpu 0 temperature pm counter is accessible and reporting" valid_systems = ["archer2:compute"] valid_prog_environs = ["PrgEnv-cray", "PrgEnv-gnu", "PrgEnv-aocc"] sourcesdir = None @@ -132,14 +132,14 @@ class CrayCountersCPU0TempTest(rfm.RunOnlyRegressionTest): @sanity_function def assert_finished(self): - """Sanity check that CPU 0 tempreture is reporting""" + """Sanity check that CPU 0 temperature is reporting""" return sn.assert_found(r"\S+ C \S+ us", self.stdout) @rfm.simple_test class CrayCountersCPU1TempTest(rfm.RunOnlyRegressionTest): - """Checks that the CPU 1 Tempreture counter is reporting""" - descr = "Checks whether the cpu 1 tempreture pm counter is accessible and reporting" + """Checks that the CPU 1 Temperature counter is reporting""" + descr = "Checks whether the cpu 1 temperature pm counter is accessible and reporting" valid_systems = ["archer2:compute"] valid_prog_environs = ["PrgEnv-cray", "PrgEnv-gnu", "PrgEnv-aocc"] sourcesdir = None @@ -149,5 +149,5 @@ class CrayCountersCPU1TempTest(rfm.RunOnlyRegressionTest): @sanity_function def assert_finished(self): - """Sanity check that CPU 1 tempreture is reporting""" + """Sanity check that CPU 1 temperature is reporting""" return sn.assert_found(r"\S+ C \S+ us", self.stdout) From f616b04ed9cfbcc320c9fe3fc04dfb06490e7dc2 Mon Sep 17 00:00:00 2001 From: JPRichings Date: Wed, 25 Sep 2024 
13:33:28 +0100 Subject: [PATCH 3/5] style guide compliance --- tests/env/counters_check.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/env/counters_check.py b/tests/env/counters_check.py index b70799f..13d9508 100644 --- a/tests/env/counters_check.py +++ b/tests/env/counters_check.py @@ -54,8 +54,8 @@ class CrayCountersCPUEnergyTest(rfm.RunOnlyRegressionTest): valid_systems = ["archer2:compute"] valid_prog_environs = ["PrgEnv-cray", "PrgEnv-gnu", "PrgEnv-aocc"] sourcesdir = None - executable = "cat /sys/cray/pm_counters/cpu_energy" - + executable = "cat /sys/cray/pm_counters/cpu_energy" + tags = {"production", "maintenance", "craype"} @sanity_function From 300d69aa8905d05fc7016e75a60b00e45bf2ece2 Mon Sep 17 00:00:00 2001 From: JPRichings Date: Wed, 25 Sep 2024 13:38:42 +0100 Subject: [PATCH 4/5] style guide compliance --- tests/env/counters_check.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/env/counters_check.py b/tests/env/counters_check.py index 13d9508..58fe678 100644 --- a/tests/env/counters_check.py +++ b/tests/env/counters_check.py @@ -139,6 +139,7 @@ def assert_finished(self): @rfm.simple_test class CrayCountersCPU1TempTest(rfm.RunOnlyRegressionTest): """Checks that the CPU 1 Temperature counter is reporting""" + descr = "Checks whether the cpu 1 temperature pm counter is accessible and reporting" valid_systems = ["archer2:compute"] valid_prog_environs = ["PrgEnv-cray", "PrgEnv-gnu", "PrgEnv-aocc"] From 22cf054b5a120dd94484e7257f09117f4c6dce36 Mon Sep 17 00:00:00 2001 From: JPRichings Date: Wed, 25 Sep 2024 13:44:07 +0100 Subject: [PATCH 5/5] reduced prgenv's included to reduce test duplication --- tests/env/counters_check.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/env/counters_check.py b/tests/env/counters_check.py index 58fe678..902a1df 100644 --- a/tests/env/counters_check.py +++ b/tests/env/counters_check.py @@ -16,7 +16,7 @@ class 
CrayCountersEnergyTest(rfm.RunOnlyRegressionTest): descr = "Checks whether the node energy pm counter is accessible and reporting" valid_systems = ["archer2:compute"] - valid_prog_environs = ["PrgEnv-cray", "PrgEnv-gnu", "PrgEnv-aocc"] + valid_prog_environs = ["PrgEnv-cray"] sourcesdir = None executable = "cat /sys/cray/pm_counters/energy" @@ -34,7 +34,7 @@ class CrayCountersPowerTest(rfm.RunOnlyRegressionTest): descr = "Checks whether the node power pm counter is accessible and reporting" valid_systems = ["archer2:compute"] - valid_prog_environs = ["PrgEnv-cray", "PrgEnv-gnu", "PrgEnv-aocc"] + valid_prog_environs = ["PrgEnv-cray"] sourcesdir = None executable = "cat /sys/cray/pm_counters/power" @@ -52,7 +52,7 @@ class CrayCountersCPUEnergyTest(rfm.RunOnlyRegressionTest): descr = "Checks whether the cpu energy pm counter is accessible and reporting" valid_systems = ["archer2:compute"] - valid_prog_environs = ["PrgEnv-cray", "PrgEnv-gnu", "PrgEnv-aocc"] + valid_prog_environs = ["PrgEnv-cray"] sourcesdir = None executable = "cat /sys/cray/pm_counters/cpu_energy" @@ -70,7 +70,7 @@ class CrayCountersCPUPowerTest(rfm.RunOnlyRegressionTest): descr = "Checks whether the cpu power pm counter is accessible and reporting" valid_systems = ["archer2:compute"] - valid_prog_environs = ["PrgEnv-cray", "PrgEnv-gnu", "PrgEnv-aocc"] + valid_prog_environs = ["PrgEnv-cray"] sourcesdir = None executable = "cat /sys/cray/pm_counters/cpu_power" @@ -88,7 +88,7 @@ class CrayCountersMemoryEnergyTest(rfm.RunOnlyRegressionTest): descr = "Checks whether the memory energy pm counter is accessible and reporting" valid_systems = ["archer2:compute"] - valid_prog_environs = ["PrgEnv-cray", "PrgEnv-gnu", "PrgEnv-aocc"] + valid_prog_environs = ["PrgEnv-cray"] sourcesdir = None executable = "cat /sys/cray/pm_counters/memory_energy" @@ -106,7 +106,7 @@ class CrayCountersMemPowerTest(rfm.RunOnlyRegressionTest): descr = "Checks whether the memory power pm counter is accessible and reporting" 
valid_systems = ["archer2:compute"] - valid_prog_environs = ["PrgEnv-cray", "PrgEnv-gnu", "PrgEnv-aocc"] + valid_prog_environs = ["PrgEnv-cray"] sourcesdir = None executable = "cat /sys/cray/pm_counters/memory_power" @@ -124,7 +124,7 @@ class CrayCountersCPU0TempTest(rfm.RunOnlyRegressionTest): descr = "Checks whether the cpu 0 temperature pm counter is accessible and reporting" valid_systems = ["archer2:compute"] - valid_prog_environs = ["PrgEnv-cray", "PrgEnv-gnu", "PrgEnv-aocc"] + valid_prog_environs = ["PrgEnv-cray"] sourcesdir = None executable = "cat /sys/cray/pm_counters/cpu0_temp" @@ -142,7 +142,7 @@ class CrayCountersCPU1TempTest(rfm.RunOnlyRegressionTest): descr = "Checks whether the cpu 1 temperature pm counter is accessible and reporting" valid_systems = ["archer2:compute"] - valid_prog_environs = ["PrgEnv-cray", "PrgEnv-gnu", "PrgEnv-aocc"] + valid_prog_environs = ["PrgEnv-cray"] sourcesdir = None executable = "cat /sys/cray/pm_counters/cpu1_temp"