From f34eb94ef29f87c47620f50597ee49b7c5f2f652 Mon Sep 17 00:00:00 2001
From: Ryo Ficano
Date: Mon, 9 Sep 2024 13:43:03 -0500
Subject: [PATCH] [SWDEV-482963] [Test updates] Add new tests for p0 items - BM

Change-Id: I3266ff7ab14959f1824f408a44e82b861d88d61f
Signed-off-by: Ryo Ficano
---
 pytest/integration_test.py | 179 +++++++++++++++++++++++++++++++++++++
 1 file changed, 179 insertions(+)

diff --git a/pytest/integration_test.py b/pytest/integration_test.py
index 2a336732..4c160f65 100755
--- a/pytest/integration_test.py
+++ b/pytest/integration_test.py
@@ -399,6 +399,185 @@ def test_temperature_edge(self):
     def test_walkthrough(self):
         walk_through(self)
 
+    # Not supported in Navi21
+    @handle_exceptions
+    def test_block_ecc_ras(self):
+        """Check per-block ECC counts and print RAS state for every GPU.
+
+        Each counter returned by amdsmi_get_gpu_ecc_count must be >= 0;
+        the amdsmi_get_gpu_ras_block_features_enabled result is only printed.
+        """
+        processors = amdsmi.amdsmi_get_processor_handles()
+        self.assertGreaterEqual(len(processors), 1)
+        self.assertLessEqual(len(processors), 32)
+        gpu_blocks = {
+            "INVALID": amdsmi.AmdSmiGpuBlock.INVALID,
+            "UMC": amdsmi.AmdSmiGpuBlock.UMC,
+            "SDMA": amdsmi.AmdSmiGpuBlock.SDMA,
+            "GFX": amdsmi.AmdSmiGpuBlock.GFX,
+            "MMHUB": amdsmi.AmdSmiGpuBlock.MMHUB,
+            "ATHUB": amdsmi.AmdSmiGpuBlock.ATHUB,
+            "PCIE_BIF": amdsmi.AmdSmiGpuBlock.PCIE_BIF,
+            "HDP": amdsmi.AmdSmiGpuBlock.HDP,
+            "XGMI_WAFL": amdsmi.AmdSmiGpuBlock.XGMI_WAFL,
+            "DF": amdsmi.AmdSmiGpuBlock.DF,
+            "SMN": amdsmi.AmdSmiGpuBlock.SMN,
+            "SEM": amdsmi.AmdSmiGpuBlock.SEM,
+            "MP0": amdsmi.AmdSmiGpuBlock.MP0,
+            "MP1": amdsmi.AmdSmiGpuBlock.MP1,
+            "FUSE": amdsmi.AmdSmiGpuBlock.FUSE,
+            "MCA": amdsmi.AmdSmiGpuBlock.MCA,
+            "VCN": amdsmi.AmdSmiGpuBlock.VCN,
+            "JPEG": amdsmi.AmdSmiGpuBlock.JPEG,
+            "IH": amdsmi.AmdSmiGpuBlock.IH,
+            "MPIO": amdsmi.AmdSmiGpuBlock.MPIO,
+            "RESERVED": amdsmi.AmdSmiGpuBlock.RESERVED,
+        }
+        for i, processor in enumerate(processors):
+            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processor)
+            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
+            print("\n###Test amdsmi_get_gpu_ecc_count \n")
+            for block_name, block_code in gpu_blocks.items():
+                ecc_count = amdsmi.amdsmi_get_gpu_ecc_count(
+                    processor, block_code)
+                print(" Number of uncorrectable errors for {}: {}".format(
+                    block_name, ecc_count['uncorrectable_count']))
+                print(" Number of correctable errors for {}: {}".format(
+                    block_name, ecc_count['correctable_count']))
+                print(" Number of deferred errors for {}: {}".format(
+                    block_name, ecc_count['deferred_count']))
+                self.assertGreaterEqual(ecc_count['uncorrectable_count'], 0)
+                self.assertGreaterEqual(ecc_count['correctable_count'], 0)
+                self.assertGreaterEqual(ecc_count['deferred_count'], 0)
+                print("\n###Test amdsmi_get_gpu_ras_block_features_enabled \n")
+                ras_enabled = amdsmi.amdsmi_get_gpu_ras_block_features_enabled(
+                    processor, block_code)
+                print(" RAS enabled for {}: {}".format(
+                    block_name, ras_enabled))
+            print()
+        print()
+
+    # TODO: query every entry of utilization_counter, not only one.
+    @handle_exceptions
+    def test_gpu_utilization(self):
+        """Print the coarse-grain GFX utilization counter for every GPU.
+
+        Only COARSE_GRAIN_GFX_ACTIVITY is queried; the printed label is taken
+        from the same key so the output names the counter actually read.
+        """
+        processors = amdsmi.amdsmi_get_processor_handles()
+        self.assertGreaterEqual(len(processors), 1)
+        self.assertLessEqual(len(processors), 32)
+        utilization_counter = {
+            "COARSE_GRAIN_GFX_ACTIVITY": amdsmi.AmdSmiUtilizationCounterType.COARSE_GRAIN_GFX_ACTIVITY,
+            "COARSE_GRAIN_MEM_ACTIVITY": amdsmi.AmdSmiUtilizationCounterType.COARSE_GRAIN_MEM_ACTIVITY,
+            "COARSE_DECODER_ACTIVITY": amdsmi.AmdSmiUtilizationCounterType.COARSE_DECODER_ACTIVITY,
+            "FINE_GRAIN_GFX_ACTIVITY": amdsmi.AmdSmiUtilizationCounterType.FINE_GRAIN_GFX_ACTIVITY,
+            "FINE_GRAIN_MEM_ACTIVITY": amdsmi.AmdSmiUtilizationCounterType.FINE_GRAIN_MEM_ACTIVITY,
+            "FINE_DECODER_ACTIVITY": amdsmi.AmdSmiUtilizationCounterType.FINE_DECODER_ACTIVITY,
+            "UTILIZATION_COUNTER_FIRST": amdsmi.AmdSmiUtilizationCounterType.UTILIZATION_COUNTER_FIRST,
+            "UTILIZATION_COUNTER_LAST": amdsmi.AmdSmiUtilizationCounterType.UTILIZATION_COUNTER_LAST,
+        }
+        counter_name = "COARSE_GRAIN_GFX_ACTIVITY"
+        for i, processor in enumerate(processors):
+            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processor)
+            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
+            print("\n###Test amdsmi_get_utilization_count \n")
+            utilization_count = amdsmi.amdsmi_get_utilization_count(
+                processor, utilization_counter[counter_name])
+            print(" Utilization count for {} is: {} %".format(
+                counter_name, utilization_count))
+        print()
+
+    def test_process_list(self):
+        """Print the process list reported for every GPU."""
+        processors = amdsmi.amdsmi_get_processor_handles()
+        self.assertGreaterEqual(len(processors), 1)
+        self.assertLessEqual(len(processors), 32)
+        for i, processor in enumerate(processors):
+            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processor)
+            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
+            print("\n###Test amdsmi_get_gpu_process_list \n")
+            process_list = amdsmi.amdsmi_get_gpu_process_list(processor)
+            print(" Process list: {}".format(process_list))
+        print()
+
+    def test_socket_info(self):
+        """Print the socket info returned for every socket handle."""
+        sockets = amdsmi.amdsmi_get_socket_handles()
+        for i, socket_handle in enumerate(sockets):
+            print("\n\n###Test Socket {}".format(i))
+            print("\n###Test amdsmi_get_socket_handles and amdsmi_get_socket_info \n")
+            socket_name = amdsmi.amdsmi_get_socket_info(socket_handle)
+            print(" Socket: {}".format(socket_name))
+        print()
+
+    def test_processor_type(self):
+        """Print the processor type reported for every processor handle."""
+        processors = amdsmi.amdsmi_get_processor_handles()
+        self.assertGreaterEqual(len(processors), 1)
+        self.assertLessEqual(len(processors), 32)
+        for i, processor in enumerate(processors):
+            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processor)
+            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
+            print("\n###Test amdsmi_get_processor_type \n")
+            processor_type = amdsmi.amdsmi_get_processor_type(processor)
+            print(" Processor type is: {}".format(processor_type['processor_type']))
+        print()
+
+    def test_clk_frequency(self):
+        """Print the SYS, DF and DCEF clock frequencies for every GPU."""
+        processors = amdsmi.amdsmi_get_processor_handles()
+        self.assertGreaterEqual(len(processors), 1)
+        self.assertLessEqual(len(processors), 32)
+        for i, processor in enumerate(processors):
+            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processor)
+            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
+            print("\n###Test amdsmi_get_clk_freq \n")
+            clock_frequency = amdsmi.amdsmi_get_clk_freq(
+                processor, amdsmi.AmdSmiClkType.SYS)
+            print(" Clock frequency for SYS is: {}".format(clock_frequency))
+            clock_frequency = amdsmi.amdsmi_get_clk_freq(
+                processor, amdsmi.AmdSmiClkType.DF)
+            print(" Clock frequency for DF is: {}".format(clock_frequency))
+            clock_frequency = amdsmi.amdsmi_get_clk_freq(
+                processor, amdsmi.AmdSmiClkType.DCEF)
+            print(" Clock frequency for DCEF is: {}".format(clock_frequency))
+        print()
+
+    def test_memory(self):
+        """Print VRAM, VIS_VRAM and GTT memory usage for every GPU."""
+        processors = amdsmi.amdsmi_get_processor_handles()
+        self.assertGreaterEqual(len(processors), 1)
+        self.assertLessEqual(len(processors), 32)
+        for i, processor in enumerate(processors):
+            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processor)
+            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
+            print("\n###Test amdsmi_get_gpu_memory_usage \n")
+            memory_usage = amdsmi.amdsmi_get_gpu_memory_usage(
+                processor, amdsmi.AmdSmiMemoryType.VRAM)
+            print(" Memory usage for VRAM is: {}".format(memory_usage))
+            memory_usage = amdsmi.amdsmi_get_gpu_memory_usage(
+                processor, amdsmi.AmdSmiMemoryType.VIS_VRAM)
+            print(" Memory usage for VIS_VRAM is: {}".format(memory_usage))
+            memory_usage = amdsmi.amdsmi_get_gpu_memory_usage(
+                processor, amdsmi.AmdSmiMemoryType.GTT)
+            print(" Memory usage for GTT is: {}".format(memory_usage))
+        print()
+
+    def test_vendor_name(self):
+        """Print the vendor name reported for every GPU."""
+        processors = amdsmi.amdsmi_get_processor_handles()
+        self.assertGreaterEqual(len(processors), 1)
+        self.assertLessEqual(len(processors), 32)
+        for i, processor in enumerate(processors):
+            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processor)
+            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
+            print("\n###Test amdsmi_get_gpu_vendor_name \n")
+            vendor_name = amdsmi.amdsmi_get_gpu_vendor_name(processor)
+            print(" Vendor name is: {}".format(vendor_name))
+        print()
+
     # Unstable on workstation cards
     # @handle_exceptions
     # def test_walkthrough_multiprocess(self):