diff --git a/tests/functional/README.md b/tests/functional/README.md index 8f4e2fa..ac84c16 100644 --- a/tests/functional/README.md +++ b/tests/functional/README.md @@ -1,7 +1,9 @@ # Functional Tests for the Hardware Observer Charm -There are 2 main types of functional tests for the Hardware Observer charm - those which depend on real hardware to be present and those that can run without it. +There are 2 main types of functional tests for the Hardware Observer charm - those which depend on +real hardware to be present and those that can run without it. -Here, "real hardware" refers to machines that are not VMs or containers and have access to real hardware resources like RAID cards and BMC management tools. +Here, "real hardware" refers to machines that are not VMs or containers and have access to real +hardware resources like RAID cards and BMC management tools. Note: the built charm must be present in the root of the project's directory for the tests to run. @@ -16,63 +18,72 @@ These include: Running these tests is as simple as executing the `tox -e func -- -v` ## Hardware Dependent Tests -These are the tests that depend on real hardware to be executed. This is performed manually when required, for example - validating the charm's full functionality before a new release. +These are the tests that depend on real hardware to be executed. This is performed manually when +required, for example - validating the charm's full functionality before a new release. -Hardware dependent tests are present in the `TestCharmWithHW` class in the `test_charm.py` module. The pytest marker `realhw` has been added to this class (which would include all the tests in this class). +Hardware dependent tests are present in the `TestCharmWithHW` class in the `test_charm.py` module. +The pytest marker `realhw` has been added to this class (which would include all the tests in this +class). -These tests will only be executed if the `--realhw` option for pytest is provided. 
Additionally, the `--collectors` option with space separated values can be provided, if specific hardware is present. Check the `conftest.py` for options. Otherwise, all these tests are skipped (this is done by checking for the presence of the `realhw` marker mentioned earlier.) +These tests will only be executed if the `--realhw` option for pytest is provided. Additionally, +the `--collectors` option with space separated values can be provided, if specific hardware is +present. Check the `conftest.py` for options. Otherwise, all these tests are skipped (this is done +by checking for the presence of the `realhw` marker mentioned earlier.) -Note: The operator must set up a test model with the machine added beforehand. The machine must be an actual host, containers or VMs won't work. +Note: The operator must set up a test model with the machine added beforehand. The machine must be +an actual host, containers or VMs won't work. +Note: depending on the test, certain prerequisites are needed, e.g. having set up an nvidia driver. +Check the tests' docstrings for details. Some of these tests include: * Check if all collectors are detected in the exporter config file * Test if metrics are available at the expected endpoint * Test if metrics specific to the collectors being tested are available * Test if smarctl-exporter snap is installed and running -* Test if Nvidia drivers and dcgm-exporter snap are installed +* Test if the dcgm snap is installed and more. -In order to run these tests, a couple of prerequisite steps need to be completed. +In order to run these tests, several prerequisites may need to be completed. 1. Setup test environment -2. Add environment variables for Redfish credentials. -3. Setup required resource files -4. Determine if the machine has Nvidia GPUs and add the `--nvidia` flag is present. -5. Find supported collectors +1. Build the charm +1. Add environment variables for Redfish credentials (if testing redfish). +1. 
Setup required resource files (if testing hardware RAID). +1. Install the NVIDIA GPU driver and add the `--nvidia` flag (if testing NVIDIA GPU observability). +1. Find supported collectors ### 1. Setup test environment -For the hardware dependent tests, we add the test machine beforehand and the bundle only handles deploying the applications to this machine. -We would need 2 machines which are in the same network. One of them will be bootstrapped as a controller (can be VM or container) for the juju manual cloud we will be creating and the other will be added as a machine into the model. +Follow the steps in dev-environment.md, up to and including the "Add physical machine" section. +The end result should be a test model with a manually provisioned machine listed: -A basic outline of the steps would look like: ``` -# Add manual cloud to juju -$ juju add-cloud manual-cloud --client +$ juju status +Model Controller Cloud/Region Version SLA Timestamp +test lxd-controller localhost/localhost 3.6.1 unsupported 01:39:10Z -Select cloud type: manual -ssh connection string for controller: user@$IP_CONTROLLER - -# Bootstrap controller on the machine -$ juju bootstrap manual-cloud manual-controller +Machine State Address Inst id Base AZ Message +0 started 10.239.17.1 manual:10.239.17.1 ubuntu@22.04 Manually provisioned machine +``` -# Add model and machine -$ juju add-model test +### 2. Build the charm -$ juju add-machine ssh:user@IP_MACHINE_FOR_TESTING -``` +Just run `charmcraft pack` from the project directory. -### 2. Add environment variables for Redfish credentials -As part of the redfish collector specific tests, redfish credentials need to be provided for authentication. +### 3. Add environment variables for Redfish credentials +As part of the redfish collector specific tests, redfish credentials need to be provided for +authentication. Therefore, the test expects these environment variables to be set: * `REDFISH_USERNAME` * `REDFISH_PASSWORD` -### 3. 
Setup required resource files +### 4. Setup required resource files Create a new `resources` directory in the root of the project. -Check which collectors are supported on the machine and verify if they need to be manually downloaded (refer https://charmhub.io/hardware-observer/resources/). -Download the required resource files from their respective third-party websites and add the extracted `.deb` file or binary to this directory. +Check which collectors are supported on the machine and verify if they need to be manually +downloaded (refer https://charmhub.io/hardware-observer/resources/). Download the required +resource files from their respective third-party websites and add the extracted `.deb` file or +binary to this directory. Note: The tests expect these resources to be named exactly in the manner provided below: * storcli.deb @@ -81,22 +92,28 @@ Note: The tests expect these resources to be named exactly in the manner provide * sas3ircu ### 4. Find supported collectors -Note down all the collectors supported by the machine as they need to be provided to pytest as part of its CLI arguments. +Note down all the collectors supported by the machine as they need to be provided to pytest as part +of its CLI arguments. -This is done by passing the required collectors in a space-separated manner via `--collector` option to the tox target. +This is done by passing the required collectors in a space-separated manner via `--collectors` +option to the tox target. -The supported collectors can be found by checking the output of the `lshw` command (for RAID cards) or checking availability of Redfish and IPMI on the BMC. +The supported collectors can be found by checking the output of the `lshw` command (for RAID cards) +or checking availability of Redfish and IPMI on the BMC. 
--- ### Running the tests -After ensuring the prerequisite steps are complete, the final command to run the tests would look something like this: +After ensuring the prerequisite steps are complete, the final command to run the tests would look +something like this: + ``` -tox -e func -- -v --realhw --model test --collectors ipmi_dcmi ipmi_sel ipmi_sensor redfish mega_raid --keep-models +tox -e func -- -v --realhw --model test --collectors ipmi_dcmi ipmi_sel ipmi_sensor redfish mega_raid --nvidia --keep-models ``` -This would pass the required collectors to tox which then sends it to the pytest command and starts the hardware dependent tests. +This would pass the required collectors to tox which then sends it to the pytest command and starts +the hardware dependent tests. ### Troubleshooting diff --git a/tests/functional/bundle.yaml.j2 b/tests/functional/bundle.yaml.j2 index f23a5bc..d696f67 100644 --- a/tests/functional/bundle.yaml.j2 +++ b/tests/functional/bundle.yaml.j2 @@ -23,6 +23,8 @@ applications: {% endif %} hardware-observer: charm: {{ charm }} + options: + redfish-disable: {{ redfish_disable }} relations: - - grafana-agent:juju-info diff --git a/tests/functional/conftest.py b/tests/functional/conftest.py index b29025a..08bddaf 100644 --- a/tests/functional/conftest.py +++ b/tests/functional/conftest.py @@ -1,8 +1,11 @@ +import inspect import logging +import os import platform from pathlib import Path import pytest +from pytest_operator.plugin import OpsTest from utils import RESOURCES_DIR, Resource from config import HARDWARE_EXPORTER_COLLECTOR_MAPPING, TPR_RESOURCES, HWTool @@ -50,6 +53,33 @@ def pytest_addoption(parser): ) +def get_this_script_dir() -> Path: + filename = inspect.getframeinfo(inspect.currentframe()).filename # type: ignore[arg-type] + path = os.path.dirname(os.path.abspath(filename)) + return Path(path) + + +@pytest.fixture(scope="module") +def bundle(ops_test: OpsTest, request, charm_path, base, provided_collectors): + """Configure the 
bundle depending on cli arguments.""" + bundle_template_path = get_this_script_dir() / "bundle.yaml.j2" + log.info("Rendering bundle %s", bundle_template_path) + bundle = ops_test.render_bundle( + bundle_template_path, + charm=charm_path, + base=base, + redfish_disable=("redfish" not in provided_collectors), + resources={ + "storcli-deb": "empty-resource", + "perccli-deb": "empty-resource", + "sas2ircu-bin": "empty-resource", + "sas3ircu-bin": "empty-resource", + }, + ) + + return bundle + + @pytest.fixture(scope="module") def base(request): return request.config.getoption("--base") @@ -158,7 +188,7 @@ def required_resources(resources: list[Resource], provided_collectors: set) -> l return required_resources -@pytest.fixture() +@pytest.fixture(scope="module") def charm_path(base: str, architecture: str) -> Path: """Fixture to determine the charm path based on the base and architecture.""" glob_path = f"hardware-observer_*{base.replace('@', '-')}-{architecture}*.charm" diff --git a/tests/functional/test_charm.py b/tests/functional/test_charm.py index 2e5bad2..ad068d2 100644 --- a/tests/functional/test_charm.py +++ b/tests/functional/test_charm.py @@ -3,7 +3,6 @@ # See LICENSE file for licensing details. import asyncio -import inspect import logging import os from enum import Enum @@ -37,12 +36,6 @@ TIMEOUT = 600 -def get_this_script_dir() -> Path: - filename = inspect.getframeinfo(inspect.currentframe()).filename # type: ignore[arg-type] - path = os.path.dirname(os.path.abspath(filename)) - return Path(path) - - class AppStatus(str, Enum): """Various workload status messages for the app.""" @@ -59,7 +52,7 @@ class AppStatus(str, Enum): @pytest.mark.abort_on_fail @pytest.mark.skip_if_deployed async def test_build_and_deploy( # noqa: C901, function is too complex - ops_test: OpsTest, base, architecture, realhw, required_resources, charm_path + ops_test: OpsTest, base, architecture, realhw, required_resources, bundle ): """Deploy the charm together with related charms. 
@@ -71,21 +64,6 @@ async def test_build_and_deploy( # noqa: C901, function is too complex # See issue: https://bugs.launchpad.net/juju/+bug/2067749 await ops_test.model.set_constraints({"arch": architecture}) - bundle_template_path = get_this_script_dir() / "bundle.yaml.j2" - - logger.info("Rendering bundle %s", bundle_template_path) - bundle = ops_test.render_bundle( - bundle_template_path, - charm=charm_path, - base=base, - resources={ - "storcli-deb": "empty-resource", - "perccli-deb": "empty-resource", - "sas2ircu-bin": "empty-resource", - "sas3ircu-bin": "empty-resource", - }, - ) - juju_cmd = ["deploy", "-m", ops_test.model_full_name, str(bundle)] # deploy bundle to already added machine instead of provisioning new one @@ -413,7 +391,10 @@ async def test_smarctl_exporter_snap_available(self, ops_test, app, unit): assert results.get("stdout").strip() == "active" async def test_dcgm_exporter_snap_available(self, ops_test, app, unit, nvidia_present): - """Test if dcgm exporter snap is installed and ranning on the unit.""" + """Test if dcgm exporter snap is installed and running on the unit. + + NOTE: this test requires installing the correct nvidia driver beforehand. + """ if not nvidia_present: pytest.skip("dcgm not in provided collectors, skipping test") diff --git a/tests/functional/utils.py b/tests/functional/utils.py index 70df8af..83a8c4f 100644 --- a/tests/functional/utils.py +++ b/tests/functional/utils.py @@ -63,7 +63,7 @@ async def run_command_on_unit(ops_test, unit_name, command): async def get_hardware_exporter_config(ops_test, unit_name) -> dict: """Return hardware-exporter config from endpoint on unit.""" - command = "cat /etc/hardware-exporter/config.yaml" + command = "cat /etc/hardware-exporter-config.yaml" results = await run_command_on_unit(ops_test, unit_name, command) if results.get("return-code") > 0: raise HardwareExporterConfigError