diff --git a/tests/functional/README.md b/tests/functional/README.md
index 5992a576..8f4e2fa7 100644
--- a/tests/functional/README.md
+++ b/tests/functional/README.md
@@ -3,33 +3,33 @@ There are 2 main types of functional tests for the Hardware Observer charm - tho
 Here, "real hardware" refers to machines that are not VMs or containers and have access to real hardware resources like RAID cards and BMC management tools.
 
+Note: the built charm must be present in the project's root directory for the tests to run.
+
 ## Hardware Independent Tests
 
 These are the tests for hardware observer that do not require any real hardware.
 
-Hardware independent tests are run on every PR / weekly scheduled test run. They belong to the `TestCharm` class in the `test_charm.py` module.
+Hardware independent tests are run on every PR / weekly scheduled test run.
 
 These include:
 * Testing whether juju config changes produce the required results
-* Check whether the exporter systemd service starts and stops correctly
-* Test exporter is stopped and related files removed on removal of charm
-
-and more.
-Running these tests is as simple as executing the `make functional` command.
+Running these tests is as simple as executing the `tox -e func -- -v` command.
 
 ## Hardware Dependent Tests
 
 These are the tests that depend on real hardware to be executed. This is performed manually when required, for example - validating the charm's full functionality before a new release.
 
 Hardware dependent tests are present in the `TestCharmWithHW` class in the `test_charm.py` module. The pytest marker `realhw` has been added to this class (which would include all the tests in this class).
 
-These tests will only be executed if the `--collectors` option for pytest is provided some value. Otherwise, all these tests are skipped (this is done by checking for the presence of the `realhw` marker mentioned earlier.)
+These tests will only be executed if the `--realhw` option for pytest is provided. Additionally, the `--collectors` option with space-separated values can be provided if specific hardware is present; check `conftest.py` for the available options. Otherwise, all these tests are skipped (this is done by checking for the presence of the `realhw` marker mentioned earlier.)
 
-Note: The `test_build_and_deploy` function sets up the test environment for both types of tests.
+Note: The operator must set up a test model with the machine added beforehand. The machine must be an actual host; containers or VMs won't work.
 
 Some of these tests include:
 * Check if all collectors are detected in the exporter config file
 * Test if metrics are available at the expected endpoint
 * Test if metrics specific to the collectors being tested are available
+* Test if the smartctl-exporter snap is installed and running
+* Test if Nvidia drivers and the dcgm-exporter snap are installed
 
 and more.
 
@@ -37,7 +37,8 @@ In order to run these tests, a couple of prerequisite steps need to be completed
 1. Setup test environment
 2. Add environment variables for Redfish credentials.
 3. Setup required resource files
-4. Find supported collectors
+4. Determine if the machine has Nvidia GPUs and, if so, add the `--nvidia` flag.
+5. Find supported collectors
 
 ### 1. Setup test environment
 For the hardware dependent tests, we add the test machine beforehand and the bundle only handles deploying the applications to this machine.
@@ -82,7 +83,7 @@ Note: The tests expect these resources to be named exactly in the manner provide
 ### 4. Find supported collectors
 Note down all the collectors supported by the machine as they need to be provided to pytest as part of its CLI arguments.
-This is done by passing the required collectors in a space-separated manner via the `FUNC_ARGS` environment variable to the make target.
+This is done by passing the required collectors in a space-separated manner via the `--collectors` option to the tox target.
 
 The supported collectors can be found by checking the output of the `lshw` command (for RAID cards) or checking availability of Redfish and IPMI on the BMC.
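+
+For example, the following commands can help identify what the machine supports (this is only a sketch: it assumes `lshw` and `ipmitool` are installed on the host, and `<bmc-address>` is a placeholder for the BMC address):
+
+```
+sudo lshw -class storage                     # look for RAID controllers
+sudo ipmitool mc info                        # check whether IPMI responds
+curl -k https://<bmc-address>/redfish/v1/    # check whether Redfish is reachable
+```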
@@ -92,7 +93,7 @@ The supported collectors can be found by checking the output of the `lshw` comma
 After ensuring the prerequisite steps are complete, the final command to run the tests would look something like this:
 
 ```
-FUNC_ARGS="--model test --collectors ipmi_dcmi ipmi_sel ipmi_sensor redfish mega_raid" make functional
+tox -e func -- -v --realhw --model test --collectors ipmi_dcmi ipmi_sel ipmi_sensor redfish mega_raid --keep-models
 ```
 
 This would pass the required collectors to tox which then sends it to the pytest command and starts the hardware dependent tests.
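+
+Note: if the built charm is not already present in the project root, it can be produced there with `charmcraft pack`. Depending on the base and architecture, this yields a file such as `hardware-observer_ubuntu-22.04-amd64.charm` (the exact name may vary), which is what the `charm_path` fixture in `conftest.py` looks for:
+
+```
+charmcraft pack
+```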
diff --git a/tests/functional/conftest.py b/tests/functional/conftest.py
index fc6ec0c6..b29025aa 100644
--- a/tests/functional/conftest.py
+++ b/tests/functional/conftest.py
@@ -1,5 +1,6 @@
 import logging
 import platform
+from pathlib import Path
 
 import pytest
 from utils import RESOURCES_DIR, Resource
@@ -18,6 +19,18 @@ def pytest_addoption(parser):
         help="Set base for the applications.",
     )
 
+    parser.addoption(
+        "--realhw",
+        action="store_true",
+        help="Enable real hardware testing.",
+    )
+
+    parser.addoption(
+        "--nvidia",
+        action="store_true",
+        help="Enable NVIDIA GPU support for testing with real hardware.",
+    )
+
     parser.addoption(
         "--collectors",
         nargs="+",
@@ -42,6 +55,16 @@ def base(request):
     return request.config.getoption("--base")
 
 
+@pytest.fixture(scope="module")
+def nvidia_present(request):
+    return request.config.getoption("--nvidia")
+
+
+@pytest.fixture(scope="module")
+def realhw(request):
+    return request.config.getoption("--realhw")
+
+
 @pytest.fixture(scope="module")
 def architecture():
     machine = platform.machine()
@@ -60,20 +83,7 @@ def pytest_configure(config):
 
 
 def pytest_collection_modifyitems(config, items):
-    if config.getoption("collectors"):
-        # --collectors provided, skip hw independent tests
-        skip_hw_independent = pytest.mark.skip(
-            reason="Hardware independent tests are skipped since --collectors was provided."
-        )
-        for item in items:
-            # skip TestCharm tests where "realhw" marker is not present
-            # we don't want to skip test_setup_and_build, test_required_resources,
-            # test_cos_agent_relation and test_redfish_credential_validation
-            # even for hw independent tests
-            # so we also check for the abort_on_fail marker
-            if "realhw" not in item.keywords and "abort_on_fail" not in item.keywords:
-                item.add_marker(skip_hw_independent)
-    else:
+    if not config.getoption("--realhw"):
         # skip hw dependent tests in TestCharmWithHW marked with "realhw"
         skip_hw_dependent = pytest.mark.skip(
             reason="Hardware dependent test. Provide collectors with the --collectors option."
@@ -100,7 +110,7 @@ def resources() -> list[Resource]:
         Resource(
             resource_name=TPR_RESOURCES.get(HWTool.STORCLI),
             file_name="storcli.deb",
-            collector_name=HARDWARE_EXPORTER_COLLECTOR_MAPPING.get(HWTool.STORCLI)[0].replace(
+            collector_name=HARDWARE_EXPORTER_COLLECTOR_MAPPING.get(HWTool.STORCLI).replace(
                 "collector.", ""
             ),
             bin_name=HWTool.STORCLI.value,
@@ -108,7 +118,7 @@ def resources() -> list[Resource]:
         Resource(
             resource_name=TPR_RESOURCES.get(HWTool.PERCCLI),
             file_name="perccli.deb",
-            collector_name=HARDWARE_EXPORTER_COLLECTOR_MAPPING.get(HWTool.PERCCLI)[0].replace(
+            collector_name=HARDWARE_EXPORTER_COLLECTOR_MAPPING.get(HWTool.PERCCLI).replace(
                 "collector.", ""
            ),
             bin_name=HWTool.PERCCLI.value,
@@ -116,7 +126,7 @@ def resources() -> list[Resource]:
         Resource(
             resource_name=TPR_RESOURCES.get(HWTool.SAS2IRCU),
             file_name="sas2ircu",
-            collector_name=HARDWARE_EXPORTER_COLLECTOR_MAPPING.get(HWTool.SAS2IRCU)[0].replace(
+            collector_name=HARDWARE_EXPORTER_COLLECTOR_MAPPING.get(HWTool.SAS2IRCU).replace(
                 "collector.", ""
             ),
             bin_name=HWTool.SAS2IRCU.value,
@@ -124,7 +134,7 @@ def resources() -> list[Resource]:
         Resource(
             resource_name=TPR_RESOURCES.get(HWTool.SAS3IRCU),
             file_name="sas3ircu",
-            collector_name=HARDWARE_EXPORTER_COLLECTOR_MAPPING.get(HWTool.SAS3IRCU)[0].replace(
+            collector_name=HARDWARE_EXPORTER_COLLECTOR_MAPPING.get(HWTool.SAS3IRCU).replace(
                 "collector.", ""
             ),
             bin_name=HWTool.SAS3IRCU.value,
@@ -146,3 +156,23 @@ def required_resources(resources: list[Resource], provided_collectors: set) -> l
             required_resources.append(resource)
 
     return required_resources
+
+
+@pytest.fixture()
+def charm_path(base: str, architecture: str) -> Path:
+    """Fixture to determine the charm path based on the base and architecture."""
+    glob_path = f"hardware-observer_*{base.replace('@', '-')}-{architecture}*.charm"
+    paths = list(Path(".").glob(glob_path))
+
+    if not paths:
+        raise FileNotFoundError(f"The path for the charm for {base}-{architecture} is not found.")
+
+    if len(paths) > 1:
+        raise FileNotFoundError(
+            f"Multiple charms found for {base}-{architecture}. Please provide only one."
+        )
+
+    # The bundle will need the full path to the charm
+    path = paths[0].absolute()
+    log.info(f"Using charm path: {path}")
+    return path
diff --git a/tests/functional/test_charm.py b/tests/functional/test_charm.py
index 45a50fae..aba5b4fe 100644
--- a/tests/functional/test_charm.py
+++ b/tests/functional/test_charm.py
@@ -16,8 +16,11 @@ from tenacity import AsyncRetrying, RetryError, stop_after_attempt, wait_fixed
 
 from utils import (
     RESOURCES_DIR,
+    HardwareExporterConfigError,
     MetricsFetchError,
     assert_metrics,
+    assert_snap_installed,
+    get_hardware_exporter_config,
     get_metrics_output,
     run_command_on_unit,
 )
@@ -56,17 +59,13 @@ class AppStatus(str, Enum):
 @pytest.mark.abort_on_fail
 @pytest.mark.skip_if_deployed
 async def test_build_and_deploy(  # noqa: C901, function is too complex
-    ops_test: OpsTest, base, architecture, provided_collectors, required_resources
+    ops_test: OpsTest, base, architecture, realhw, required_resources, charm_path
 ):
-    """Build the charm-under-test and deploy it together with related charms.
+    """Deploy the charm together with related charms.
 
     Assert on the unit status before any relations/configurations take place.
     Optionally attach required resources when testing with real hardware.
     """
-    # Build and deploy charm from local source folder
-    charm = await ops_test.build_charm(".")
-    assert charm, "Charm was not built successfully."
-
     # This is required for subordinate appliation to choose right revison
     # on different architecture.
     # See issue: https://bugs.launchpad.net/juju/+bug/2067749
@@ -77,7 +76,7 @@ async def test_build_and_deploy(  # noqa: C901, function is too complex
     logger.info("Rendering bundle %s", bundle_template_path)
     bundle = ops_test.render_bundle(
         bundle_template_path,
-        charm=charm,
+        charm=charm_path,
         base=base,
         resources={
             "storcli-deb": "empty-resource",
@@ -91,7 +90,7 @@ async def test_build_and_deploy(  # noqa: C901, function is too complex
 
     # deploy bundle to already added machine instead of provisioning new one
     # when testing with real hardware
-    if provided_collectors:
+    if realhw:
         juju_cmd.append("--map-machines=existing")
 
     logging.info("Deploying bundle...")
@@ -128,7 +127,7 @@ async def test_build_and_deploy(  # noqa: C901, function is too complex
 
 
 @pytest.mark.abort_on_fail
-async def test_required_resources(ops_test: OpsTest, provided_collectors, required_resources):
+async def test_required_resources(ops_test: OpsTest, required_resources):
     if not required_resources:
         pytest.skip("No required resources to be attached, skipping test")
 
@@ -162,7 +161,21 @@ async def test_required_resources(ops_test: OpsTest, provided_collectors, requir
         assert unit.workload_status_message == AppStatus.MISSING_RELATION
 
 
+@pytest.mark.abort_on_fail
 @pytest.mark.realhw
+async def test_nvidia_driver_installation(ops_test: OpsTest, nvidia_present, unit):
+    """Test nvidia driver installation."""
+    if not nvidia_present:
+        pytest.skip("--nvidia flag not provided, skipping test")
+
+    check_nvidia_driver_cmd = "cat /proc/driver/nvidia/version"
+    results = await run_command_on_unit(ops_test, unit.name, check_nvidia_driver_cmd)
+    exists = results.get("return-code") == 0
+
+    if not exists:
+        pytest.fail("Error occurred during the driver installation. Check the logs.")
+
+
 @pytest.mark.abort_on_fail
 async def test_cos_agent_relation(ops_test: OpsTest, provided_collectors):
     """Test adding relation with grafana-agent."""
@@ -193,9 +206,10 @@ async def test_cos_agent_relation(ops_test: OpsTest, provided_collectors):
     # Test with cos-agent relation
     logging.info("Check whether hardware-exporter is active after creating relation.")
     for unit in ops_test.model.applications[APP_NAME].units:
-        results = await run_command_on_unit(ops_test, unit.name, check_active_cmd)
-        assert results.get("return-code") == 0
-        assert results.get("stdout").strip() == "active"
+        if provided_collectors:
+            results = await run_command_on_unit(ops_test, unit.name, check_active_cmd)
+            assert results.get("return-code") == 0
+            assert results.get("stdout").strip() == "active"
         if redfish_present:
             assert unit.workload_status_message == AppStatus.INVALID_REDFISH_CREDS
         else:
@@ -229,42 +243,144 @@ async def test_redfish_credential_validation(ops_test: OpsTest, provided_collect
 class TestCharmWithHW:
     """Run functional tests that require specific hardware."""
 
-    async def test_config_collector_enabled(self, app, unit, ops_test, provided_collectors):
-        """Test whether provided collectors are present in exporter config."""
-        cmd = "cat /etc/hardware-exporter-config.yaml"
+    async def test_config_file_permissions(self, unit, ops_test, provided_collectors):
+        """Check config file permissions are set correctly."""
+        if not provided_collectors:
+            pytest.skip("No collectors provided, skipping test")
+
+        expected_file_mode = "600"
+        cmd = "stat -c '%a' /etc/hardware-exporter-config.yaml"
         results = await run_command_on_unit(ops_test, unit.name, cmd)
         assert results.get("return-code") == 0
-        config = yaml.safe_load(results.get("stdout").strip())
-        collectors_in_config = {
-            collector.replace("collector.", "") for collector in config.get("enable_collectors")
-        }
-        error_msg = (
-            f"Provided collectors {provided_collectors} are different from"
-            f" enabled collectors in config {collectors_in_config}"
+        assert results.get("stdout").rstrip("\n") == expected_file_mode
+
+    async def test_config_changed_port(self, app, unit, ops_test, provided_collectors):
+        """Test changing the config option: hardware-exporter-port."""
+        if not provided_collectors:
+            pytest.skip("No collectors provided, skipping test")
+
+        new_port = "10001"
+        await asyncio.gather(
+            app.set_config({"hardware-exporter-port": new_port}),
+            ops_test.model.wait_for_idle(apps=[APP_NAME]),
         )
-        assert provided_collectors == collectors_in_config, error_msg
 
-    async def test_redfish_client_timeout_config(self, app, unit, ops_test, provided_collectors):
-        """Test whether the redfish client's timeout depends on collect-timeout charm config."""
-        if "redfish" not in provided_collectors:
-            pytest.skip("redfish not in provided collectors, skipping test")
+        try:
+            config = await get_hardware_exporter_config(ops_test, unit.name)
+        except HardwareExporterConfigError:
+            pytest.fail("Not able to obtain hardware-exporter config!")
+        assert config["port"] == int(new_port)
 
-        new_timeout = "20"
+        await app.reset_config(["hardware-exporter-port"])
+
+    async def test_no_redfish_config(self, unit, ops_test, provided_collectors):
+        """Test that no Redfish options are set when Redfish is not available on the machine."""
+        if not provided_collectors:
+            pytest.skip("No collectors provided, skipping test")
+
+        try:
+            config = await get_hardware_exporter_config(ops_test, unit.name)
+        except HardwareExporterConfigError:
+            pytest.fail("Not able to obtain hardware-exporter config!")
+        assert config.get("redfish_host") is None
+        assert config.get("redfish_username") is None
+        assert config.get("redfish_client_timeout") is None
+
+    async def test_config_changed_log_level(self, app, unit, ops_test, provided_collectors):
+        """Test changing the config option: exporter-log-level."""
+        if not provided_collectors:
+            pytest.skip("No collectors provided, skipping test")
+
+        new_log_level = "DEBUG"
         await asyncio.gather(
-            app.set_config({"collect-timeout": new_timeout}),
+            app.set_config({"exporter-log-level": new_log_level}),
             ops_test.model.wait_for_idle(apps=[APP_NAME]),
         )
 
-        cmd = "cat /etc/hardware-exporter-config.yaml"
-        results = await run_command_on_unit(ops_test, unit.name, cmd)
-        assert results.get("return-code") == 0
-        config = yaml.safe_load(results.get("stdout").strip())
-        assert config["redfish_client_timeout"] == int(new_timeout)
+        try:
+            config = await get_hardware_exporter_config(ops_test, unit.name)
+        except HardwareExporterConfigError:
+            pytest.fail("Not able to obtain hardware-exporter config!")
+        assert config["level"] == new_log_level
+
+        await app.reset_config(["exporter-log-level"])
+
+    async def test_config_changed_collect_timeout(self, app, unit, ops_test, provided_collectors):
+        """Test changing the config option: collect-timeout."""
+        if not provided_collectors:
+            pytest.skip("No collectors provided, skipping test")
+
+        new_collect_timeout = "20"
+        await asyncio.gather(
+            app.set_config({"collect-timeout": new_collect_timeout}),
+            ops_test.model.wait_for_idle(apps=[APP_NAME]),
+        )
+
+        try:
+            config = await get_hardware_exporter_config(ops_test, unit.name)
+        except HardwareExporterConfigError:
+            pytest.fail("Not able to obtain hardware-exporter config!")
+        assert config["collect_timeout"] == int(new_collect_timeout)
 
         await app.reset_config(["collect-timeout"])
 
-    async def test_metrics_available(self, app, unit, ops_test):
+    async def test_start_and_stop_exporter(self, app, unit, ops_test, provided_collectors):
+        """Test starting and stopping the exporter results in correct charm status."""
+        if not provided_collectors:
+            pytest.skip("No collectors provided, skipping test")
+
+        # Stop the exporter, and the exporter should auto-restart after update status fires.
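+        # ops_test.fast_forward() below shortens the model's update-status interval,
+        # so the charm notices the stopped service and restarts it quickly.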
+        stop_cmd = "systemctl stop hardware-exporter"
+        async with ops_test.fast_forward():
+            await asyncio.gather(
+                unit.run(stop_cmd),
+                ops_test.model.wait_for_idle(apps=[APP_NAME], status="active", timeout=TIMEOUT),
+            )
+        assert unit.workload_status_message == AppStatus.READY
+
+    async def test_exporter_failed(self, app, unit, ops_test, provided_collectors):
+        """Test failure in the exporter results in correct charm status."""
+        if not provided_collectors:
+            pytest.skip("No collectors provided, skipping test")
+
+        # Setting incorrect log level will crash the exporter
+        async with ops_test.fast_forward():
+            await asyncio.gather(
+                app.set_config({"exporter-log-level": "RANDOM_LEVEL"}),
+                ops_test.model.wait_for_idle(apps=[APP_NAME], status="blocked", timeout=TIMEOUT),
+            )
+        assert unit.workload_status_message == AppStatus.INVALID_CONFIG_EXPORTER_LOG_LEVEL
+
+        async with ops_test.fast_forward():
+            await asyncio.gather(
+                app.reset_config(["exporter-log-level"]),
+                ops_test.model.wait_for_idle(apps=[APP_NAME], status="active", timeout=TIMEOUT),
+            )
+        assert unit.workload_status_message == AppStatus.READY
+
+    async def test_config_collector_enabled(self, app, unit, ops_test, provided_collectors):
+        """Test whether provided collectors are present in exporter config."""
+        if not provided_collectors:
+            pytest.skip("No collectors provided, skipping test")
+
+        try:
+            config = await get_hardware_exporter_config(ops_test, unit.name)
+        except HardwareExporterConfigError:
+            pytest.fail("Not able to obtain hardware-exporter config!")
+        collectors_in_config = {
+            collector.replace("collector.", "") for collector in config.get("enable_collectors")
+        }
+        error_msg = (
+            f"Provided collectors {provided_collectors} are different from"
+            f" enabled collectors in config {collectors_in_config}"
+        )
+        assert provided_collectors == collectors_in_config, error_msg
+
+    async def test_metrics_available(self, app, unit, ops_test, provided_collectors):
         """Test if metrics are available at the expected endpoint on unit."""
+        if not provided_collectors:
+            pytest.skip("No collectors provided, skipping test")
+
         # takes some time for exporter to start and metrics to be available
         try:
             async for attempt in AsyncRetrying(
@@ -281,6 +397,50 @@ async def test_metrics_available(self, app, unit, ops_test):
 
         assert metrics, "Metrics result should not be empty"
 
+    async def test_redfish_client_timeout_config(self, app, unit, ops_test, provided_collectors):
+        """Test whether the redfish client's timeout depends on collect-timeout charm config."""
+        if "redfish" not in provided_collectors:
+            pytest.skip("redfish not in provided collectors, skipping test")
+
+        new_timeout = "20"
+        await asyncio.gather(
+            app.set_config({"collect-timeout": new_timeout}),
+            ops_test.model.wait_for_idle(apps=[APP_NAME]),
+        )
+
+        try:
+            config = await get_hardware_exporter_config(ops_test, unit.name)
+        except HardwareExporterConfigError:
+            pytest.fail("Not able to obtain hardware-exporter config!")
+        assert config["redfish_client_timeout"] == int(new_timeout)
+
+        await app.reset_config(["collect-timeout"])
+
+    async def test_smartctl_exporter_snap_available(self, ops_test, app, unit):
+        """Test if smartctl exporter snap is installed and running on the unit."""
+        snap_name = "smartctl-exporter"
+        if not await assert_snap_installed(ops_test, unit.name, snap_name):
+            pytest.fail(f"{snap_name} snap is not installed on the unit.")
+
+        check_active_cmd = "systemctl is-active snap.smartctl-exporter.smartctl-exporter"
+        results = await run_command_on_unit(ops_test, unit.name, check_active_cmd)
+        assert results.get("return-code") == 0
+        assert results.get("stdout").strip() == "active"
+
+    async def test_dcgm_exporter_snap_available(self, ops_test, app, unit, nvidia_present):
+        """Test if dcgm exporter snap is installed and running on the unit."""
+        if not nvidia_present:
+            pytest.skip("--nvidia flag not provided, skipping test")
+
+        snap_name = "dcgm"
+        if not await assert_snap_installed(ops_test, unit.name, snap_name):
+            pytest.fail(f"{snap_name} snap is not installed on the unit.")
+
+        check_active_cmd = "systemctl is-active snap.dcgm.dcgm-exporter"
+        results = await run_command_on_unit(ops_test, unit.name, check_active_cmd)
+        assert results.get("return-code") == 0
+        assert results.get("stdout").strip() == "active"
+
     @pytest.mark.parametrize(
         "collector",
         [
@@ -458,16 +618,18 @@ async def test_resource_in_correct_location(self, ops_test, unit, required_resou
             results = await run_command_on_unit(ops_test, unit.name, check_resource_cmd)
             assert results.get("return-code") == 0, f"{symlink_bin} resource doesn't exist"
 
-    async def test_redfish_config(self, app, unit, ops_test):
+    async def test_redfish_config(self, ops_test, app, unit, provided_collectors):
         """Test Redfish options."""
+        if "redfish" not in provided_collectors:
+            pytest.skip("redfish not in provided collectors, skipping test")
         # initially Redfish is available and enabled
-        cmd = "cat /etc/hardware-exporter-config.yaml"
-        results_before = await run_command_on_unit(ops_test, unit.name, cmd)
-        assert results_before.get("return-code") == 0
-        config = yaml.safe_load(results_before.get("stdout").strip())
-        assert config.get("redfish_host") is not None
-        assert config.get("redfish_username") is not None
-        assert config.get("redfish_client_timeout") is not None
+        try:
+            config_before = await get_hardware_exporter_config(ops_test, unit.name)
+        except HardwareExporterConfigError:
+            pytest.fail("Not able to obtain hardware-exporter config!")
+        assert config_before.get("redfish_host") is not None
+        assert config_before.get("redfish_username") is not None
+        assert config_before.get("redfish_client_timeout") is not None
 
         # Disable Redfish and see if the config is not present
         await asyncio.gather(
@@ -475,13 +637,13 @@ async def test_redfish_config(self, app, unit, ops_test):
             ops_test.model.wait_for_idle(apps=[APP_NAME]),
         )
 
-        cmd = "cat /etc/hardware-exporter-config.yaml"
-        results_after = await run_command_on_unit(ops_test, unit.name, cmd)
-        assert results_before.get("return-code") == 0
-        config = yaml.safe_load(results_after.get("stdout").strip())
-        assert config.get("redfish_host") is None
-        assert config.get("redfish_username") is None
-        assert config.get("redfish_client_timeout") is None
+        try:
+            config_after = await get_hardware_exporter_config(ops_test, unit.name)
+        except HardwareExporterConfigError:
+            pytest.fail("Not able to obtain hardware-exporter config!")
+        assert config_after.get("redfish_host") is None
+        assert config_after.get("redfish_username") is None
+        assert config_after.get("redfish_client_timeout") is None
 
         await app.reset_config(["redfish-disable"])
 
@@ -573,132 +735,29 @@ async def test_resource_clean_up(self, ops_test, app, unit, required_resources):
         )
 
 
-@pytest.mark.realhw
-class TestCharm:
-    """Perform tests that require one or more exporters to be present."""
-
-    async def test_config_file_permissions(self, unit, ops_test):
-        """Check config file permissions are set correctly."""
-        expected_file_mode = "600"
-        cmd = "stat -c '%a' /etc/hardware-exporter-config.yaml"
-        results = await run_command_on_unit(ops_test, unit.name, cmd)
-        assert results.get("return-code") == 0
-        assert results.get("stdout").rstrip("\n") == expected_file_mode
-
-    async def test_config_changed_port(self, app, unit, ops_test):
-        """Test changing the config option: hardware-exporter-port."""
-        new_port = "10001"
-        await asyncio.gather(
-            app.set_config({"hardware-exporter-port": new_port}),
-            ops_test.model.wait_for_idle(apps=[APP_NAME]),
-        )
-
-        cmd = "cat /etc/hardware-exporter-config.yaml"
-        results = await run_command_on_unit(ops_test, unit.name, cmd)
-        assert results.get("return-code") == 0
-        config = yaml.safe_load(results.get("stdout").strip())
-        assert config["port"] == int(new_port)
-
-        await app.reset_config(["hardware-exporter-port"])
-
-    async def test_no_redfish_config(self, unit, ops_test):
-        """Test that there is no Redfish options because it's not available on lxd machines."""
-        cmd = "cat /etc/hardware-exporter-config.yaml"
-        results = await run_command_on_unit(ops_test, unit.name, cmd)
-        assert results.get("return-code") == 0
-        config = yaml.safe_load(results.get("stdout").strip())
-        assert config.get("redfish_host") is None
-        assert config.get("redfish_username") is None
-        assert config.get("redfish_client_timeout") is None
-
-    async def test_config_changed_log_level(self, app, unit, ops_test):
-        """Test changing the config option: exporter-log-level."""
-        new_log_level = "DEBUG"
-        await asyncio.gather(
-            app.set_config({"exporter-log-level": new_log_level}),
-            ops_test.model.wait_for_idle(apps=[APP_NAME]),
-        )
-
-        cmd = "cat /etc/hardware-exporter-config.yaml"
-        results = await run_command_on_unit(ops_test, unit.name, cmd)
-        assert results.get("return-code") == 0
-        config = yaml.safe_load(results.get("stdout").strip())
-        assert config["level"] == new_log_level
-
-        await app.reset_config(["exporter-log-level"])
-
-    async def test_config_changed_collect_timeout(self, app, unit, ops_test):
-        """Test changing the config option: collect-timeout."""
-        new_collect_timeout = "20"
-        await asyncio.gather(
-            app.set_config({"collect-timeout": new_collect_timeout}),
-            ops_test.model.wait_for_idle(apps=[APP_NAME]),
-        )
-
-        cmd = "cat /etc/hardware-exporter-config.yaml"
-        results = await run_command_on_unit(ops_test, unit.name, cmd)
-        assert results.get("return-code") == 0
-        config = yaml.safe_load(results.get("stdout").strip())
-        assert config["collect_timeout"] == int(new_collect_timeout)
-
-        await app.reset_config(["collect-timeout"])
-
-    async def test_start_and_stop_exporter(self, app, unit, ops_test):
-        """Test starting and stopping the exporter results in correct charm status."""
-        # Stop the exporter, and the exporter should auto-restart after update status fire.
-        stop_cmd = "systemctl stop hardware-exporter"
-        async with ops_test.fast_forward():
-            await asyncio.gather(
-                unit.run(stop_cmd),
-                ops_test.model.wait_for_idle(apps=[APP_NAME], status="active", timeout=TIMEOUT),
-            )
-        assert unit.workload_status_message == AppStatus.READY
-
-    async def test_exporter_failed(self, app, unit, ops_test):
-        """Test failure in the exporter results in correct charm status."""
-        # Setting incorrect log level will crash the exporter
-        async with ops_test.fast_forward():
-            await asyncio.gather(
-                app.set_config({"exporter-log-level": "RANDOM_LEVEL"}),
-                ops_test.model.wait_for_idle(apps=[APP_NAME], status="blocked", timeout=TIMEOUT),
-            )
-        assert unit.workload_status_message == AppStatus.INVALID_CONFIG_EXPORTER_LOG_LEVEL
-
-        async with ops_test.fast_forward():
-            await asyncio.gather(
-                app.reset_config(["exporter-log-level"]),
-                ops_test.model.wait_for_idle(apps=[APP_NAME], status="active", timeout=TIMEOUT),
-            )
-        assert unit.workload_status_message == AppStatus.READY
-
-    async def test_on_remove_event(self, app, ops_test):
-        """Test _on_remove event cleans up the service on the host machine."""
-        await asyncio.gather(
-            app.remove_relation(f"{APP_NAME}:general-info", f"{PRINCIPAL_APP_NAME}:juju-info"),
-            ops_test.model.wait_for_idle(
-                apps=[PRINCIPAL_APP_NAME], status="active", timeout=TIMEOUT
-            ),
-        )
-        principal_unit = ops_test.model.applications[PRINCIPAL_APP_NAME].units[0]
+@pytest.mark.abort_on_fail
+async def test_on_remove_event(app, ops_test):
+    """Test _on_remove event cleans up the service on the host machine."""
+    await asyncio.gather(
+        app.remove_relation(f"{APP_NAME}:general-info", f"{PRINCIPAL_APP_NAME}:juju-info"),
+        ops_test.model.wait_for_idle(apps=[PRINCIPAL_APP_NAME], status="active", timeout=TIMEOUT),
+    )
+    principal_unit = ops_test.model.applications[PRINCIPAL_APP_NAME].units[0]
 
-        # Wait for cleanup activities to finish
-        await ops_test.model.block_until(
-            lambda: ops_test.model.applications[APP_NAME].status == "unknown"
-        )
+    # Wait for cleanup activities to finish
+    await ops_test.model.block_until(
+        lambda: ops_test.model.applications[APP_NAME].status == "unknown"
+    )
 
-        cmd = "ls /etc/hardware-exporter-config.yaml"
-        results = await run_command_on_unit(ops_test, principal_unit.name, cmd)
-        assert results.get("return-code") > 0
+    cmd = "ls /etc/hardware-exporter-config.yaml"
+    results = await run_command_on_unit(ops_test, principal_unit.name, cmd)
+    assert results.get("return-code") > 0
 
-        cmd = "ls /etc/systemd/system/hardware-exporter.service"
-        results = await run_command_on_unit(ops_test, principal_unit.name, cmd)
-        assert results.get("return-code") > 0
+    cmd = "ls /etc/systemd/system/hardware-exporter.service"
+    results = await run_command_on_unit(ops_test, principal_unit.name, cmd)
+    assert results.get("return-code") > 0
 
-        await asyncio.gather(
-            ops_test.model.add_relation(
-                f"{APP_NAME}:general-info", f"{PRINCIPAL_APP_NAME}:juju-info"
-            ),
-            ops_test.model.wait_for_idle(
-                apps=[PRINCIPAL_APP_NAME], status="active", timeout=TIMEOUT
-            ),
-        )
+    await asyncio.gather(
+        ops_test.model.add_relation(f"{APP_NAME}:general-info", f"{PRINCIPAL_APP_NAME}:juju-info"),
+        ops_test.model.wait_for_idle(apps=[PRINCIPAL_APP_NAME], status="active", timeout=TIMEOUT),
+    )
diff --git a/tests/functional/utils.py b/tests/functional/utils.py
index 7c948cf3..70df8af4 100644
--- a/tests/functional/utils.py
+++ b/tests/functional/utils.py
@@ -6,6 +6,7 @@
 from pathlib import Path
 from typing import Optional
 
+import yaml
 from async_lru import alru_cache
 
 RESOURCES_DIR = Path("./resources/")
@@ -44,6 +45,12 @@ class MetricsFetchError(Exception):
     pass
 
 
+class HardwareExporterConfigError(Exception):
+    """Raise if something goes wrong when getting hardware-exporter config."""
+
+    pass
+
+
 async def run_command_on_unit(ops_test, unit_name, command):
     complete_command = ["exec", "--unit", unit_name, "--", *command.split()]
     return_code, stdout, _ = await ops_test.juju(*complete_command)
@@ -54,6 +61,15 @@ async def run_command_on_unit(ops_test, unit_name, command):
     return results
 
 
+async def get_hardware_exporter_config(ops_test, unit_name) -> dict:
+    """Return the hardware-exporter config read from the unit."""
+    command = "cat /etc/hardware-exporter-config.yaml"
+    results = await run_command_on_unit(ops_test, unit_name, command)
+    if results.get("return-code") > 0:
+        raise HardwareExporterConfigError
+    return yaml.safe_load(results.get("stdout"))
+
+
 @alru_cache
 async def get_metrics_output(ops_test, unit_name) -> Optional[dict[str, list[Metric]]]:
     """Return parsed prometheus metric output from endpoint on unit.
@@ -68,6 +84,15 @@ async def get_metrics_output(ops_test, unit_name) -> Optional[dict[str, list[Met
     return parsed_metrics
 
 
+async def assert_snap_installed(ops_test, unit_name: str, snap_name: str) -> bool:
+    """Check whether the given snap is installed on the unit."""
+    cmd = f"snap list {snap_name}"
+    results = await run_command_on_unit(ops_test, unit_name, cmd)
+    if results.get("return-code") > 0 or snap_name not in results.get("stdout"):
+        return False
+    return True
+
+
 def assert_metrics(metrics: list[Metric], expected_metric_values_map: dict[str, float]) -> bool:
     """Assert whether values in obtained list of metrics for a collector are as expected.
 
diff --git a/tox.ini b/tox.ini
index a388a52c..894148dd 100644
--- a/tox.ini
+++ b/tox.ini
@@ -69,6 +69,7 @@ deps =
 passenv =
     REDFISH_USERNAME
     REDFISH_PASSWORD
+    CHARM_PATH_*
 
 [testenv:integration]
 description = Run integration tests with COS