From 30a1810fe6092038e84f3e41c0be33287cda390f Mon Sep 17 00:00:00 2001 From: Tullio Sebastiani Date: Tue, 21 Nov 2023 15:16:56 +0100 Subject: [PATCH] hog scenarios + telemetry removed --- CI/config/common_test_config.yaml | 43 ------ CI/run.sh | 4 +- CI/run_test.sh | 3 +- CI/scenarios/arcaflow/cpu-hog/config.yaml | 11 ++ CI/scenarios/arcaflow/cpu-hog/input.yaml | 14 ++ .../arcaflow/cpu-hog/sub-workflow.yaml | 94 ++++++++++++ CI/scenarios/arcaflow/cpu-hog/workflow.yaml | 77 ++++++++++ CI/scenarios/arcaflow/io-hog/config.yaml | 10 ++ CI/scenarios/arcaflow/io-hog/input.yaml | 13 ++ .../arcaflow/io-hog/sub-workflow.yaml | 139 ++++++++++++++++++ CI/scenarios/arcaflow/io-hog/workflow.yaml | 114 ++++++++++++++ CI/scenarios/arcaflow/memory-hog/config.yaml | 11 ++ CI/scenarios/arcaflow/memory-hog/input.yaml | 13 ++ .../arcaflow/memory-hog/sub-workflow.yaml | 86 +++++++++++ .../arcaflow/memory-hog/workflow.yaml | 73 +++++++++ CI/scenarios/node_cpu_hog_engine.yaml | 34 ----- CI/scenarios/node_cpu_hog_engine_node.yaml | 34 ----- CI/scenarios/node_io_engine.yaml | 35 ----- CI/scenarios/node_io_engine_node.yaml | 35 ----- CI/scenarios/node_mem_engine.yaml | 28 ---- CI/scenarios/node_mem_engine_node.yaml | 28 ---- CI/tests/test_arca_cpu_hog.sh | 18 +++ CI/tests/test_arca_io_hog.sh | 18 +++ CI/tests/test_arca_memory_hog.sh | 18 +++ CI/tests/test_cpu_hog.sh | 20 --- CI/tests/test_cpu_hog_gh.sh | 20 --- CI/tests/test_io_hog.sh | 20 --- CI/tests/test_io_hog_gh.sh | 19 --- CI/tests/test_mem_hog.sh | 20 --- CI/tests/test_mem_hog_gh.sh | 19 --- CI/tests/test_telemetry.sh | 31 ++++ 31 files changed, 743 insertions(+), 359 deletions(-) delete mode 100644 CI/config/common_test_config.yaml create mode 100644 CI/scenarios/arcaflow/cpu-hog/config.yaml create mode 100644 CI/scenarios/arcaflow/cpu-hog/input.yaml create mode 100644 CI/scenarios/arcaflow/cpu-hog/sub-workflow.yaml create mode 100644 CI/scenarios/arcaflow/cpu-hog/workflow.yaml create mode 100644 CI/scenarios/arcaflow/io-hog/config.yaml create mode 100644 CI/scenarios/arcaflow/io-hog/input.yaml create mode 100644 CI/scenarios/arcaflow/io-hog/sub-workflow.yaml create mode 100644 CI/scenarios/arcaflow/io-hog/workflow.yaml create mode 100644 CI/scenarios/arcaflow/memory-hog/config.yaml create mode 100644 CI/scenarios/arcaflow/memory-hog/input.yaml create mode 100644 CI/scenarios/arcaflow/memory-hog/sub-workflow.yaml create mode 100644 CI/scenarios/arcaflow/memory-hog/workflow.yaml delete mode 100644 CI/scenarios/node_cpu_hog_engine.yaml delete mode 100644 CI/scenarios/node_cpu_hog_engine_node.yaml delete mode 100644 CI/scenarios/node_io_engine.yaml delete mode 100644 CI/scenarios/node_io_engine_node.yaml delete mode 100644 CI/scenarios/node_mem_engine.yaml delete mode 100644 CI/scenarios/node_mem_engine_node.yaml create mode 100644 CI/tests/test_arca_cpu_hog.sh create mode 100644 CI/tests/test_arca_io_hog.sh create mode 100644 CI/tests/test_arca_memory_hog.sh delete mode 100755 CI/tests/test_cpu_hog.sh delete mode 100755 CI/tests/test_cpu_hog_gh.sh delete mode 100755 CI/tests/test_io_hog.sh delete mode 100755 CI/tests/test_io_hog_gh.sh delete mode 100755 CI/tests/test_mem_hog.sh delete mode 100755 CI/tests/test_mem_hog_gh.sh create mode 100644 CI/tests/test_telemetry.sh diff --git a/CI/config/common_test_config.yaml b/CI/config/common_test_config.yaml deleted file mode 100644 index c36a6d591..000000000 --- a/CI/config/common_test_config.yaml +++ /dev/null @@ -1,43 +0,0 @@ -kraken: - distribution: openshift # Distribution can be kubernetes or openshift. 
- kubeconfig_path: ~/.kube/config # Path to kubeconfig. - exit_on_failure: False # Exit when a post action scenario fails. - litmus_version: v1.13.6 # Litmus version to install. - litmus_uninstall: False # If you want to uninstall litmus if failure. - chaos_scenarios: # List of policies/chaos scenarios to load. - - $scenario_type: # List of chaos pod scenarios to load. - - $scenario_file - $post_config -cerberus: - cerberus_enabled: False # Enable it when cerberus is previously installed. - cerberus_url: # When cerberus_enabled is set to True, provide the url where cerberus publishes go/no-go signal. - -performance_monitoring: - deploy_dashboards: False # Install a mutable grafana and load the performance dashboards. Enable this only when running on OpenShift. - repo: "https://github.com/cloud-bulldozer/performance-dashboards.git" - kube_burner_binary_url: "https://github.com/cloud-bulldozer/kube-burner/releases/download/v0.9.1/kube-burner-0.9.1-Linux-x86_64.tar.gz" - capture_metrics: False - config_path: config/kube_burner.yaml # Define the Elasticsearch url and index name in this config. - metrics_profile_path: config/metrics-aggregated.yaml - prometheus_url: # The prometheus url/route is automatically obtained in case of OpenShift, please set it when the distribution is Kubernetes. - prometheus_bearer_token: # The bearer token is automatically obtained in case of OpenShift, please set it when the distribution is Kubernetes. This is needed to authenticate with prometheus. - uuid: # uuid for the run is generated by default if not set. - enable_alerts: False # Runs the queries specified in the alert profile and displays the info or exits 1 when severity=error. - alert_profile: config/alerts # Path to alert profile with the prometheus queries. - -tunings: - wait_duration: 6 # Duration to wait between each chaos scenario. - iterations: 1 # Number of times to execute the scenarios. - daemon_mode: False # Iterations are set to infinity which means that the kraken will cause chaos forever. -telemetry: - enabled: False # enable/disables the telemetry collection feature - api_url: https://ulnmf9xv7j.execute-api.us-west-2.amazonaws.com/production #telemetry service endpoint - username: username # telemetry service username - password: password # telemetry service password - prometheus_backup: True # enables/disables prometheus data collection - full_prometheus_backup: False # if is set to False only the /prometheus/wal folder will be downloaded. - backup_threads: 5 # number of telemetry download/upload threads - archive_path: /tmp # local path where the archive files will be temporarly stored - max_retries: 0 # maximum number of upload retries (if 0 will retry forever) - run_tag: '' # if set, this will be appended to the run folder in the bucket (useful to group the runs) - archive_size: 10000 # the size of the prometheus data archive size in KB. 
The lower the size of archive is diff --git a/CI/run.sh b/CI/run.sh index 9c00832b5..49ef2e5cb 100755 --- a/CI/run.sh +++ b/CI/run.sh @@ -39,7 +39,7 @@ echo '-----------------------|--------|---------' >> $results # Run each test for test_name in `cat CI/tests/my_tests` do - wait_cluster_become_ready + #wait_cluster_become_ready ./CI/run_test.sh $test_name $results - wait_cluster_become_ready + #wait_cluster_become_ready done diff --git a/CI/run_test.sh b/CI/run_test.sh index 90fc999ea..f692d2d71 100755 --- a/CI/run_test.sh +++ b/CI/run_test.sh @@ -11,7 +11,6 @@ function get_time_format() { echo $hours:$minutes:$seconds } ci_test=`echo $1` - results_file=$2 echo -e "\n======================================================================" @@ -23,7 +22,7 @@ ci_results="CI/out/$ci_test.out" echo "results $ci_results" >> $ci_results SECONDS=0 -if /bin/bash CI/tests/$ci_test.sh >> $ci_results 2>&1 +if /bin/bash CI/tests/$ci_test.sh >> $ci_results then # if the test passes update the results and complete duration=$SECONDS diff --git a/CI/scenarios/arcaflow/cpu-hog/config.yaml b/CI/scenarios/arcaflow/cpu-hog/config.yaml new file mode 100644 index 000000000..a03beb4c5 --- /dev/null +++ b/CI/scenarios/arcaflow/cpu-hog/config.yaml @@ -0,0 +1,11 @@ +--- +deployer: + connection: {} + type: kubernetes +log: + level: debug +logged_outputs: + error: + level: error + success: + level: debug diff --git a/CI/scenarios/arcaflow/cpu-hog/input.yaml b/CI/scenarios/arcaflow/cpu-hog/input.yaml new file mode 100644 index 000000000..3bcbece9f --- /dev/null +++ b/CI/scenarios/arcaflow/cpu-hog/input.yaml @@ -0,0 +1,14 @@ +input_list: +- cpu_count: 1 + cpu_load_percentage: 80 + cpu_method: all + duration: 30s + node_selector: {} + # node selector example + # node_selector: + # kubernetes.io/hostname: master + kubeconfig: "" + namespace: default + +# duplicate this section to run simultaneous stressors in the same run + diff --git a/CI/scenarios/arcaflow/cpu-hog/sub-workflow.yaml b/CI/scenarios/arcaflow/cpu-hog/sub-workflow.yaml new file mode 100644 index 000000000..39c74355a --- /dev/null +++ b/CI/scenarios/arcaflow/cpu-hog/sub-workflow.yaml @@ -0,0 +1,94 @@ +version: v0.1.0 +input: + root: RootObject + objects: + RootObject: + id: input_item + properties: + kubeconfig: + display: + description: The complete kubeconfig file as a string + name: Kubeconfig file contents + type: + type_id: string + required: true + namespace: + display: + description: The namespace where the container will be deployed + name: Namespace + type: + type_id: string + required: true + node_selector: + display: + description: kubernetes node name where the plugin must be deployed + type: + type_id: map + values: + type_id: string + keys: + type_id: string + required: true + duration: + display: + name: duration the scenario expressed in seconds + description: stop stress test after T seconds. One can also specify the units of time in + seconds, minutes, hours, days or years with the suffix s, m, h, d or y + type: + type_id: string + required: true + cpu_count: + display: + description: Number of CPU cores to be used (0 means all) + name: number of CPUs + type: + type_id: integer + required: true + cpu_method: + display: + description: CPU stress method + name: fine grained control of which cpu stressors to use (ackermann, cfloat etc.) 
+ type: + type_id: string + required: true + cpu_load_percentage: + display: + description: load CPU by percentage + name: CPU load + type: + type_id: integer + required: true + +steps: + kubeconfig: + plugin: quay.io/arcalot/arcaflow-plugin-kubeconfig:0.2.0 + input: + kubeconfig: !expr $.input.kubeconfig + stressng: + plugin: quay.io/arcalot/arcaflow-plugin-stressng:0.5.0 + step: workload + input: + cleanup: "true" + StressNGParams: + timeout: !expr $.input.duration + stressors: + - stressor: cpu + cpu_count: !expr $.input.cpu_count + cpu_method: !expr $.input.cpu_method + cpu_load: !expr $.input.cpu_load_percentage + deploy: + type: kubernetes + connection: !expr $.steps.kubeconfig.outputs.success.connection + pod: + metadata: + namespace: !expr $.input.namespace + labels: + arcaflow: stressng + spec: + nodeSelector: !expr $.input.node_selector + pluginContainer: + imagePullPolicy: Always +outputs: + success: + stressng: !expr $.steps.stressng.outputs.success + diff --git a/CI/scenarios/arcaflow/cpu-hog/workflow.yaml b/CI/scenarios/arcaflow/cpu-hog/workflow.yaml new file mode 100644 index 000000000..c399372b9 --- /dev/null +++ b/CI/scenarios/arcaflow/cpu-hog/workflow.yaml @@ -0,0 +1,77 @@ +version: v0.1.0 +input: + root: RootObject + objects: + RootObject: + id: RootObject + properties: + input_list: + type: + type_id: list + items: + id: input_item + type_id: object + properties: + kubeconfig: + display: + description: The complete kubeconfig file as a string + name: Kubeconfig file contents + type: + type_id: string + required: true + namespace: + display: + description: The namespace where the container will be deployed + name: Namespace + type: + type_id: string + required: true + node_selector: + display: + description: kubernetes node name where the plugin must be deployed + type: + type_id: map + values: + type_id: string + keys: + type_id: string + required: true + duration: + display: + name: duration the scenario expressed in seconds + description: stop stress test after T seconds. One can also specify the units of time in + seconds, minutes, hours, days or years with the suffix s, m, h, d or y + type: + type_id: string + required: true + cpu_count: + display: + description: Number of CPU cores to be used (0 means all) + name: number of CPUs + type: + type_id: integer + required: true + cpu_method: + display: + description: CPU stress method + name: fine grained control of which cpu stressors to use (ackermann, cfloat etc.) 
+ type: + type_id: string + required: true + cpu_load_percentage: + display: + description: load CPU by percentage + name: CPU load + type: + type_id: integer + required: true +steps: + workload_loop: + kind: foreach + items: !expr $.input.input_list + workflow: sub-workflow.yaml + parallelism: 1000 +outputs: + success: + workloads: !expr $.steps.workload_loop.outputs.success.data + diff --git a/CI/scenarios/arcaflow/io-hog/config.yaml b/CI/scenarios/arcaflow/io-hog/config.yaml new file mode 100644 index 000000000..093464a8d --- /dev/null +++ b/CI/scenarios/arcaflow/io-hog/config.yaml @@ -0,0 +1,10 @@ +deployer: + connection: {} + type: kubernetes +log: + level: debug +logged_outputs: + error: + level: error + success: + level: debug diff --git a/CI/scenarios/arcaflow/io-hog/input.yaml b/CI/scenarios/arcaflow/io-hog/input.yaml new file mode 100644 index 000000000..a315a0899 --- /dev/null +++ b/CI/scenarios/arcaflow/io-hog/input.yaml @@ -0,0 +1,13 @@ +input_list: +- duration: 30s + io_block_size: 1m + io_workers: 1 + io_write_bytes: 10m + kubeconfig: '' + namespace: default + node_selector: {} + target_pod_folder: /hog-data + target_pod_volume: + hostPath: + path: /tmp + name: node-volume diff --git a/CI/scenarios/arcaflow/io-hog/sub-workflow.yaml b/CI/scenarios/arcaflow/io-hog/sub-workflow.yaml new file mode 100644 index 000000000..c106f2708 --- /dev/null +++ b/CI/scenarios/arcaflow/io-hog/sub-workflow.yaml @@ -0,0 +1,139 @@ +version: v0.1.0 +input: + root: RootObject + objects: + RootObject: + id: input_item + properties: + kubeconfig: + display: + description: The complete kubeconfig file as a string + name: Kubeconfig file contents + type: + type_id: string + required: true + namespace: + display: + description: The namespace where the container will be deployed + name: Namespace + type: + type_id: string + required: true + node_selector: + display: + description: kubernetes node name where the plugin must be deployed + type: + type_id: map + values: + type_id: string + keys: + type_id: string + required: true + duration: + display: + name: duration the scenario expressed in seconds + description: stop stress test after T seconds. One can also specify the units of time in + seconds, minutes, hours, days or years with the suffix s, m, h, d or y + type: + type_id: string + required: true + io_workers: + display: + description: number of workers + name: start N workers continually writing, reading and removing temporary files + type: + type_id: integer + required: true + io_block_size: + display: + description: single write size + name: specify size of each write in bytes. Size can be from 1 byte to 4MB. + type: + type_id: string + required: true + io_write_bytes: + display: + description: Total number of bytes written + name: write N bytes for each hdd process, the default is 1 GB. One can specify the size + as % of free space on the file system or in units of Bytes, KBytes, MBytes and + GBytes using the suffix b, k, m or g + type: + type_id: string + required: true + target_pod_folder: + display: + description: Target Folder + name: Folder in the pod where the test will be executed and the test files will be written + type: + type_id: string + required: true + target_pod_volume: + display: + name: kubernetes volume definition + description: the volume that will be attached to the pod. 
In order to stress
+              the node storage only hostPath mode is currently supported
+          type:
+            type_id: object
+            id: k8s_volume
+            properties:
+              name:
+                display:
+                  description: name of the volume (must match the name in pod definition)
+                type:
+                  type_id: string
+                required: true
+              hostPath:
+                display:
+                  description: hostPath options expressed as string map (key-value)
+                type:
+                  type_id: map
+                  values:
+                    type_id: string
+                  keys:
+                    type_id: string
+                required: true
+          required: true
+
+steps:
+  kubeconfig:
+    plugin: quay.io/arcalot/arcaflow-plugin-kubeconfig:0.2.0
+    input:
+      kubeconfig: !expr $.input.kubeconfig
+  stressng:
+    plugin: quay.io/arcalot/arcaflow-plugin-stressng:0.5.0
+    step: workload
+    input:
+      cleanup: "true"
+      StressNGParams:
+        timeout: !expr $.input.duration
+        workdir: !expr $.input.target_pod_folder
+        stressors:
+        - stressor: hdd
+          hdd: !expr $.input.io_workers
+          hdd_bytes: !expr $.input.io_write_bytes
+          hdd_write_size: !expr $.input.io_block_size
+
+    deploy:
+      type: kubernetes
+      connection: !expr $.steps.kubeconfig.outputs.success.connection
+      pod:
+        metadata:
+          namespace: !expr $.input.namespace
+          labels:
+            arcaflow: stressng
+        spec:
+          nodeSelector: !expr $.input.node_selector
+          pluginContainer:
+            imagePullPolicy: Always
+            securityContext:
+              privileged: true
+            volumeMounts:
+            - mountPath: /hog-data
+              name: node-volume
+          volumes:
+          - !expr $.input.target_pod_volume
+
+outputs:
+  success:
+    stressng: !expr $.steps.stressng.outputs.success
+
diff --git a/CI/scenarios/arcaflow/io-hog/workflow.yaml b/CI/scenarios/arcaflow/io-hog/workflow.yaml
new file mode 100644
index 000000000..6cda53bca
--- /dev/null
+++ b/CI/scenarios/arcaflow/io-hog/workflow.yaml
@@ -0,0 +1,114 @@
+version: v0.1.0
+input:
+  root: RootObject
+  objects:
+    RootObject:
+      id: RootObject
+      properties:
+        input_list:
+          type:
+            type_id: list
+            items:
+              id: input_item
+              type_id: object
+              properties:
+                kubeconfig:
+                  display:
+                    description: The complete kubeconfig file as a string
+                    name: Kubeconfig file contents
+                  type:
+                    type_id: string
+                  required: true
+                namespace:
+                  display:
+                    description: The namespace where the container will be deployed
+                    name: Namespace
+                  type:
+                    type_id: string
+                  required: true
+                node_selector:
+                  display:
+                    description: kubernetes node name where the plugin must be deployed
+                  type:
+                    type_id: map
+                    values:
+                      type_id: string
+                    keys:
+                      type_id: string
+                  required: true
+                duration:
+                  display:
+                    name: duration the scenario expressed in seconds
+                    description: stop stress test after T seconds. One can also specify the units of time in
+                      seconds, minutes, hours, days or years with the suffix s, m, h, d or y
+                  type:
+                    type_id: string
+                  required: true
+                io_workers:
+                  display:
+                    description: number of workers
+                    name: start N workers continually writing, reading and removing temporary files
+                  type:
+                    type_id: integer
+                  required: true
+                io_block_size:
+                  display:
+                    description: single write size
+                    name: specify size of each write in bytes. Size can be from 1 byte to 4MB.
+                  type:
+                    type_id: string
+                  required: true
+                io_write_bytes:
+                  display:
+                    description: Total number of bytes written
+                    name: write N bytes for each hdd process, the default is 1 GB. One can specify the size
+                      as % of free space on the file system or in units of Bytes, KBytes, MBytes and
+                      GBytes using the suffix b, k, m or g
+                  type:
+                    type_id: string
+                  required: true
+                target_pod_folder:
+                  display:
+                    description: Target Folder
+                    name: Folder in the pod where the test will be executed and the test files will be written
+                  type:
+                    type_id: string
+                  required: true
+                target_pod_volume:
+                  display:
+                    name: kubernetes volume definition
+                    description: the volume that will be attached to the pod. In order to stress
+                      the node storage only hostPath mode is currently supported
+                  type:
+                    type_id: object
+                    id: k8s_volume
+                    properties:
+                      name:
+                        display:
+                          description: name of the volume (must match the name in pod definition)
+                        type:
+                          type_id: string
+                        required: true
+                      hostPath:
+                        display:
+                          description: hostPath options expressed as string map (key-value)
+                        type:
+                          type_id: map
+                          values:
+                            type_id: string
+                          keys:
+                            type_id: string
+                        required: true
+                  required: true
+steps:
+  workload_loop:
+    kind: foreach
+    items: !expr $.input.input_list
+    workflow: sub-workflow.yaml
+    parallelism: 1000
+outputs:
+  success:
+    workloads: !expr $.steps.workload_loop.outputs.success.data
+
+
+
diff --git a/CI/scenarios/arcaflow/memory-hog/config.yaml b/CI/scenarios/arcaflow/memory-hog/config.yaml
new file mode 100644
index 000000000..a03beb4c5
--- /dev/null
+++ b/CI/scenarios/arcaflow/memory-hog/config.yaml
@@ -0,0 +1,11 @@
+---
+deployer:
+  connection: {}
+  type: kubernetes
+log:
+  level: debug
+logged_outputs:
+  error:
+    level: error
+  success:
+    level: debug
diff --git a/CI/scenarios/arcaflow/memory-hog/input.yaml b/CI/scenarios/arcaflow/memory-hog/input.yaml
new file mode 100644
index 000000000..70b72c969
--- /dev/null
+++ b/CI/scenarios/arcaflow/memory-hog/input.yaml
@@ -0,0 +1,13 @@
+input_list:
+- duration: 30s
+  vm_bytes: 10%
+  vm_workers: 2
+  node_selector: { }
+  # node selector example
+  # node_selector:
+  #   kubernetes.io/hostname: master
+  kubeconfig: ""
+  namespace: default
+
+# duplicate this section to run simultaneous stressors in the same run
+
diff --git a/CI/scenarios/arcaflow/memory-hog/sub-workflow.yaml b/CI/scenarios/arcaflow/memory-hog/sub-workflow.yaml
new file mode 100644
index 000000000..cdcd45864
--- /dev/null
+++ b/CI/scenarios/arcaflow/memory-hog/sub-workflow.yaml
@@ -0,0 +1,86 @@
+version: v0.1.0
+input:
+  root: RootObject
+  objects:
+    RootObject:
+      id: input_item
+      properties:
+        kubeconfig:
+          display:
+            description: The complete kubeconfig file as a string
+            name: Kubeconfig file contents
+          type:
+            type_id: string
+          required: true
+        namespace:
+          display:
+            description: The namespace where the container will be deployed
+            name: Namespace
+          type:
+            type_id: string
+          required: true
+        node_selector:
+          display:
+            description: kubernetes node name where the plugin must be deployed
+          type:
+            type_id: map
+            values:
+              type_id: string
+            keys:
+              type_id: string
+          required: true
+        duration:
+          display:
+            name: duration the scenario expressed in seconds
+            description: stop stress test after T seconds. One can also specify the units of time in seconds, minutes, hours, days or years with the suffix s, m, h, d or y
+          type:
+            type_id: string
+          required: true
+        vm_workers:
+          display:
+            description: Number of VM stressors to be run (0 means 1 stressor per CPU)
+            name: Number of VM stressors
+          type:
+            type_id: integer
+          required: true
+        vm_bytes:
+          display:
+            description: N bytes per vm process, the default is 256MB.
+              The size can be expressed in units of Bytes, KBytes, MBytes and GBytes using the suffix b, k, m or g.
+            name: Bytes per VM stressor
+          type:
+            type_id: string
+          required: true
+
+steps:
+  kubeconfig:
+    plugin: quay.io/arcalot/arcaflow-plugin-kubeconfig:0.2.0
+    input:
+      kubeconfig: !expr $.input.kubeconfig
+  stressng:
+    plugin: quay.io/arcalot/arcaflow-plugin-stressng:0.5.0
+    step: workload
+    input:
+      cleanup: "true"
+      StressNGParams:
+        timeout: !expr $.input.duration
+        stressors:
+        - stressor: vm
+          vm: !expr $.input.vm_workers
+          vm_bytes: !expr $.input.vm_bytes
+    deploy:
+      type: kubernetes
+      connection: !expr $.steps.kubeconfig.outputs.success.connection
+      pod:
+        metadata:
+          namespace: !expr $.input.namespace
+          labels:
+            arcaflow: stressng
+        spec:
+          nodeSelector: !expr $.input.node_selector
+          pluginContainer:
+            imagePullPolicy: Always
+
+outputs:
+  success:
+    stressng: !expr $.steps.stressng.outputs.success
+
diff --git a/CI/scenarios/arcaflow/memory-hog/workflow.yaml b/CI/scenarios/arcaflow/memory-hog/workflow.yaml
new file mode 100644
index 000000000..fb9bf1588
--- /dev/null
+++ b/CI/scenarios/arcaflow/memory-hog/workflow.yaml
@@ -0,0 +1,73 @@
+version: v0.1.0
+input:
+  root: RootObject
+  objects:
+    RootObject:
+      id: RootObject
+      properties:
+        input_list:
+          type:
+            type_id: list
+            items:
+              id: input_item
+              type_id: object
+              properties:
+                kubeconfig:
+                  display:
+                    description: The complete kubeconfig file as a string
+                    name: Kubeconfig file contents
+                  type:
+                    type_id: string
+                  required: true
+                namespace:
+                  display:
+                    description: The namespace where the container will be deployed
+                    name: Namespace
+                  type:
+                    type_id: string
+                  required: true
+                node_selector:
+                  display:
+                    description: kubernetes node name where the plugin must be deployed
+                  type:
+                    type_id: map
+                    values:
+                      type_id: string
+                    keys:
+                      type_id: string
+                  required: true
+                duration:
+                  display:
+                    name: duration the scenario expressed in seconds
+                    description: stop stress test after T seconds. One can also specify the units of time in seconds, minutes, hours, days or years with the suffix s, m, h, d or y
+                  type:
+                    type_id: string
+                  required: true
+                vm_workers:
+                  display:
+                    description: Number of VM stressors to be run (0 means 1 stressor per CPU)
+                    name: Number of VM stressors
+                  type:
+                    type_id: integer
+                  required: true
+                vm_bytes:
+                  display:
+                    description: N bytes per vm process, the default is 256MB. The size can be expressed in units of Bytes, KBytes, MBytes and GBytes using the suffix b, k, m or g.
+                    name: Bytes per VM stressor
+                  type:
+                    type_id: string
+                  required: true
+steps:
+  workload_loop:
+    kind: foreach
+    items: !expr $.input.input_list
+    workflow: sub-workflow.yaml
+    parallelism: 1000
+outputs:
+  success:
+    workloads: !expr $.steps.workload_loop.outputs.success.data
+
+
+
+
+
diff --git a/CI/scenarios/node_cpu_hog_engine.yaml b/CI/scenarios/node_cpu_hog_engine.yaml
deleted file mode 100644
index b095942cd..000000000
--- a/CI/scenarios/node_cpu_hog_engine.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-apiVersion: litmuschaos.io/v1alpha1
-kind: ChaosEngine
-metadata:
-  name: nginx-chaos
-  namespace: litmus
-spec:
-  # It can be true/false
-  annotationCheck: 'false'
-  # It can be active/stop
-  engineState: 'active'
-  chaosServiceAccount: litmus-sa
-  monitoring: false
-  # It can be delete/retain
-  jobCleanUpPolicy: 'delete'
-  experiments:
-    - name: node-cpu-hog
-      spec:
-        components:
-          env:
-            # set chaos duration (in sec) as desired
-            - name: TOTAL_CHAOS_DURATION
-              value: '10'
-
-            # Number of cores of node CPU to be consumed
-            - name: NODE_CPU_CORE
-              value: '1'
-
-            # percentage of total nodes to target
-            - name: NODES_AFFECTED_PERC
-              value: '30'
-
-            # ENTER THE COMMA SEPARATED TARGET NODES NAME
-            - name: TARGET_NODES
-              value: $WORKER_NODE
diff --git a/CI/scenarios/node_cpu_hog_engine_node.yaml b/CI/scenarios/node_cpu_hog_engine_node.yaml
deleted file mode 100644
index 2ba399df2..000000000
--- a/CI/scenarios/node_cpu_hog_engine_node.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-apiVersion: litmuschaos.io/v1alpha1
-kind: ChaosEngine
-metadata:
-  name: nginx-chaos
-  namespace: litmus
-spec:
-  # It can be true/false
-  annotationCheck: 'false'
-  # It can be active/stop
-  engineState: 'active'
-  chaosServiceAccount: litmus-sa
-  monitoring: false
-  # It can be delete/retain
-  jobCleanUpPolicy: 'delete'
-  experiments:
-    - name: node-cpu-hog
-      spec:
-        components:
-          env:
-            # set chaos duration (in sec) as desired
-            - name: TOTAL_CHAOS_DURATION
-              value: '10'
-
-            # Number of cores of node CPU to be consumed
-            - name: NODE_CPU_CORE
-              value: '1'
-
-            # percentage of total nodes to target
-            - name: NODES_AFFECTED_PERC
-              value: '30'
-
-            # ENTER THE COMMA SEPARATED TARGET NODES NAME
-            - name: TARGET_NODES
-              value:
diff --git a/CI/scenarios/node_io_engine.yaml b/CI/scenarios/node_io_engine.yaml
deleted file mode 100644
index 51fb72086..000000000
--- a/CI/scenarios/node_io_engine.yaml
+++ /dev/null
@@ -1,35 +0,0 @@
-apiVersion: litmuschaos.io/v1alpha1
-kind: ChaosEngine
-metadata:
-  name: nginx-chaos
-  namespace: litmus
-spec:
-  # It can be delete/retain
-  jobCleanUpPolicy: 'retain'
-  # It can be active/stop
-  engineState: 'active'
-  chaosServiceAccount: litmus-sa
-  experiments:
-    - name: node-io-stress
-      spec:
-        components:
-          env:
-            # set chaos duration (in sec) as desired
-            - name: TOTAL_CHAOS_DURATION
-              value: '10'
-
-            ## specify the size as percentage of free space on the file system
-            - name: FILESYSTEM_UTILIZATION_PERCENTAGE
-              value: '100'
-
-            ## Number of core of CPU
-            - name: CPU
-              value: '1'
-
-            ## Total number of workers default value is 4
-            - name: NUMBER_OF_WORKERS
-              value: '3'
-
-            ## enter the comma separated target nodes name
-            - name: TARGET_NODES
-              value: $WORKER_NODE
diff --git a/CI/scenarios/node_io_engine_node.yaml b/CI/scenarios/node_io_engine_node.yaml
deleted file mode 100644
index 73affe46a..000000000
--- a/CI/scenarios/node_io_engine_node.yaml
+++ /dev/null
@@ -1,35 +0,0 @@
-apiVersion: litmuschaos.io/v1alpha1
-kind: ChaosEngine
-metadata:
-  name: nginx-chaos
-  namespace: litmus
-spec:
-  # It
can be delete/retain - jobCleanUpPolicy: 'retain' - # It can be active/stop - engineState: 'active' - chaosServiceAccount: litmus-sa - experiments: - - name: node-io-stress - spec: - components: - env: - # set chaos duration (in sec) as desired - - name: TOTAL_CHAOS_DURATION - value: '10' - - ## specify the size as percentage of free space on the file system - - name: FILESYSTEM_UTILIZATION_PERCENTAGE - value: '100' - - ## Number of core of CPU - - name: CPU - value: '1' - - ## Total number of workers default value is 4 - - name: NUMBER_OF_WORKERS - value: '3' - - ## enter the comma separated target nodes name - - name: TARGET_NODES - value: diff --git a/CI/scenarios/node_mem_engine.yaml b/CI/scenarios/node_mem_engine.yaml deleted file mode 100644 index 33fc7ff7f..000000000 --- a/CI/scenarios/node_mem_engine.yaml +++ /dev/null @@ -1,28 +0,0 @@ -apiVersion: litmuschaos.io/v1alpha1 -kind: ChaosEngine -metadata: - name: nginx-chaos - namespace: litmus -spec: - # It can be delete/retain - jobCleanUpPolicy: 'retain' - # It can be active/stop - engineState: 'active' - chaosServiceAccount: litmus-sa - experiments: - - name: node-memory-hog - spec: - components: - env: - # set chaos duration (in sec) as desired - - name: TOTAL_CHAOS_DURATION - value: '10' - - ## Specify the size as percent of total node capacity Ex: '30' - ## Note: For consuming memory in mebibytes change the variable to MEMORY_CONSUMPTION_MEBIBYTES - - name: MEMORY_CONSUMPTION_PERCENTAGE - value: '30' - - # ENTER THE COMMA SEPARATED TARGET NODES NAME - - name: TARGET_NODES - value: $WORKER_NODE diff --git a/CI/scenarios/node_mem_engine_node.yaml b/CI/scenarios/node_mem_engine_node.yaml deleted file mode 100644 index 6de33f248..000000000 --- a/CI/scenarios/node_mem_engine_node.yaml +++ /dev/null @@ -1,28 +0,0 @@ -apiVersion: litmuschaos.io/v1alpha1 -kind: ChaosEngine -metadata: - name: nginx-chaos - namespace: litmus -spec: - # It can be delete/retain - jobCleanUpPolicy: 'retain' - # It can be active/stop - engineState: 'active' - chaosServiceAccount: litmus-sa - experiments: - - name: node-memory-hog - spec: - components: - env: - # set chaos duration (in sec) as desired - - name: TOTAL_CHAOS_DURATION - value: '10' - - ## Specify the size as percent of total node capacity Ex: '30' - ## Note: For consuming memory in mebibytes change the variable to MEMORY_CONSUMPTION_MEBIBYTES - - name: MEMORY_CONSUMPTION_PERCENTAGE - value: '30' - - # ENTER THE COMMA SEPARATED TARGET NODES NAME - - name: TARGET_NODES - value: diff --git a/CI/tests/test_arca_cpu_hog.sh b/CI/tests/test_arca_cpu_hog.sh new file mode 100644 index 000000000..ad39f44f0 --- /dev/null +++ b/CI/tests/test_arca_cpu_hog.sh @@ -0,0 +1,18 @@ +set -xeEo pipefail + +source CI/tests/common.sh + +trap error ERR +trap finish EXIT + + +function functional_test_arca_cpu_hog { + export scenario_type="arcaflow_scenarios" + export scenario_file="CI/scenarios/arcaflow/cpu-hog/input.yaml" + export post_config="" + envsubst < CI/config/common_test_config.yaml > CI/config/arca_cpu_hog.yaml + python3 -m coverage run -a run_kraken.py -c CI/config/arca_cpu_hog.yaml + echo "Arcaflow CPU Hog: Success" +} + +functional_test_arca_cpu_hog \ No newline at end of file diff --git a/CI/tests/test_arca_io_hog.sh b/CI/tests/test_arca_io_hog.sh new file mode 100644 index 000000000..0d92f3e86 --- /dev/null +++ b/CI/tests/test_arca_io_hog.sh @@ -0,0 +1,18 @@ +set -xeEo pipefail + +source CI/tests/common.sh + +trap error ERR +trap finish EXIT + + +function functional_test_arca_io_hog { + export 
scenario_type="arcaflow_scenarios"
+  export scenario_file="CI/scenarios/arcaflow/io-hog/input.yaml"
+  export post_config=""
+  envsubst < CI/config/common_test_config.yaml > CI/config/arca_io_hog.yaml
+  python3 -m coverage run -a run_kraken.py -c CI/config/arca_io_hog.yaml
+  echo "Arcaflow IO Hog: Success"
+}
+
+functional_test_arca_io_hog
\ No newline at end of file
diff --git a/CI/tests/test_arca_memory_hog.sh b/CI/tests/test_arca_memory_hog.sh
new file mode 100644
index 000000000..430d70607
--- /dev/null
+++ b/CI/tests/test_arca_memory_hog.sh
@@ -0,0 +1,18 @@
+set -xeEo pipefail
+
+source CI/tests/common.sh
+
+trap error ERR
+trap finish EXIT
+
+
+function functional_test_arca_memory_hog {
+  export scenario_type="arcaflow_scenarios"
+  export scenario_file="CI/scenarios/arcaflow/memory-hog/input.yaml"
+  export post_config=""
+  envsubst < CI/config/common_test_config.yaml > CI/config/arca_memory_hog.yaml
+  python3 -m coverage run -a run_kraken.py -c CI/config/arca_memory_hog.yaml
+  echo "Arcaflow Memory Hog: Success"
+}
+
+functional_test_arca_memory_hog
\ No newline at end of file
diff --git a/CI/tests/test_cpu_hog.sh b/CI/tests/test_cpu_hog.sh
deleted file mode 100755
index 0ab06c718..000000000
--- a/CI/tests/test_cpu_hog.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-set -xeEo pipefail
-
-source CI/tests/common.sh
-
-trap error ERR
-trap finish EXIT
-
-
-function functional_test_litmus_cpu {
-
-  export scenario_type="litmus_scenarios"
-  export scenario_file="- scenarios/templates/litmus-rbac.yaml"
-  export post_config="- CI/scenarios/node_cpu_hog_engine_node.yaml"
-  envsubst < CI/config/common_test_config.yaml > CI/config/litmus_config.yaml
-  envsubst < CI/scenarios/node_cpu_hog_engine.yaml > CI/scenarios/node_cpu_hog_engine_node.yaml
-  python3 -m coverage run -a run_kraken.py -c CI/config/litmus_config.yaml
-  echo "Litmus scenario test: Success"
-}
-
-functional_test_litmus_cpu
diff --git a/CI/tests/test_cpu_hog_gh.sh b/CI/tests/test_cpu_hog_gh.sh
deleted file mode 100755
index 53019485c..000000000
--- a/CI/tests/test_cpu_hog_gh.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-set -xeEo pipefail
-
-source CI/tests/common.sh
-
-trap error ERR
-trap finish EXIT
-
-
-function functional_test_litmus_cpu {
-  [ -z $NODE_NAME ] && echo "[ERR] NODE_NAME variable not set, failing."
&& exit 1 - yq -i ' .spec.experiments = [{"name": "node-cpu-hog", "spec":{"components":{"env":[{"name":"TOTAL_CHAOS_DURATION","value":"10"},{"name":"NODE_CPU_CORE","value":"1"},{"name":"NODES_AFFECTED_PERC","value":"30"},{"name":"TARGET_NODES","value":"'$NODE_NAME'"}]}}}]' CI/scenarios/node_cpu_hog_engine_node.yaml - - cp CI/config/common_test_config.yaml CI/config/litmus_config.yaml - yq '.kraken.chaos_scenarios = [{"litmus_scenarios":[["scenarios/openshift/templates/litmus-rbac.yaml","CI/scenarios/node_cpu_hog_engine_node.yaml"]]}]' -i CI/config/litmus_config.yaml - - python3 -m coverage run -a run_kraken.py -c CI/config/litmus_config.yaml - echo "Litmus scenario test: Success" -} - -functional_test_litmus_cpu diff --git a/CI/tests/test_io_hog.sh b/CI/tests/test_io_hog.sh deleted file mode 100755 index d4ce24e8a..000000000 --- a/CI/tests/test_io_hog.sh +++ /dev/null @@ -1,20 +0,0 @@ -set -xeEo pipefail - -source CI/tests/common.sh - -trap error ERR -trap finish EXIT - - -function functional_test_litmus_io { - - export scenario_type="litmus_scenarios" - export scenario_file="- scenarios/templates/litmus-rbac.yaml" - export post_config="- CI/scenarios/node_io_engine_node.yaml" - envsubst < CI/config/common_test_config.yaml > CI/config/litmus_config.yaml - envsubst < CI/scenarios/node_io_engine.yaml > CI/scenarios/node_io_engine_node.yaml - python3 -m coverage run -a run_kraken.py -c CI/config/litmus_config.yaml - echo "Litmus scenario test: Success" -} - -functional_test_litmus_io diff --git a/CI/tests/test_io_hog_gh.sh b/CI/tests/test_io_hog_gh.sh deleted file mode 100755 index 8fd2b0e1a..000000000 --- a/CI/tests/test_io_hog_gh.sh +++ /dev/null @@ -1,19 +0,0 @@ -set -xeEo pipefail - -source CI/tests/common.sh - -trap error ERR -trap finish EXIT - - -function functional_test_litmus_io { - [ -z $NODE_NAME ] && echo "[ERR] NODE_NAME variable not set, failing." 
&& exit 1 - yq -i ' .spec.experiments = [{"name": "node-io-stress", "spec":{"components":{"env":[{"name":"TOTAL_CHAOS_DURATION","value":"10"},{"name":"FILESYSTEM_UTILIZATION_PERCENTAGE","value":"100"},{"name":"CPU","value":"1"},{"name":"NUMBER_OF_WORKERS","value":"3"},{"name":"TARGET_NODES","value":"'$NODE_NAME'"}]}}}]' CI/scenarios/node_io_engine_node.yaml - cp CI/config/common_test_config.yaml CI/config/litmus_config.yaml - yq '.kraken.chaos_scenarios = [{"litmus_scenarios":[["scenarios/openshift/templates/litmus-rbac.yaml","CI/scenarios/node_io_engine_node.yaml"]]}]' -i CI/config/litmus_config.yaml - - python3 -m coverage run -a run_kraken.py -c CI/config/litmus_config.yaml - echo "Litmus scenario test: Success" -} - -functional_test_litmus_io diff --git a/CI/tests/test_mem_hog.sh b/CI/tests/test_mem_hog.sh deleted file mode 100755 index 98832cf01..000000000 --- a/CI/tests/test_mem_hog.sh +++ /dev/null @@ -1,20 +0,0 @@ -set -xeEo pipefail - -source CI/tests/common.sh - -trap error ERR -trap finish EXIT - - -function functional_test_litmus_mem { - - export scenario_type="litmus_scenarios" - export scenario_file="- scenarios/templates/litmus-rbac.yaml" - export post_config="- CI/scenarios/node_mem_engine_node.yaml" - envsubst < CI/config/common_test_config.yaml > CI/config/litmus_config.yaml - envsubst < CI/scenarios/node_mem_engine.yaml > CI/scenarios/node_mem_engine_node.yaml - python3 -m coverage run -a run_kraken.py -c CI/config/litmus_config.yaml - echo "Litmus scenario $1 test: Success" -} - -functional_test_litmus_mem "- CI/scenarios/node_mem_engine.yaml" diff --git a/CI/tests/test_mem_hog_gh.sh b/CI/tests/test_mem_hog_gh.sh deleted file mode 100755 index c46931393..000000000 --- a/CI/tests/test_mem_hog_gh.sh +++ /dev/null @@ -1,19 +0,0 @@ -set -xeEo pipefail - -source CI/tests/common.sh - -trap error ERR -trap finish EXIT - - -function functional_test_litmus_mem { - [ -z $NODE_NAME ] && echo "[ERR] NODE_NAME variable not set, failing." 
&& exit 1
-  yq -i ' .spec.experiments = [{"name": "node-io-stress", "spec":{"components":{"env":[{"name":"TOTAL_CHAOS_DURATION","value":"10"},{"name":"CPU","value":"1"},{"name":"TARGET_NODES","value":"'$NODE_NAME'"}]}}}]' CI/scenarios/node_mem_engine_node.yaml
-  cp CI/config/common_test_config.yaml CI/config/litmus_config.yaml
-  yq '.kraken.chaos_scenarios = [{"litmus_scenarios":[["scenarios/openshift/templates/litmus-rbac.yaml","CI/scenarios/node_mem_engine_node.yaml"]]}]' -i CI/config/litmus_config.yaml
-
-  python3 -m coverage run -a run_kraken.py -c CI/config/litmus_config.yaml
-  echo "Litmus scenario test: Success"
-}
-
-functional_test_litmus_mem
diff --git a/CI/tests/test_telemetry.sh b/CI/tests/test_telemetry.sh
new file mode 100644
index 000000000..65d2109c0
--- /dev/null
+++ b/CI/tests/test_telemetry.sh
@@ -0,0 +1,31 @@
+set -xeEo pipefail
+
+source CI/tests/common.sh
+
+trap error ERR
+trap finish EXIT
+
+
+function functional_test_telemetry {
+  # runs the cpu hog scenario and collects telemetry
+  telemetry_id="funtest-`date +%s`"
+  export scenario_type="arcaflow_scenarios"
+  export scenario_file="CI/scenarios/arcaflow/cpu-hog/input.yaml"
+  export post_config=""
+  envsubst < CI/config/common_test_config.yaml > CI/config/telemetry.yaml
+  yq -i '.telemetry.enabled=true' CI/config/telemetry.yaml
+  yq -i '.telemetry.username="'$telemetry_username'"' CI/config/telemetry.yaml
+  yq -i '.telemetry.password="'$telemetry_password'"' CI/config/telemetry.yaml
+  yq -i '.telemetry.backup_threads=1' CI/config/telemetry.yaml
+  yq -i '.telemetry.run_tag="'$telemetry_id'"' CI/config/telemetry.yaml
+  python3 -m coverage run -a run_kraken.py -c CI/config/telemetry.yaml
+  test_folder="`aws s3 ls s3://$telemetry_bucket | awk '{ print $2 }' | grep $telemetry_id`"
+  [ -z "$test_folder" ] && echo "[ERROR] telemetry folder not created" && exit 1
+  readarray files < <(aws s3 ls s3://$telemetry_bucket/$test_folder | awk '{ print $4 }')
+  [[ ${#files[@]} == 0 ]] && echo "[ERROR] no telemetry files uploaded" && exit 1
+  echo -e "telemetry files successfully uploaded:\n ${files[*]}"
+  echo "Arcaflow Telemetry: Success"
+
+}
+
+functional_test_telemetry
\ No newline at end of file