From c0df844cc3d1a0a3787644b4675fd18aab8b40f1 Mon Sep 17 00:00:00 2001 From: Emma Foley Date: Wed, 1 May 2024 14:43:32 -0400 Subject: [PATCH 01/17] Move playbook tasks to role In preparation for other refactors. This will allow for setting some default values for vars, so that the existing playbooks still run as they did previously without need to update the scripts running them. having the role will also allow to use the file/ and templates/ sub-directories, and add some additional configurability to the role. --- playbooks/autoscaling_osp18.yaml | 12 ++ playbooks/configure_heat.yaml | 178 +----------------- playbooks/creating_stack.yaml | 92 +-------- playbooks/test_autoscaling.yaml | 72 +------ playbooks/verify_autoscaling.yaml | 41 +--- roles/telemetry_autoscaling/README.md | 38 ++++ roles/telemetry_autoscaling/defaults/main.yml | 5 + roles/telemetry_autoscaling/meta/main.yml | 52 +++++ .../tasks/configure_heat.yml | 178 ++++++++++++++++++ .../tasks/creating_stack.yml | 86 +++++++++ roles/telemetry_autoscaling/tasks/main.yml | 7 + .../tasks/test_autoscaling.yml | 69 +++++++ .../tasks/verify_autoscaling.yml | 42 +++++ 13 files changed, 501 insertions(+), 371 deletions(-) create mode 100644 playbooks/autoscaling_osp18.yaml create mode 100644 roles/telemetry_autoscaling/README.md create mode 100644 roles/telemetry_autoscaling/defaults/main.yml create mode 100644 roles/telemetry_autoscaling/meta/main.yml create mode 100644 roles/telemetry_autoscaling/tasks/configure_heat.yml create mode 100644 roles/telemetry_autoscaling/tasks/creating_stack.yml create mode 100644 roles/telemetry_autoscaling/tasks/main.yml create mode 100644 roles/telemetry_autoscaling/tasks/test_autoscaling.yml create mode 100644 roles/telemetry_autoscaling/tasks/verify_autoscaling.yml diff --git a/playbooks/autoscaling_osp18.yaml b/playbooks/autoscaling_osp18.yaml new file mode 100644 index 0000000..75b7fd4 --- /dev/null +++ b/playbooks/autoscaling_osp18.yaml @@ -0,0 +1,12 @@ +--- +- hosts: localhost + become: no + name: Run the autoscaling tests + vars: + openstack_cmd: "oc rsh openstackclient openstack" + metrics_backend: "prometheus" + + tasks: + - import_role: + name: '../roles/telemetry_autoscaling' + diff --git a/playbooks/configure_heat.yaml b/playbooks/configure_heat.yaml index 8df3966..4c23e83 100644 --- a/playbooks/configure_heat.yaml +++ b/playbooks/configure_heat.yaml @@ -6,181 +6,9 @@ - hosts: undercloud become: no name: Using the heat service for autoscaling - vars: - openstack_cmd: "{{ openstack_cmd if openstack_cmd is defined else 'openstack' }}" - stack_name: "{{ stack_name if stack_name is defined else 'vnf' }}" tasks: - - - name: Create the generic archive policy for autoscaling - shell: | - source ~/overcloudrc; - {{ openstack_cmd }} metric archive-policy create generic \ - --back-window 0 \ - --definition timespan:'4:00:00',granularity:'0:01:00',points:240 \ - --aggregation-method 'rate:mean' \ - --aggregation-method 'mean'; - register: result - failed_when: result.rc >= 1 - - - name: Verify that the archive policy was created - shell: | - source ~/overcloudrc; - {{ openstack_cmd }} metric archive-policy show generic; - register: result - failed_when: result.rc >= 1 - - - name: Create "vnf" directory under templates - shell: | - mkdir -p $HOME/templates/autoscaling/vnf/ - - - name: Configure heat template for automatically scaling instances - copy: - dest: ~/templates/autoscaling/vnf/instance.yaml - content: | - heat_template_version: wallaby - description: Template to control scaling of VNF instance 
- - parameters: - metadata: - type: json - image: - type: string - description: image used to create instance - default: workload_image_1 - flavor: - type: string - description: instance flavor to be used - default: workload_flavor_1 - key_name: - type: string - description: keypair to be used - default: workload_key_1 - network: - type: string - description: project network to attach instance to - default: workload_internal_net_1 - external_network: - type: string - description: network used for floating IPs - default: public - - resources: - vnf: - type: OS::Nova::Server - properties: - flavor: {get_param: flavor} - key_name: {get_param: key_name} - image: { get_param: image } - metadata: { get_param: metadata } - networks: - - port: { get_resource: port } - - port: - type: OS::Neutron::Port - properties: - network: {get_param: network} - security_groups: - - workload_secgroup_1 - - floating_ip: - type: OS::Neutron::FloatingIP - properties: - floating_network: {get_param: external_network } - - floating_ip_assoc: - type: OS::Neutron::FloatingIPAssociation - properties: - floatingip_id: { get_resource: floating_ip } - port_id: { get_resource: port } - - - name: Create the resource to reference in the heat template - copy: - dest: ~/templates/autoscaling/vnf/resources.yaml - content: | - resource_registry: - "OS::Nova::Server::VNF": /home/stack/templates/autoscaling/vnf/instance.yaml - - - name: Create the deployment template for heat to control instance scaling - copy: - dest: ~/templates/autoscaling/vnf/template.yaml - content: | - heat_template_version: wallaby - description: Example auto scale group, policy and alarm - resources: - scaleup_group: - type: OS::Heat::AutoScalingGroup - properties: - max_size: 3 - min_size: 1 - #desired_capacity: 1 - resource: - type: OS::Nova::Server::VNF - properties: - metadata: {"metering.server_group": {get_param: "OS::stack_id"}} - - scaleup_policy: - type: OS::Heat::ScalingPolicy - properties: - adjustment_type: change_in_capacity - auto_scaling_group_id: { get_resource: scaleup_group } - cooldown: 60 - scaling_adjustment: 1 - - scaledown_policy: - type: OS::Heat::ScalingPolicy - properties: - adjustment_type: change_in_capacity - auto_scaling_group_id: { get_resource: scaleup_group } - cooldown: 60 - scaling_adjustment: -1 - - cpu_alarm_high: - type: OS::Aodh::GnocchiAggregationByResourcesAlarm - properties: - description: Scale up instance if CPU > 50% - metric: cpu - aggregation_method: rate:mean - granularity: 300 - evaluation_periods: 1 - threshold: 30000000000.0 - resource_type: instance - comparison_operator: gt - alarm_actions: - - str_replace: - template: trust+url - params: - url: {get_attr: [scaleup_policy, signal_url]} - query: - list_join: - - '' - - - {'=': {server_group: {get_param: "OS::stack_id"}}} - - cpu_alarm_low: - type: OS::Aodh::GnocchiAggregationByResourcesAlarm - properties: - description: Scale down instance if CPU < 20% - metric: cpu - aggregation_method: rate:mean - granularity: 300 - evaluation_periods: 1 - threshold: 12000000000.0 - resource_type: instance - comparison_operator: lt - alarm_actions: - - str_replace: - template: trust+url - params: - url: {get_attr: [scaledown_policy, signal_url]} - query: - list_join: - - '' - - - {'=': {server_group: {get_param: "OS::stack_id"}}} - - outputs: - scaleup_policy_signal_url: - value: {get_attr: [scaleup_policy, alarm_url]} - - scaledown_policy_signal_url: - value: {get_attr: [scaledown_policy, alarm_url]} + - import_role: + name: '../roles/telemetry_autoscaling' + 
tasks_from: configure_heat ... diff --git a/playbooks/creating_stack.yaml b/playbooks/creating_stack.yaml index 6a2011e..860c9a1 100644 --- a/playbooks/creating_stack.yaml +++ b/playbooks/creating_stack.yaml @@ -6,95 +6,9 @@ - hosts: undercloud become: no name: Creating the stack deployment for autoscaling - vars: - openstack_cmd: "{{ openstack_cmd if openstack_cmd is defined else 'openstack' }}" - stack_name: "{{ stack_name if stack_name is defined else 'vnf' }}" tasks: - - - name: Create the stack - shell: | - source ~/overcloudrc; - {{ openstack_cmd }} stack create \ - -t $HOME/templates/autoscaling/vnf/template.yaml \ - -e $HOME/templates/autoscaling/vnf/resources.yaml \ - {{ stack_name }}; - register: result - failed_when: result.rc >= 1 - - - name: Wait for 60 sec - pause: - minutes: 1 - - - name: Verify that the stack was created successfully - shell: | - source ~/overcloudrc; - {{ openstack_cmd }} stack show {{ stack_name }} -c id -c stack_status; - register: result - failed_when: '"CREATE_COMPLETE" not in result.stdout' - - - name: Verify that the stack resources are created - shell: | - source ~/overcloudrc; - export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); - {{ openstack_cmd }} stack resource list $STACK_ID; - register: result - failed_when: '"CREATE_COMPLETE" not in result.stdout' - - - name: Verify that an instance was launched by the stack creation - shell: | - source ~/overcloudrc; - export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); - {{ openstack_cmd }} server list --long | grep $STACK_ID; - register: result - failed_when: result.rc >= 1 - - - name: Verify that the alarms were created for the stack - shell: | - source ~/overcloudrc; - {{ openstack_cmd }} alarm list - register: result - failed_when: result.rc >= 1 - - - name: Note the physical_resource_id values for the cpu_alarm_low resource - shell: | - source ~/overcloudrc; - export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); - {{ openstack_cmd }} stack resource list $STACK_ID |grep -i cpu_alarm_low | awk '{print $4}' - register: physical_resource_id_low - - - name: Note the physical_resource_id values for the cpu_alarm_high resource - shell: | - source ~/overcloudrc; - export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); - {{ openstack_cmd }} stack resource list $STACK_ID |grep -i cpu_alarm_high | awk '{print $4}' - register: physical_resource_id_high - - - name: Verify physical_resource_id match the alarm id for cpu_alarm_low - shell: | - source ~/overcloudrc; - {{ openstack_cmd }} alarm list |grep -i cpu_alarm_low | awk '{print $2}' - register: alarm_id_low - failed_when: - - physical_resource_id_low.stdout != alarm_id_low.stdout - - - name: Verify physical_resource_id match the alarm id for cpu_alarm_high - shell: | - source ~/overcloudrc; - {{ openstack_cmd }} alarm list |grep -i cpu_alarm_high | awk '{print $2}' - register: alarm_id_high - failed_when: - - physical_resource_id_high.stdout != alarm_id_high.stdout - - - name: Verify that metric resources exist for the stack - shell: | - source ~/overcloudrc; - export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); - {{ openstack_cmd }} metric resource search \ - --sort-column launched_at -c id \ - -c display_name -c launched_at \ - -c deleted_at --type instance \ - server_group="$STACK_ID" - register: result - failed_when: result.rc >= 1 + - include_role: + name: '../roles/telemetry_autoscaling' + tasks_from: 
creating_stack ... diff --git a/playbooks/test_autoscaling.yaml b/playbooks/test_autoscaling.yaml index 9a768fa..06ead50 100644 --- a/playbooks/test_autoscaling.yaml +++ b/playbooks/test_autoscaling.yaml @@ -6,74 +6,8 @@ - hosts: undercloud become: no name: Test automatic scaling of instances - vars: - openstack_cmd: "{{ openstack_cmd if openstack_cmd is defined else 'openstack' }}" - stack_name: "{{ stack_name if stack_name is defined else 'vnf' }}" tasks: - - - name: register instance IP - shell: | - source ~/overcloudrc; - export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value) - {{ openstack_cmd }} server list --long -c Networks -c 'Properties' | \ - grep -i $STACK_ID | \ - awk -F'=' '{print $2}' | \ - awk -F',' '{print $1}' - register: vnf_instance_ip - - - name: Verfiy the number of instances before scaling - shell: | - source ~/overcloudrc; - {{ openstack_cmd }} server list --long|grep -i metering.server_group |wc -l - register: instance_count1 - - - name: Test automatic scaling up of instances - shell: | - sshpass -p gocubsgo ssh cirros@{{ vnf_instance_ip.stdout }} "sudo yes > /dev/null &" - register: result - - - name: Verify that the alarm has been triggered - shell: | - source ~/overcloudrc; - {{ openstack_cmd }} alarm list -c state -c name -f value| \ - grep -i "cpu_alarm_high" | \ - awk '{print $2}' - retries: 100 - delay: 5 - register: result - until: result.stdout == "alarm" - - - name: Verify that the Orchestration service has scaled up the instances - shell: | - source ~/overcloudrc; - {{ openstack_cmd }} server list --long|grep -i metering.server_group | wc -l - retries: 100 - delay: 5 - register: instance_count2 - until: instance_count2.stdout == "3" - - - name: Test automatic scaling down of instances - pause: - minutes: 5 - - - name: Verify that the alarm has been triggered - shell: | - source ~/overcloudrc; - {{ openstack_cmd }} alarm list -c state -c name -f value| \ - grep -i "cpu_alarm_low" | \ - awk '{print $2}' - retries: 100 - delay: 5 - register: result - until: result.stdout == "alarm" - - - name: Verify that the Orchestration service has scaled down the instances - shell: | - source ~/overcloudrc; - {{ openstack_cmd }} server list --long|grep -i metering.server_group |wc -l - retries: 100 - delay: 5 - register: instance_count3 - until: instance_count3.stdout == "1" - + - import_role: + name: '../roles/telemetry_autoscaling' + tasks_from: test_autoscaling ... 
diff --git a/playbooks/verify_autoscaling.yaml b/playbooks/verify_autoscaling.yaml index fe2de50..e88a0d3 100644 --- a/playbooks/verify_autoscaling.yaml +++ b/playbooks/verify_autoscaling.yaml @@ -6,43 +6,8 @@ - hosts: undercloud become: no name: Verifying the overcloud deployment for autoscaling - vars: - openstack_cmd: "{{ openstack_cmd if openstack_cmd is defined else 'openstack' }}" - stack_name: "{{ stack_name if stack_name is defined else 'vnf' }}" tasks: - - - name: test service API endpoint(metric) for autoscaling - shell: | - source ~/stackrc; - {{ openstack_cmd }} endpoint list --service metric; - register: result - failed_when: result.rc >= 1 - - - name: test service API endpoint(alarm) for autoscaling - shell: | - source ~/stackrc; - {{ openstack_cmd }} endpoint list --service alarming; - register: result - failed_when: result.rc >= 1 - - - name: test service API endpoint(heat) for autoscaling - shell: | - source ~/overcloudrc; - {{ openstack_cmd }} endpoint list --service orchestration; - register: result - failed_when: result.rc >= 1 - - - name: Verify that the services are running on the overcloud - shell: | - source ~/overcloudrc; - sudo podman ps --filter=name='heat|gnocchi|ceilometer|aodh'; - register: result - failed_when: result.rc >= 1 - - - name: Verify that the time-series database service is available - shell: | - source ~/overcloudrc; - {{ openstack_cmd }} metric status --fit-width; - register: result - failed_when: result.rc >= 1 + - import_role: + name: '../roles/telemetry_autoscaling' + tasks_from: verify_autoscaling ... diff --git a/roles/telemetry_autoscaling/README.md b/roles/telemetry_autoscaling/README.md new file mode 100644 index 0000000..225dd44 --- /dev/null +++ b/roles/telemetry_autoscaling/README.md @@ -0,0 +1,38 @@ +Role Name +========= + +A brief description of the role goes here. + +Requirements +------------ + +Any pre-requisites that may not be covered by Ansible itself or the role should be mentioned here. For instance, if the role uses the EC2 module, it may be a good idea to mention in this section that the boto package is required. + +Role Variables +-------------- + +A description of the settable variables for this role should go here, including any variables that are in defaults/main.yml, vars/main.yml, and any variables that can/should be set via parameters to the role. Any variables that are read from other roles and/or the global scope (ie. hostvars, group vars, etc.) should be mentioned here as well. + +Dependencies +------------ + +A list of other roles hosted on Galaxy should go here, plus any details in regards to parameters that may need to be set for other roles, or variables that are used from other roles. + +Example Playbook +---------------- + +Including an example of how to use your role (for instance, with variables passed in as parameters) is always nice for users too: + + - hosts: servers + roles: + - { role: username.rolename, x: 42 } + +License +------- + +BSD + +Author Information +------------------ + +An optional section for the role authors to include contact information, or a website (HTML is not allowed). 
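For reference, a usage example specific to this role, mirroring the refactored playbooks in this repository: the undercloud target and relative role path are taken from those playbooks, and the variable values are simply the defaults from defaults/main.yml, so treat them as illustrative rather than required.

- hosts: undercloud
  become: no
  name: Run the telemetry autoscaling tests
  vars:
    openstack_cmd: "openstack"
    stack_name: "vnf"
    metrics_backend: "gnocchi"
  tasks:
    - import_role:
        name: '../roles/telemetry_autoscaling'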
diff --git a/roles/telemetry_autoscaling/defaults/main.yml b/roles/telemetry_autoscaling/defaults/main.yml new file mode 100644 index 0000000..0bd5b91 --- /dev/null +++ b/roles/telemetry_autoscaling/defaults/main.yml @@ -0,0 +1,5 @@ +--- +# defaults file for telemetry_autoscaling +openstack_cmd: "openstack" +stack_name: vnf +metrics_backend: gnocchi diff --git a/roles/telemetry_autoscaling/meta/main.yml b/roles/telemetry_autoscaling/meta/main.yml new file mode 100644 index 0000000..c572acc --- /dev/null +++ b/roles/telemetry_autoscaling/meta/main.yml @@ -0,0 +1,52 @@ +galaxy_info: + author: your name + description: your role description + company: your company (optional) + + # If the issue tracker for your role is not on github, uncomment the + # next line and provide a value + # issue_tracker_url: http://example.com/issue/tracker + + # Choose a valid license ID from https://spdx.org - some suggested licenses: + # - BSD-3-Clause (default) + # - MIT + # - GPL-2.0-or-later + # - GPL-3.0-only + # - Apache-2.0 + # - CC-BY-4.0 + license: license (GPL-2.0-or-later, MIT, etc) + + min_ansible_version: 2.1 + + # If this a Container Enabled role, provide the minimum Ansible Container version. + # min_ansible_container_version: + + # + # Provide a list of supported platforms, and for each platform a list of versions. + # If you don't wish to enumerate all versions for a particular platform, use 'all'. + # To view available platforms and versions (or releases), visit: + # https://galaxy.ansible.com/api/v1/platforms/ + # + # platforms: + # - name: Fedora + # versions: + # - all + # - 25 + # - name: SomePlatform + # versions: + # - all + # - 1.0 + # - 7 + # - 99.99 + + galaxy_tags: [] + # List tags for your role here, one per line. A tag is a keyword that describes + # and categorizes the role. Users find roles by searching for tags. Be sure to + # remove the '[]' above, if you add tags to this list. + # + # NOTE: A tag is limited to a single word comprised of alphanumeric characters. + # Maximum 20 tags per role. + +dependencies: [] + # List your role dependencies here, one per line. Be sure to remove the '[]' above, + # if you add dependencies to this list. 
diff --git a/roles/telemetry_autoscaling/tasks/configure_heat.yml b/roles/telemetry_autoscaling/tasks/configure_heat.yml new file mode 100644 index 0000000..58db9e5 --- /dev/null +++ b/roles/telemetry_autoscaling/tasks/configure_heat.yml @@ -0,0 +1,178 @@ +--- +- set_fact: + metrics_backend: "gnocchi" + when: not (metrics_backend is defined) + +- name: Create the generic archive policy for autoscaling + when: metrics_backend == "gnocchi" + shell: | + # source ~/overcloudrc; + {{ openstack_cmd }} metric archive-policy create generic \ + --back-window 0 \ + --definition timespan:'4:00:00',granularity:'0:01:00',points:240 \ + --aggregation-method 'rate:mean' \ + --aggregation-method 'mean'; + register: result + failed_when: result.rc >= 1 + +- name: Verify that the archive policy was created + when: metrics_backend == "gnocchi" + shell: | + # source ~/overcloudrc; + {{ openstack_cmd }} metric archive-policy show generic; + register: result + failed_when: result.rc >= 1 + +- name: Create "vnf" directory under templates + shell: | + mkdir -p $HOME/templates/autoscaling/vnf/ + +- name: Configure heat template for automatically scaling instances + copy: + dest: ~/templates/autoscaling/vnf/instance.yaml + content: | + heat_template_version: wallaby + description: Template to control scaling of VNF instance + + parameters: + metadata: + type: json + image: + type: string + description: image used to create instance + default: workload_image_1 + flavor: + type: string + description: instance flavor to be used + default: workload_flavor_1 + key_name: + type: string + description: keypair to be used + default: workload_key_1 + network: + type: string + description: project network to attach instance to + default: workload_internal_net_1 + external_network: + type: string + description: network used for floating IPs + default: public + + resources: + vnf: + type: OS::Nova::Server + properties: + flavor: {get_param: flavor} + key_name: {get_param: key_name} + image: { get_param: image } + metadata: { get_param: metadata } + networks: + - port: { get_resource: port } + + port: + type: OS::Neutron::Port + properties: + network: {get_param: network} + security_groups: + - workload_secgroup_1 + + floating_ip: + type: OS::Neutron::FloatingIP + properties: + floating_network: {get_param: external_network } + + floating_ip_assoc: + type: OS::Neutron::FloatingIPAssociation + properties: + floatingip_id: { get_resource: floating_ip } + port_id: { get_resource: port } + +- name: Create the resource to reference in the heat template + copy: + dest: ~/templates/autoscaling/vnf/resources.yaml + content: | + resource_registry: + "OS::Nova::Server::VNF": /home/stack/templates/autoscaling/vnf/instance.yaml + +- name: Create the deployment template for heat to control instance scaling + copy: + dest: ~/templates/autoscaling/vnf/template.yaml + content: | + heat_template_version: wallaby + description: Example auto scale group, policy and alarm + resources: + scaleup_group: + type: OS::Heat::AutoScalingGroup + properties: + max_size: 3 + min_size: 1 + #desired_capacity: 1 + resource: + type: OS::Nova::Server::VNF + properties: + metadata: {"metering.server_group": {get_param: "OS::stack_id"}} + + scaleup_policy: + type: OS::Heat::ScalingPolicy + properties: + adjustment_type: change_in_capacity + auto_scaling_group_id: { get_resource: scaleup_group } + cooldown: 60 + scaling_adjustment: 1 + + scaledown_policy: + type: OS::Heat::ScalingPolicy + properties: + adjustment_type: change_in_capacity + auto_scaling_group_id: { 
get_resource: scaleup_group } + cooldown: 60 + scaling_adjustment: -1 + + cpu_alarm_high: + type: OS::Aodh::GnocchiAggregationByResourcesAlarm + properties: + description: Scale up instance if CPU > 50% + metric: cpu + aggregation_method: rate:mean + granularity: 300 + evaluation_periods: 1 + threshold: 30000000000.0 + resource_type: instance + comparison_operator: gt + alarm_actions: + - str_replace: + template: trust+url + params: + url: {get_attr: [scaleup_policy, signal_url]} + query: + list_join: + - '' + - - {'=': {server_group: {get_param: "OS::stack_id"}}} + + cpu_alarm_low: + type: OS::Aodh::GnocchiAggregationByResourcesAlarm + properties: + description: Scale down instance if CPU < 20% + metric: cpu + aggregation_method: rate:mean + granularity: 300 + evaluation_periods: 1 + threshold: 12000000000.0 + resource_type: instance + comparison_operator: lt + alarm_actions: + - str_replace: + template: trust+url + params: + url: {get_attr: [scaledown_policy, signal_url]} + query: + list_join: + - '' + - - {'=': {server_group: {get_param: "OS::stack_id"}}} + + outputs: + scaleup_policy_signal_url: + value: {get_attr: [scaleup_policy, alarm_url]} + + scaledown_policy_signal_url: + value: {get_attr: [scaledown_policy, alarm_url]} diff --git a/roles/telemetry_autoscaling/tasks/creating_stack.yml b/roles/telemetry_autoscaling/tasks/creating_stack.yml new file mode 100644 index 0000000..b5235d2 --- /dev/null +++ b/roles/telemetry_autoscaling/tasks/creating_stack.yml @@ -0,0 +1,86 @@ +--- +- name: Create the stack + shell: | + #source ~/overcloudrc; + {{ openstack_cmd }} stack create \ + -t $HOME/templates/autoscaling/vnf/template.yaml \ + -e $HOME/templates/autoscaling/vnf/resources.yaml \ + {{ stack_name }}; + register: result + failed_when: result.rc >= 1 + +- name: Wait for 60 sec + pause: + minutes: 1 + +- name: Verify that the stack was created successfully + shell: | + # source ~/overcloudrc; + {{ openstack_cmd }} stack show {{ stack_name }} -c id -c stack_status; + register: result + failed_when: '"CREATE_COMPLETE" not in result.stdout' + +- name: Verify that the stack resources are created + shell: | + # source ~/overcloudrc; + export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); + {{ openstack_cmd }} stack resource list $STACK_ID; + register: result + failed_when: '"CREATE_COMPLETE" not in result.stdout' + +- name: Verify that an instance was launched by the stack creation + shell: | + # source ~/overcloudrc; + export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); + {{ openstack_cmd }} server list --long | grep $STACK_ID; + register: result + failed_when: result.rc >= 1 + +- name: Verify that the alarms were created for the stack + shell: | + # source ~/overcloudrc; + {{ openstack_cmd }} alarm list + register: result + failed_when: result.rc >= 1 + +- name: Note the physical_resource_id values for the cpu_alarm_low resource + shell: | + # source ~/overcloudrc; + export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); + {{ openstack_cmd }} stack resource list $STACK_ID |grep -i cpu_alarm_low | awk '{print $4}' + register: physical_resource_id_low + +- name: Note the physical_resource_id values for the cpu_alarm_high resource + shell: | + # source ~/overcloudrc; + export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); + {{ openstack_cmd }} stack resource list $STACK_ID |grep -i cpu_alarm_high | awk '{print $4}' + register: physical_resource_id_high + +- name: Verify 
physical_resource_id match the alarm id for cpu_alarm_low + shell: | + # source ~/overcloudrc; + {{ openstack_cmd }} alarm list |grep -i cpu_alarm_low | awk '{print $2}' + register: alarm_id_low + failed_when: + - physical_resource_id_low.stdout != alarm_id_low.stdout + +- name: Verify physical_resource_id match the alarm id for cpu_alarm_high + shell: | + # source ~/overcloudrc; + {{ openstack_cmd }} alarm list |grep -i cpu_alarm_high | awk '{print $2}' + register: alarm_id_high + failed_when: + - physical_resource_id_high.stdout != alarm_id_high.stdout + +- name: Verify that metric resources exist for the stack + shell: | + # source ~/overcloudrc; + export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); + {{ openstack_cmd }} metric resource search \ + --sort-column launched_at -c id \ + -c display_name -c launched_at \ + -c deleted_at --type instance \ + server_group="$STACK_ID" + register: result + failed_when: result.rc >= 1 diff --git a/roles/telemetry_autoscaling/tasks/main.yml b/roles/telemetry_autoscaling/tasks/main.yml new file mode 100644 index 0000000..d4c7e17 --- /dev/null +++ b/roles/telemetry_autoscaling/tasks/main.yml @@ -0,0 +1,7 @@ +--- +# tasks file for telemetry_autoscaling +- include_tasks: verify_autoscaling.yml +- include_tasks: configure_heat.yml +- include_tasks: creating_stack.yml +- include_tasks: test_autoscaling.yml + diff --git a/roles/telemetry_autoscaling/tasks/test_autoscaling.yml b/roles/telemetry_autoscaling/tasks/test_autoscaling.yml new file mode 100644 index 0000000..98951cb --- /dev/null +++ b/roles/telemetry_autoscaling/tasks/test_autoscaling.yml @@ -0,0 +1,69 @@ +--- +- name: register instance IP + shell: | + # source ~/overcloudrc; + export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value) + {{ openstack_cmd }} server list --long -c Networks -c 'Properties' | \ + grep -i $STACK_ID | \ + awk -F'=' '{print $2}' | \ + awk '{print $1}' + #awk -F',' '{print $1}' + register: vnf_instance_ip + +- debug: var=vnf_instance_ip + +- name: Verfiy the number of instances before scaling + shell: | + # source ~/overcloudrc; + {{ openstack_cmd }} server list --long | grep -i 'metering.server_group' | wc -l + register: instance_count1 + +- name: Test automatic scaling up of instances + shell: | + sshpass -p gocubsgo ssh cirros@{{ item }} "sudo yes > /dev/null &" + register: result + with_items: "{{ vnf_instance_ip.stdout_lines }}" + +- name: Verify that the alarm has been triggered + shell: | + # source ~/overcloudrc; + {{ openstack_cmd }} alarm list -c state -c name -f value| \ + grep -i "cpu_alarm_high" | \ + awk '{print $2}' + retries: 100 + delay: 5 + register: result + until: result.stdout == "alarm" + +- name: Verify that the Orchestration service has scaled up the instances + shell: | + # source ~/overcloudrc; + {{ openstack_cmd }} server list --long|grep -i metering.server_group | wc -l + retries: 100 + delay: 5 + register: instance_count2 + until: instance_count2.stdout == "3" + +- name: Test automatic scaling down of instances + pause: + minutes: 5 + +- name: Verify that the alarm has been triggered + shell: | + # source ~/overcloudrc; + {{ openstack_cmd }} alarm list -c state -c name -f value| \ + grep -i "cpu_alarm_low" | \ + awk '{print $2}' + retries: 100 + delay: 5 + register: result + until: result.stdout == "alarm" + +- name: Verify that the Orchestration service has scaled down the instances + shell: | + # source ~/overcloudrc; + {{ openstack_cmd }} server list --long|grep -i metering.server_group 
|wc -l + retries: 100 + delay: 5 + register: instance_count3 + until: instance_count3.stdout == "1" diff --git a/roles/telemetry_autoscaling/tasks/verify_autoscaling.yml b/roles/telemetry_autoscaling/tasks/verify_autoscaling.yml new file mode 100644 index 0000000..f3f90d0 --- /dev/null +++ b/roles/telemetry_autoscaling/tasks/verify_autoscaling.yml @@ -0,0 +1,42 @@ +--- +# This doesn't work when using prom as the metrics backend +# TODO: Add a metrics_backend option +- name: test service API endpoint(metric) for autoscaling + when: metrics_backend == "gnocchi" + shell: | + #source ~/stackrc; + {{ openstack_cmd }} endpoint list --service metric; + register: result + failed_when: result.rc >= 1 + +- name: test service API endpoint(alarm) for autoscaling + shell: | + #source ~/stackrc; + {{ openstack_cmd }} endpoint list --service alarming; + register: result + failed_when: result.rc >= 1 + +- name: test service API endpoint(heat) for autoscaling + shell: | + # source ~/overcloudrc; + {{ openstack_cmd }} endpoint list --service orchestration; + register: result + failed_when: result.rc >= 1 + + # need selection criteria to decide when to run these. + # Need alternative for OSP18. +- name: Verify that the services are running on the overcloud + shell: | + # source ~/overcloudrc; + sudo podman ps --filter=name='heat|gnocchi|ceilometer|aodh'; + register: result + failed_when: result.rc >= 1 + ignore_errors: true + +- name: Verify that the time-series database service is available + when: metrics_backend == "gnocchi" + shell: | + # source ~/overcloudrc; + {{ openstack_cmd }} metric status --fit-width; + register: result + failed_when: result.rc >= 1 From 0cc659a5ae05ba1cf086037ad630cd83285ba33b Mon Sep 17 00:00:00 2001 From: Emma Foley Date: Wed, 1 May 2024 15:08:59 -0400 Subject: [PATCH 02/17] [ansible-lint] Lint the role * remove trailing whitespace * use fqcn for modules * resolve "no-free-form" * name[casing]: All names should start with an uppercase letter. * name[missing]: All tasks should be named. 
* yaml[empty-lines]: Too many blank lines (1 > 0) --- .../tasks/configure_heat.yml | 16 +++++------- .../tasks/creating_stack.yml | 26 +++++++++---------- roles/telemetry_autoscaling/tasks/main.yml | 14 +++++++--- .../tasks/test_autoscaling.yml | 24 +++++++++-------- .../tasks/verify_autoscaling.yml | 16 ++++++------ 5 files changed, 50 insertions(+), 46 deletions(-) diff --git a/roles/telemetry_autoscaling/tasks/configure_heat.yml b/roles/telemetry_autoscaling/tasks/configure_heat.yml index 58db9e5..c203168 100644 --- a/roles/telemetry_autoscaling/tasks/configure_heat.yml +++ b/roles/telemetry_autoscaling/tasks/configure_heat.yml @@ -1,11 +1,7 @@ --- -- set_fact: - metrics_backend: "gnocchi" - when: not (metrics_backend is defined) - - name: Create the generic archive policy for autoscaling when: metrics_backend == "gnocchi" - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; {{ openstack_cmd }} metric archive-policy create generic \ --back-window 0 \ @@ -17,18 +13,18 @@ - name: Verify that the archive policy was created when: metrics_backend == "gnocchi" - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; {{ openstack_cmd }} metric archive-policy show generic; register: result failed_when: result.rc >= 1 - name: Create "vnf" directory under templates - shell: | + ansible.builtin.shell: | mkdir -p $HOME/templates/autoscaling/vnf/ - name: Configure heat template for automatically scaling instances - copy: + ansible.builtin.copy: dest: ~/templates/autoscaling/vnf/instance.yaml content: | heat_template_version: wallaby @@ -88,14 +84,14 @@ port_id: { get_resource: port } - name: Create the resource to reference in the heat template - copy: + ansible.builtin.copy: dest: ~/templates/autoscaling/vnf/resources.yaml content: | resource_registry: "OS::Nova::Server::VNF": /home/stack/templates/autoscaling/vnf/instance.yaml - name: Create the deployment template for heat to control instance scaling - copy: + ansible.builtin.copy: dest: ~/templates/autoscaling/vnf/template.yaml content: | heat_template_version: wallaby diff --git a/roles/telemetry_autoscaling/tasks/creating_stack.yml b/roles/telemetry_autoscaling/tasks/creating_stack.yml index b5235d2..14fc325 100644 --- a/roles/telemetry_autoscaling/tasks/creating_stack.yml +++ b/roles/telemetry_autoscaling/tasks/creating_stack.yml @@ -1,6 +1,6 @@ --- - name: Create the stack - shell: | + ansible.builtin.shell: | #source ~/overcloudrc; {{ openstack_cmd }} stack create \ -t $HOME/templates/autoscaling/vnf/template.yaml \ @@ -10,18 +10,18 @@ failed_when: result.rc >= 1 - name: Wait for 60 sec - pause: + ansible.builtin.pause: minutes: 1 - name: Verify that the stack was created successfully - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; {{ openstack_cmd }} stack show {{ stack_name }} -c id -c stack_status; register: result failed_when: '"CREATE_COMPLETE" not in result.stdout' - name: Verify that the stack resources are created - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); {{ openstack_cmd }} stack resource list $STACK_ID; @@ -29,7 +29,7 @@ failed_when: '"CREATE_COMPLETE" not in result.stdout' - name: Verify that an instance was launched by the stack creation - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); {{ openstack_cmd }} server list --long | grep $STACK_ID; @@ -37,44 +37,44 @@ failed_when: result.rc >= 1 - name: Verify 
that the alarms were created for the stack - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; {{ openstack_cmd }} alarm list register: result failed_when: result.rc >= 1 - name: Note the physical_resource_id values for the cpu_alarm_low resource - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); {{ openstack_cmd }} stack resource list $STACK_ID |grep -i cpu_alarm_low | awk '{print $4}' register: physical_resource_id_low - name: Note the physical_resource_id values for the cpu_alarm_high resource - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); {{ openstack_cmd }} stack resource list $STACK_ID |grep -i cpu_alarm_high | awk '{print $4}' register: physical_resource_id_high - name: Verify physical_resource_id match the alarm id for cpu_alarm_low - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; {{ openstack_cmd }} alarm list |grep -i cpu_alarm_low | awk '{print $2}' register: alarm_id_low - failed_when: + failed_when: - physical_resource_id_low.stdout != alarm_id_low.stdout - name: Verify physical_resource_id match the alarm id for cpu_alarm_high - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; {{ openstack_cmd }} alarm list |grep -i cpu_alarm_high | awk '{print $2}' register: alarm_id_high - failed_when: + failed_when: - physical_resource_id_high.stdout != alarm_id_high.stdout - name: Verify that metric resources exist for the stack - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); {{ openstack_cmd }} metric resource search \ diff --git a/roles/telemetry_autoscaling/tasks/main.yml b/roles/telemetry_autoscaling/tasks/main.yml index d4c7e17..1d137d0 100644 --- a/roles/telemetry_autoscaling/tasks/main.yml +++ b/roles/telemetry_autoscaling/tasks/main.yml @@ -1,7 +1,13 @@ --- # tasks file for telemetry_autoscaling -- include_tasks: verify_autoscaling.yml -- include_tasks: configure_heat.yml -- include_tasks: creating_stack.yml -- include_tasks: test_autoscaling.yml +- name: Check pre-reqs for autoscaling + ansible.builtin.include_tasks: verify_autoscaling.yml +- name: Create the heat templates + ansible.builtin.include_tasks: configure_heat.yml + +- name: Launch the stack + ansible.builtin.include_tasks: creating_stack.yml + +- name: Run the autoscaling tests + ansible.builtin.include_tasks: test_autoscaling.yml diff --git a/roles/telemetry_autoscaling/tasks/test_autoscaling.yml b/roles/telemetry_autoscaling/tasks/test_autoscaling.yml index 98951cb..021edf0 100644 --- a/roles/telemetry_autoscaling/tasks/test_autoscaling.yml +++ b/roles/telemetry_autoscaling/tasks/test_autoscaling.yml @@ -1,6 +1,6 @@ --- -- name: register instance IP - shell: | +- name: Register instance IP + ansible.builtin.shell: | # source ~/overcloudrc; export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value) {{ openstack_cmd }} server list --long -c Networks -c 'Properties' | \ @@ -10,22 +10,24 @@ #awk -F',' '{print $1}' register: vnf_instance_ip -- debug: var=vnf_instance_ip +- name: Show the IP + ansible.builtin.debug: + var: vnf_instance_ip - name: Verfiy the number of instances before scaling - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; {{ openstack_cmd }} server list --long | grep -i 'metering.server_group' | wc -l register: instance_count1 - name: Test automatic 
scaling up of instances - shell: | + ansible.builtin.shell: | sshpass -p gocubsgo ssh cirros@{{ item }} "sudo yes > /dev/null &" register: result with_items: "{{ vnf_instance_ip.stdout_lines }}" - name: Verify that the alarm has been triggered - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; {{ openstack_cmd }} alarm list -c state -c name -f value| \ grep -i "cpu_alarm_high" | \ @@ -35,8 +37,8 @@ register: result until: result.stdout == "alarm" -- name: Verify that the Orchestration service has scaled up the instances - shell: | +- name: Verify that the Orchestration service has scaled up the instances + ansible.builtin.shell: | # source ~/overcloudrc; {{ openstack_cmd }} server list --long|grep -i metering.server_group | wc -l retries: 100 @@ -45,11 +47,11 @@ until: instance_count2.stdout == "3" - name: Test automatic scaling down of instances - pause: + ansible.builtin.pause: minutes: 5 - name: Verify that the alarm has been triggered - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; {{ openstack_cmd }} alarm list -c state -c name -f value| \ grep -i "cpu_alarm_low" | \ @@ -60,7 +62,7 @@ until: result.stdout == "alarm" - name: Verify that the Orchestration service has scaled down the instances - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; {{ openstack_cmd }} server list --long|grep -i metering.server_group |wc -l retries: 100 diff --git a/roles/telemetry_autoscaling/tasks/verify_autoscaling.yml b/roles/telemetry_autoscaling/tasks/verify_autoscaling.yml index f3f90d0..9ae42a3 100644 --- a/roles/telemetry_autoscaling/tasks/verify_autoscaling.yml +++ b/roles/telemetry_autoscaling/tasks/verify_autoscaling.yml @@ -1,23 +1,23 @@ --- # This doesn't work when using prom as the metrics backend # TODO: Add a metrics_backend option -- name: test service API endpoint(metric) for autoscaling +- name: Test service API endpoint(metric) for autoscaling when: metrics_backend == "gnocchi" - shell: | + ansible.builtin.shell: | #source ~/stackrc; {{ openstack_cmd }} endpoint list --service metric; register: result failed_when: result.rc >= 1 -- name: test service API endpoint(alarm) for autoscaling - shell: | +- name: Test service API endpoint(alarm) for autoscaling + ansible.builtin.shell: | #source ~/stackrc; {{ openstack_cmd }} endpoint list --service alarming; register: result failed_when: result.rc >= 1 -- name: test service API endpoint(heat) for autoscaling - shell: | +- name: Test service API endpoint(heat) for autoscaling + ansible.builtin.shell: | # source ~/overcloudrc; {{ openstack_cmd }} endpoint list --service orchestration; register: result @@ -26,7 +26,7 @@ # need selection criteria to decide when to run these. # Need alternative for OSP18. 
- name: Verify that the services are running on the overcloud - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; sudo podman ps --filter=name='heat|gnocchi|ceilometer|aodh'; register: result @@ -35,7 +35,7 @@ - name: Verify that the time-series database service is available when: metrics_backend == "gnocchi" - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; {{ openstack_cmd }} metric status --fit-width; register: result From 24313c9dcfc6c7a7769a18150b7ba66683ff21e4 Mon Sep 17 00:00:00 2001 From: Emma Foley Date: Sun, 5 May 2024 10:45:44 -0400 Subject: [PATCH 03/17] Get OSP18 tests running --- playbooks/autoscaling_osp18.yaml | 27 +++- .../tasks/configure_heat.yml | 117 ++++++++++++++++-- .../tasks/creating_stack.yml | 29 ++++- .../tasks/test_autoscaling.yml | 32 ++++- 4 files changed, 187 insertions(+), 18 deletions(-) diff --git a/playbooks/autoscaling_osp18.yaml b/playbooks/autoscaling_osp18.yaml index 75b7fd4..ca39423 100644 --- a/playbooks/autoscaling_osp18.yaml +++ b/playbooks/autoscaling_osp18.yaml @@ -2,11 +2,36 @@ - hosts: localhost become: no name: Run the autoscaling tests + # NOTE: These vars should be eventually moved into a vars file. vars: openstack_cmd: "oc rsh openstackclient openstack" metrics_backend: "prometheus" - + stack_image: "cirros" + stack_flavor: "m1.small" + stack_network: "private" + stack_external_network: "public" tasks: + # NOTE: the tags are for testing/development, eventually, the role will just be imported, and main.yml will run through the tasks + - import_role: + name: '../name/telemetry_autoscaling' + tasks_from: 'verify_autoscaling' + tags: + - precheck - import_role: name: '../roles/telemetry_autoscaling' + tasks_from: configure_heat + #tasks_from: creating_stack + tags: + - create + - import_role: + name: '../roles/telemetry_autoscaling' + tasks_from: creating_stack + tags: + - create + + - import_role: + name: '../roles/telemetry_autoscaling' + tasks_from: test_autoscaling + tags: + - test diff --git a/roles/telemetry_autoscaling/tasks/configure_heat.yml b/roles/telemetry_autoscaling/tasks/configure_heat.yml index c203168..2e7099a 100644 --- a/roles/telemetry_autoscaling/tasks/configure_heat.yml +++ b/roles/telemetry_autoscaling/tasks/configure_heat.yml @@ -1,4 +1,10 @@ --- +# RE: alarm definitions, it might be cleaner to add two of each alarm, and block off the entire resource, one for gnocchi and one for prom. +# instead of having if/else blocks inside the resource. +# +# TODO(efoley) Consider a heat stack to create the pre-reqs for the autoscaling example. +# i.e. create the sec group, network, etc +# TODO(efoley): Move the HOT templates into templates/ dir for the role - name: Create the generic archive policy for autoscaling when: metrics_backend == "gnocchi" ansible.builtin.shell: | @@ -23,6 +29,8 @@ ansible.builtin.shell: | mkdir -p $HOME/templates/autoscaling/vnf/ + # TODO: Pass the parameters correctly. When I tried to pass the parameters + # into the template.yaml file, they weren't passed to the instance initially. 
- name: Configure heat template for automatically scaling instances ansible.builtin.copy: dest: ~/templates/autoscaling/vnf/instance.yaml @@ -36,11 +44,11 @@ image: type: string description: image used to create instance - default: workload_image_1 + default: {{ stack_image | default("workload_image_1") }} flavor: type: string description: instance flavor to be used - default: workload_flavor_1 + default: {{ stack_flavor | default("workload_flavor_1") }} key_name: type: string description: keypair to be used @@ -48,18 +56,30 @@ network: type: string description: project network to attach instance to - default: workload_internal_net_1 + default: {{ stack_network | default("workload_internal_net_1") }} external_network: type: string description: network used for floating IPs - default: public + default: {{ stack_external_network | default("public") }} + server_name_prefix: + type: string + description: a prefix for each server name. + default: "" + security_group: + type: string + description: the security group for the instances + default: basic resources: vnf: type: OS::Nova::Server properties: + {% if metrics_backend == "prometheus" -%} + name: + list_join: ["", [{get_param: server_name_prefix}, {get_param: OS::stack_name}]] + {% endif -%} flavor: {get_param: flavor} - key_name: {get_param: key_name} + #key_name: {get_param: key_name} image: { get_param: image } metadata: { get_param: metadata } networks: @@ -70,7 +90,7 @@ properties: network: {get_param: network} security_groups: - - workload_secgroup_1 + - { get_param: security_group } floating_ip: type: OS::Neutron::FloatingIP @@ -88,7 +108,12 @@ dest: ~/templates/autoscaling/vnf/resources.yaml content: | resource_registry: - "OS::Nova::Server::VNF": /home/stack/templates/autoscaling/vnf/instance.yaml + "OS::Nova::Server::VNF": ./instance.yaml + # parameters: + # image: cirros + # flavor: m1.small + # network: private + # security_group: basic - name: Create the deployment template for heat to control instance scaling ansible.builtin.copy: @@ -96,18 +121,56 @@ content: | heat_template_version: wallaby description: Example auto scale group, policy and alarm + parameters: + server_name_prefix: + description: A prefix for servers created by this stack. Can be used in queries. + type: string + default: autoscaling_server_ + #image: + # type: string + # description: image used to create instance + # default: "{{ stack_network| default( 'workload_image_1') }}" + #flavor: + # type: string + # description: instance flavor to be used + # default: "{{ stack_flavor | default('workload_flavor_1') }}" + #key_name: + # type: string + # description: keypair to be used + # default: workload_key_1 + #network: + # type: string + # description: project network to attach instance to + # default: "{{ stack_network | default('workload_internal_net_1') }}" + #external_network: + # type: string + # description: network used for floating IPs + # default: public + resources: scaleup_group: type: OS::Heat::AutoScalingGroup properties: max_size: 3 min_size: 1 - #desired_capacity: 1 + desired_capacity: 1 resource: type: OS::Nova::Server::VNF + # resource definieiton for the resource to be created by the group. + # this passes in parameters to the resource. + # So I can put a name_prefix here and it'll be set as the name... + # But the parameter is needed in the VNF. + # Can I set the name in an OS::Nova::Server? 
+ # This sets the property/metadata for the VNF + # name should be set here, but maybe not as metadata + # network: { get_param: network } + # flavor: { get_param: flavor } + # image: { get_param: image } properties: + server_name_prefix: { get_param: server_name_prefix } metadata: {"metering.server_group": {get_param: "OS::stack_id"}} + scaleup_policy: type: OS::Heat::ScalingPolicy properties: @@ -125,15 +188,25 @@ scaling_adjustment: -1 cpu_alarm_high: + {% if metrics_backend == "gnocchi" -%} type: OS::Aodh::GnocchiAggregationByResourcesAlarm + {% endif -%} + {% if metrics_backend == "prometheus" -%} + type: OS::Aodh::PrometheusAlarm + {% endif -%} properties: description: Scale up instance if CPU > 50% + {% if metrics_backend == "gnocchi" -%} metric: cpu aggregation_method: rate:mean granularity: 300 evaluation_periods: 1 - threshold: 30000000000.0 resource_type: instance + threshold: 30000000000.0 + {% endif -%} + {% if metrics_backend == "prometheus" -%} + threshold: 50 + {% endif -%} comparison_operator: gt alarm_actions: - str_replace: @@ -141,20 +214,36 @@ params: url: {get_attr: [scaleup_policy, signal_url]} query: + {% if metrics_backend == "gnocchi" -%} list_join: - '' - - {'=': {server_group: {get_param: "OS::stack_id"}}} + {% endif -%} + {% if metrics_backend == "prometheus" -%} + str_replace: + template: "(rate(ceilometer_cpu{resource_name=~'server_name_prefix.*'}[150s]))/10000000" + params: + server_name_prefix: {get_param: server_name_prefix} + {%- endif %} cpu_alarm_low: + {% if metrics_backend == "gnocchi" -%} type: OS::Aodh::GnocchiAggregationByResourcesAlarm + {% endif -%} + {% if metrics_backend == "prometheus" -%} + type: OS::Aodh::PrometheusAlarm + {% endif -%} properties: description: Scale down instance if CPU < 20% + {% if metrics_backend == "gnocchi" -%} metric: cpu aggregation_method: rate:mean granularity: 300 evaluation_periods: 1 - threshold: 12000000000.0 resource_type: instance + threshold: 12000000000.0 + {% endif -%} + threshold: 20 comparison_operator: lt alarm_actions: - str_replace: @@ -162,9 +251,17 @@ params: url: {get_attr: [scaledown_policy, signal_url]} query: + {% if metrics_backend == "gnocchi" -%} list_join: - '' - - {'=': {server_group: {get_param: "OS::stack_id"}}} + {% endif -%} + {% if metrics_backend == "prometheus" -%} + str_replace: + template: "(rate(ceilometer_cpu{resource_name=~'server_name_prefix.*'}[150s]))/10000000" + params: + server_name_prefix: {get_param: server_name_prefix} + {% endif %} outputs: scaleup_policy_signal_url: diff --git a/roles/telemetry_autoscaling/tasks/creating_stack.yml b/roles/telemetry_autoscaling/tasks/creating_stack.yml index 14fc325..45ccb33 100644 --- a/roles/telemetry_autoscaling/tasks/creating_stack.yml +++ b/roles/telemetry_autoscaling/tasks/creating_stack.yml @@ -1,10 +1,33 @@ --- +- name: Copy the templates to openstackclient pod + ansible.builtin.shell: | + oc cp $HOME/templates/autoscaling/vnf/template.yaml openstackclient:/tmp + oc cp $HOME/templates/autoscaling/vnf/resources.yaml openstackclient:/tmp + oc cp $HOME/templates/autoscaling/vnf/instance.yaml openstackclient:/tmp + + # template location was $HOME/templates/autoscaling/vnf/ +- name: "[temp] Set the location to copy the template to" + ansible.builtin.set_fact: + template_location: '/tmp' + + # I need to pass parameters to the stack create + # TODO: Define parameters as vars, and add them to the role. + # TODO: Update pre-checks to make sure the image, flavor, key, network and external_network exist. 
+ # Temperority use the install_yamls/devinstall make edpm_deploy_instance to make these resources. + # This maketarget uses the devsetup/scripts/edpm-deploy-instance.sh script + # TODO: Either update the test to create the required resources, or add checks to make sure they exist. +- ansible.builtin.set_fact: + stack_image: cirros + stack_flavor: m1.small + #stack_keyname: + stack_network: private + stack_external_network: public - name: Create the stack ansible.builtin.shell: | #source ~/overcloudrc; {{ openstack_cmd }} stack create \ - -t $HOME/templates/autoscaling/vnf/template.yaml \ - -e $HOME/templates/autoscaling/vnf/resources.yaml \ + -t {{ template_location }}/template.yaml \ + -e {{ template_location }}/resources.yaml \ {{ stack_name }}; register: result failed_when: result.rc >= 1 @@ -73,7 +96,9 @@ failed_when: - physical_resource_id_high.stdout != alarm_id_high.stdout + # TODO: get alt check for prom - name: Verify that metric resources exist for the stack + when: metrics_backend == "gnocchi" ansible.builtin.shell: | # source ~/overcloudrc; export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); diff --git a/roles/telemetry_autoscaling/tasks/test_autoscaling.yml b/roles/telemetry_autoscaling/tasks/test_autoscaling.yml index 021edf0..d9a144a 100644 --- a/roles/telemetry_autoscaling/tasks/test_autoscaling.yml +++ b/roles/telemetry_autoscaling/tasks/test_autoscaling.yml @@ -1,29 +1,41 @@ --- + # NOTE: The format of the output appears to have changed; There are now 2 + # IP addresses, and the second one is the one associated with the floating IP - name: Register instance IP ansible.builtin.shell: | # source ~/overcloudrc; export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value) {{ openstack_cmd }} server list --long -c Networks -c 'Properties' | \ grep -i $STACK_ID | \ - awk -F'=' '{print $2}' | \ - awk '{print $1}' - #awk -F',' '{print $1}' + awk -F'=' '{print $2}' | \ + awk -F'|' '{print $1}' | \ + awk -F',' '{print $2}' register: vnf_instance_ip - name: Show the IP ansible.builtin.debug: var: vnf_instance_ip +- when: vnf_instance_ip.stdout | length == 0 + fail: + msg: "bad vnf_instance_ip" + - name: Verfiy the number of instances before scaling ansible.builtin.shell: | # source ~/overcloudrc; {{ openstack_cmd }} server list --long | grep -i 'metering.server_group' | wc -l register: instance_count1 + # NOTE: Disabling strict host key checking so that ssh doesn't give an error + # when the host key changes i.e. if a new VM has been assigned a + # previously-used IP address, which will happen during local testing but not + # in CI + # NOTE: the with_items is because I was capturing both IPs initially, and + # using both in case the order was not consistent. 
- name: Test automatic scaling up of instances ansible.builtin.shell: | - sshpass -p gocubsgo ssh cirros@{{ item }} "sudo yes > /dev/null &" - register: result + sshpass -p gocubsgo ssh -o StrictHostKeyChecking=False cirros@{{ item | trim }} "sudo yes > /dev/null &" + register: busy_process with_items: "{{ vnf_instance_ip.stdout_lines }}" - name: Verify that the alarm has been triggered @@ -46,6 +58,12 @@ register: instance_count2 until: instance_count2.stdout == "3" +- name: Stop the busy process + ansible.builtin.shell: | + sshpass -p gocubsgo ssh cirros@{{ item | trim }} "sudo killall yes" + register: kill_busy_process + with_items: "{{ vnf_instance_ip.stdout_lines }}" + - name: Test automatic scaling down of instances ansible.builtin.pause: minutes: 5 @@ -61,6 +79,10 @@ register: result until: result.stdout == "alarm" + # TODO: the metering.server group metadata was used for gnocchi alarm + # selection. + # prom uses the instance name, so the metadata MIGHT be removed, and a new + # check for whether the scaling group has scaled down may be needed. - name: Verify that the Orchestration service has scaled down the instances ansible.builtin.shell: | # source ~/overcloudrc; From 2d5a006c53780fd973be8635f9f235771af1b907 Mon Sep 17 00:00:00 2001 From: Emma Foley Date: Tue, 7 May 2024 08:33:07 -0400 Subject: [PATCH 04/17] Fix syntax error --- playbooks/autoscaling_osp18.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/playbooks/autoscaling_osp18.yaml b/playbooks/autoscaling_osp18.yaml index ca39423..9e5f7eb 100644 --- a/playbooks/autoscaling_osp18.yaml +++ b/playbooks/autoscaling_osp18.yaml @@ -13,7 +13,7 @@ tasks: # NOTE: the tags are for testing/development, eventually, the role will just be imported, and main.yml will run through the tasks - import_role: - name: '../name/telemetry_autoscaling' + name: '../roles/telemetry_autoscaling' tasks_from: 'verify_autoscaling' tags: - precheck From 13ae82647b516be0e58073c4658a7c7dfe51127b Mon Sep 17 00:00:00 2001 From: Emma Foley Date: Tue, 7 May 2024 08:34:29 -0400 Subject: [PATCH 05/17] [test_autoscaling] Add a task to update the known_hosts file To remove previous entries for the IP address, since the VMs will reuse the IP addresses, and this will cause the ssh commands to fail, due to looking like a main-in-the-middle attack. --- .../tasks/test_autoscaling.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/roles/telemetry_autoscaling/tasks/test_autoscaling.yml b/roles/telemetry_autoscaling/tasks/test_autoscaling.yml index d9a144a..3f15570 100644 --- a/roles/telemetry_autoscaling/tasks/test_autoscaling.yml +++ b/roles/telemetry_autoscaling/tasks/test_autoscaling.yml @@ -26,12 +26,31 @@ {{ openstack_cmd }} server list --long | grep -i 'metering.server_group' | wc -l register: instance_count1 +- shell: | + cat {{ ansible_env.HOME }}/.ssh/known_hosts | grep "{{ item | trim }}" + with_items: "{{ vnf_instance_ip.stdout_lines }}" + ignore_errors: true + +- name: Remove the existing hostkey, if there is one for the target IP + ansible.builtin.lineinfile: + dest: '{{ ansible_env.HOME }}/.ssh/known_hosts' + state: absent + regexp: "{{ item |trim }}" + with_items: "{{ vnf_instance_ip.stdout_lines }}" + +- shell: | + cat {{ ansible_env.HOME }}/.ssh/known_hosts | grep "{{ item | trim }}" + with_items: "{{ vnf_instance_ip.stdout_lines }}" + ignore_errors: true + # NOTE: Disabling strict host key checking so that ssh doesn't give an error # when the host key changes i.e. 
if a new VM has been assigned a
   # previously-used IP address, which will happen during local testing but not
   # in CI
   # NOTE: the with_items is because I was capturing both IPs initially, and
   # using both in case the order was not consistent.
+  # Disabling hostkey checking didn't work. I need to remove the key from the known_hosts file before trying to run this.
+  # The key removal should move to some pre/pre-run stage.
 - name: Test automatic scaling up of instances
   ansible.builtin.shell: |
     sshpass -p gocubsgo ssh -o StrictHostKeyChecking=False cirros@{{ item | trim }} "sudo yes > /dev/null &"

From 11b74f70755bb742d3e64a3acbcbb2289edd027b Mon Sep 17 00:00:00 2001
From: Emma Foley
Date: Tue, 7 May 2024 14:49:31 -0400
Subject: [PATCH 06/17] Pre-create resources using install_yamls

---
 playbooks/autoscaling_osp18.yaml           | 7 +++++++
 roles/telemetry_autoscaling/tasks/main.yml | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/playbooks/autoscaling_osp18.yaml b/playbooks/autoscaling_osp18.yaml
index 9e5f7eb..c5a34f8 100644
--- a/playbooks/autoscaling_osp18.yaml
+++ b/playbooks/autoscaling_osp18.yaml
@@ -11,6 +11,13 @@
     stack_network: "private"
     stack_external_network: "public"
   tasks:
+    # this is temporary and should be replaced later by some heat stack or some ansible
+    - community.general.make:
+        chdir: "{{ ansible_env.HOME }}/install_yamls/devsetup"
+        target: edpm_deploy_instance
+      tags:
+        - setup
+
     # NOTE: the tags are for testing/development, eventually, the role will just be imported, and main.yml will run through the tasks
     - import_role:
         name: '../roles/telemetry_autoscaling'
diff --git a/roles/telemetry_autoscaling/tasks/main.yml b/roles/telemetry_autoscaling/tasks/main.yml
index 1d137d0..ab4eee9 100644
--- a/roles/telemetry_autoscaling/tasks/main.yml
+++ b/roles/telemetry_autoscaling/tasks/main.yml
@@ -1,5 +1,7 @@
 ---
 # tasks file for telemetry_autoscaling
+  # TODO: Update the pre-checks to include the resources that the stack expects to exist
+  # i.e.
networks, flavor, image, security group - name: Check pre-reqs for autoscaling ansible.builtin.include_tasks: verify_autoscaling.yml From d576e6dca2d55f30dceba8a71e589362fdf36820 Mon Sep 17 00:00:00 2001 From: Emma Foley Date: Tue, 7 May 2024 15:07:33 -0400 Subject: [PATCH 07/17] Add task to clone install_yamls --- playbooks/autoscaling_osp18.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/playbooks/autoscaling_osp18.yaml b/playbooks/autoscaling_osp18.yaml index c5a34f8..840a246 100644 --- a/playbooks/autoscaling_osp18.yaml +++ b/playbooks/autoscaling_osp18.yaml @@ -12,8 +12,13 @@ stack_external_network: "public" tasks: # this is temperory and should be replaced later by some heat stack or some ansible + - ansible.builtin.command: + cmd: git clone http://github.com/openstack-k8s-operators/install_yamls + chdir: "{{ playbook_dir }}" + tags: + - setup - community.general.make: - chdir: "{{ ansible_env.HOME }}/install_yamls/devsetup" + chdir: "{{ playbook_dir }}/install_yamls/devsetup" target: edpm_deploy_instance tags: - setup From ac0ec95bc4815418f52fe7b221a1f6370c40cac3 Mon Sep 17 00:00:00 2001 From: Emma Foley Date: Thu, 9 May 2024 12:38:27 -0400 Subject: [PATCH 08/17] patch the openstack cloud to use the expected versions of aodh and heat --- playbooks/autoscaling_osp18.yaml | 28 ++++++++++++++++++++++++++++ playbooks/patch.yaml | 9 +++++++++ 2 files changed, 37 insertions(+) create mode 100644 playbooks/patch.yaml diff --git a/playbooks/autoscaling_osp18.yaml b/playbooks/autoscaling_osp18.yaml index 840a246..4760463 100644 --- a/playbooks/autoscaling_osp18.yaml +++ b/playbooks/autoscaling_osp18.yaml @@ -17,12 +17,40 @@ chdir: "{{ playbook_dir }}" tags: - setup + - community.general.make: chdir: "{{ playbook_dir }}/install_yamls/devsetup" target: edpm_deploy_instance tags: - setup + - name: Patch the openstackversions to use the master containers for aodh and heat + ansible.builtin.shell: + cmd: | + oc patch openstackversions openstack-galera-network-isolation --type merge --patch-file patch.yaml + tags: + - setup + + - name: patch observabilityclient into openstackclient + shell: + cmd: | + oc exec openstackclient -- python3 -m ensurepip --upgrade + oc exec openstackclient -- python3 -m pip install --upgrade aodhclient + oc exec openstackclient -- python3 -m pip install python-observabilityclient + tags: + - setup + + - name: Wait until the oscp is resolved the changes to continue + ansible.builtin.shell: + cmd: | + oc get oscp | grep "Setup complete" + retries: 24 + timeout: 5 + until: output.stdout_lines | length == 1 + register: output + tags: + - setup + # NOTE: the tags are for testing/development, eventually, the role will just be imported, and main.yml will run through the tasks - import_role: name: '../roles/telemetry_autoscaling' diff --git a/playbooks/patch.yaml b/playbooks/patch.yaml new file mode 100644 index 0000000..a0b2e2b --- /dev/null +++ b/playbooks/patch.yaml @@ -0,0 +1,9 @@ +spec: + customContainerImages: + aodhAPIImage: quay.io/podified-master-centos9/openstack-aodh-api:current-podified + aodhEvaluatorImage: quay.io/podified-master-centos9/openstack-aodh-evaluator:current-podified + aodhListenerImage: quay.io/podified-master-centos9/openstack-aodh-listener:current-podified + aodhNotifierImage: quay.io/podified-master-centos9/openstack-aodh-notifier:current-podified + heatAPIImage: quay.io/podified-master-centos9/openstack-heat-api:current-podified + heatCfnapiImage: quay.io/podified-master-centos9/openstack-heat-api-cfn:current-podified 
+ heatEngineImage: quay.io/podified-master-centos9/openstack-heat-engine:current-podified From 921375b6e23c14e74f3a8047d0a167909cf4e0a4 Mon Sep 17 00:00:00 2001 From: Emma Foley Date: Fri, 10 May 2024 12:00:43 -0400 Subject: [PATCH 09/17] Update test for stack creation --- .../tasks/creating_stack.yml | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/roles/telemetry_autoscaling/tasks/creating_stack.yml b/roles/telemetry_autoscaling/tasks/creating_stack.yml index 45ccb33..43c6330 100644 --- a/roles/telemetry_autoscaling/tasks/creating_stack.yml +++ b/roles/telemetry_autoscaling/tasks/creating_stack.yml @@ -32,16 +32,18 @@ register: result failed_when: result.rc >= 1 -- name: Wait for 60 sec - ansible.builtin.pause: - minutes: 1 + #- name: Wait for 60 sec + # ansible.builtin.pause: + # minutes: 1 - name: Verify that the stack was created successfully ansible.builtin.shell: | # source ~/overcloudrc; {{ openstack_cmd }} stack show {{ stack_name }} -c id -c stack_status; register: result - failed_when: '"CREATE_COMPLETE" not in result.stdout' + until: '"CREATE_COMPLETE" in result.stdout' + timeout: 30 + retries: 20 - name: Verify that the stack resources are created ansible.builtin.shell: | @@ -49,7 +51,10 @@ export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); {{ openstack_cmd }} stack resource list $STACK_ID; register: result - failed_when: '"CREATE_COMPLETE" not in result.stdout' + timeout: 30 + retries: 20 + until: '"CREATE_COMPLETE" in result.stdout' + # failed_when: '"CREATE_COMPLETE" not in result.stdout' - name: Verify that an instance was launched by the stack creation ansible.builtin.shell: | From 9d6729de975df7157d4d02455fb5603a4635d3f3 Mon Sep 17 00:00:00 2001 From: mgirgisf Date: Mon, 13 May 2024 10:30:01 +0200 Subject: [PATCH 10/17] debug stack failure --- roles/telemetry_autoscaling/tasks/creating_stack.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/roles/telemetry_autoscaling/tasks/creating_stack.yml b/roles/telemetry_autoscaling/tasks/creating_stack.yml index 43c6330..5a55328 100644 --- a/roles/telemetry_autoscaling/tasks/creating_stack.yml +++ b/roles/telemetry_autoscaling/tasks/creating_stack.yml @@ -35,7 +35,19 @@ #- name: Wait for 60 sec # ansible.builtin.pause: # minutes: 1 +- name: Debug that the stack resources + block: + - name: Verify that the alarms were created for the stack + ansible.builtin.shell: | + {{ openstack_cmd }} alarm list + register: result + failed_when: result.rc >= 1 + - name: Verify that the stack resources are created + ansible.builtin.shell: | + export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); + {{ openstack_cmd }} stack resource list $STACK_ID; + - name: Verify that the stack was created successfully ansible.builtin.shell: | # source ~/overcloudrc; From b7af4946d74c8845c6328a68ed759c3ee325583c Mon Sep 17 00:00:00 2001 From: mgirgisf Date: Mon, 13 May 2024 12:54:58 +0200 Subject: [PATCH 11/17] debug the stack resource --- .../tasks/creating_stack.yml | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/roles/telemetry_autoscaling/tasks/creating_stack.yml b/roles/telemetry_autoscaling/tasks/creating_stack.yml index 5a55328..b4a128b 100644 --- a/roles/telemetry_autoscaling/tasks/creating_stack.yml +++ b/roles/telemetry_autoscaling/tasks/creating_stack.yml @@ -36,8 +36,12 @@ # ansible.builtin.pause: # minutes: 1 - name: Debug that the stack resources - block: - - name: Verify that the alarms were created 
for the stack + block: + - name: Wait for 60 sec + ansible.builtin.pause: + minutes: 1 + + - name: show that the alarms created for the stack ansible.builtin.shell: | {{ openstack_cmd }} alarm list register: result @@ -47,6 +51,15 @@ ansible.builtin.shell: | export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); {{ openstack_cmd }} stack resource list $STACK_ID; + + - name: Verify that the stack was created successfully + ansible.builtin.shell: | + # source ~/overcloudrc; + {{ openstack_cmd }} stack show {{ stack_name }}; + register: result + until: '"CREATE_COMPLETE" in result.stdout' + timeout: 30 + retries: 20 - name: Verify that the stack was created successfully ansible.builtin.shell: | From e6950d7666c7e6a58c51aeecc4f289be8794f5e1 Mon Sep 17 00:00:00 2001 From: mgirgisf Date: Tue, 14 May 2024 11:43:26 +0200 Subject: [PATCH 12/17] update query --- .../tasks/configure_heat.yml | 4 +-- .../tasks/creating_stack.yml | 31 +++---------------- 2 files changed, 7 insertions(+), 28 deletions(-) diff --git a/roles/telemetry_autoscaling/tasks/configure_heat.yml b/roles/telemetry_autoscaling/tasks/configure_heat.yml index 2e7099a..8ccfce0 100644 --- a/roles/telemetry_autoscaling/tasks/configure_heat.yml +++ b/roles/telemetry_autoscaling/tasks/configure_heat.yml @@ -221,7 +221,7 @@ {% endif -%} {% if metrics_backend == "prometheus" -%} str_replace: - template: "(rate(ceilometer_cpu{resource_name=~'server_name_prefix.*'}[150s]))/10000000" + template: "(rate(ceilometer_cpu{resource_name=~'stack_name.*'}[1m])) * 100" params: server_name_prefix: {get_param: server_name_prefix} {%- endif %} @@ -258,7 +258,7 @@ {% endif -%} {% if metrics_backend == "prometheus" -%} str_replace: - template: "(rate(ceilometer_cpu{resource_name=~'server_name_prefix.*'}[150s]))/10000000" + template: "(rate(ceilometer_cpu{resource_name=~'stack_name.*'}[1m])) * 100" params: server_name_prefix: {get_param: server_name_prefix} {% endif %} diff --git a/roles/telemetry_autoscaling/tasks/creating_stack.yml b/roles/telemetry_autoscaling/tasks/creating_stack.yml index b4a128b..15bbd97 100644 --- a/roles/telemetry_autoscaling/tasks/creating_stack.yml +++ b/roles/telemetry_autoscaling/tasks/creating_stack.yml @@ -32,39 +32,18 @@ register: result failed_when: result.rc >= 1 +- name: Print the result + ansible.builtin.debug: + var: result + #- name: Wait for 60 sec # ansible.builtin.pause: # minutes: 1 -- name: Debug that the stack resources - block: - - name: Wait for 60 sec - ansible.builtin.pause: - minutes: 1 - - - name: show that the alarms created for the stack - ansible.builtin.shell: | - {{ openstack_cmd }} alarm list - register: result - failed_when: result.rc >= 1 - - - name: Verify that the stack resources are created - ansible.builtin.shell: | - export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); - {{ openstack_cmd }} stack resource list $STACK_ID; - - - name: Verify that the stack was created successfully - ansible.builtin.shell: | - # source ~/overcloudrc; - {{ openstack_cmd }} stack show {{ stack_name }}; - register: result - until: '"CREATE_COMPLETE" in result.stdout' - timeout: 30 - retries: 20 - name: Verify that the stack was created successfully ansible.builtin.shell: | # source ~/overcloudrc; - {{ openstack_cmd }} stack show {{ stack_name }} -c id -c stack_status; + {{ openstack_cmd }} stack show {{ stack_name }}; register: result until: '"CREATE_COMPLETE" in result.stdout' timeout: 30 From e65feeac0d54696884414a7b202632fcaff8ad76 Mon Sep 17 00:00:00 2001 
From: mgirgisf Date: Tue, 14 May 2024 12:38:06 +0200 Subject: [PATCH 13/17] update query --- .../telemetry_autoscaling/tasks/creating_stack.yml | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/roles/telemetry_autoscaling/tasks/creating_stack.yml b/roles/telemetry_autoscaling/tasks/creating_stack.yml index 15bbd97..fa853e3 100644 --- a/roles/telemetry_autoscaling/tasks/creating_stack.yml +++ b/roles/telemetry_autoscaling/tasks/creating_stack.yml @@ -35,7 +35,7 @@ - name: Print the result ansible.builtin.debug: var: result - + #- name: Wait for 60 sec # ansible.builtin.pause: # minutes: 1 @@ -59,6 +59,17 @@ retries: 20 until: '"CREATE_COMPLETE" in result.stdout' # failed_when: '"CREATE_COMPLETE" not in result.stdout' + +- name: Verify that ceilometer_cpu metric exist + ansible.builtin.shell: | + # source ~/overcloudrc; + {{ openstack_cmd }} metric list + register: result + failed_when: result.rc >= 1 + +- name: Print the result + ansible.builtin.debug: + var: result - name: Verify that an instance was launched by the stack creation ansible.builtin.shell: | From 2f613ef477d7f6799c6f2ef70eeccf52205c574f Mon Sep 17 00:00:00 2001 From: Emma Foley Date: Tue, 14 May 2024 12:01:48 -0400 Subject: [PATCH 14/17] Update python-observabilityclient version and alarm query When updating the python-observabilityclient, the flag was omitted, and package wasn't re-installed. The alarm query needed to be reverted, since it was revised when debugging. --- playbooks/autoscaling_osp18.yaml | 2 +- roles/telemetry_autoscaling/tasks/configure_heat.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/playbooks/autoscaling_osp18.yaml b/playbooks/autoscaling_osp18.yaml index 4760463..221356c 100644 --- a/playbooks/autoscaling_osp18.yaml +++ b/playbooks/autoscaling_osp18.yaml @@ -36,7 +36,7 @@ cmd: | oc exec openstackclient -- python3 -m ensurepip --upgrade oc exec openstackclient -- python3 -m pip install --upgrade aodhclient - oc exec openstackclient -- python3 -m pip install python-observabilityclient + oc exec openstackclient -- python3 -m pip install --upgrade python-observabilityclient tags: - setup diff --git a/roles/telemetry_autoscaling/tasks/configure_heat.yml b/roles/telemetry_autoscaling/tasks/configure_heat.yml index 8ccfce0..2e7099a 100644 --- a/roles/telemetry_autoscaling/tasks/configure_heat.yml +++ b/roles/telemetry_autoscaling/tasks/configure_heat.yml @@ -221,7 +221,7 @@ {% endif -%} {% if metrics_backend == "prometheus" -%} str_replace: - template: "(rate(ceilometer_cpu{resource_name=~'stack_name.*'}[1m])) * 100" + template: "(rate(ceilometer_cpu{resource_name=~'server_name_prefix.*'}[150s]))/10000000" params: server_name_prefix: {get_param: server_name_prefix} {%- endif %} @@ -258,7 +258,7 @@ {% endif -%} {% if metrics_backend == "prometheus" -%} str_replace: - template: "(rate(ceilometer_cpu{resource_name=~'stack_name.*'}[1m])) * 100" + template: "(rate(ceilometer_cpu{resource_name=~'server_name_prefix.*'}[150s]))/10000000" params: server_name_prefix: {get_param: server_name_prefix} {% endif %} From 27d15dda14c7df7e298ae10d5d883cdfe0331ab5 Mon Sep 17 00:00:00 2001 From: Emma Foley Date: Tue, 14 May 2024 15:32:03 -0400 Subject: [PATCH 15/17] Remove the telemetry_autoscaling role and use it from fvt instead The telemetry_autoscaling role has been moved to http://github.com/infrawatch/feature-verification-tests --- playbooks/autoscaling_osp18.yaml | 24 +- roles/telemetry_autoscaling/README.md | 38 --- roles/telemetry_autoscaling/defaults/main.yml | 
5 - roles/telemetry_autoscaling/meta/main.yml | 52 ---- .../tasks/configure_heat.yml | 271 ------------------ .../tasks/creating_stack.yml | 131 --------- roles/telemetry_autoscaling/tasks/main.yml | 15 - .../tasks/test_autoscaling.yml | 112 -------- .../tasks/verify_autoscaling.yml | 42 --- 9 files changed, 20 insertions(+), 670 deletions(-) delete mode 100644 roles/telemetry_autoscaling/README.md delete mode 100644 roles/telemetry_autoscaling/defaults/main.yml delete mode 100644 roles/telemetry_autoscaling/meta/main.yml delete mode 100644 roles/telemetry_autoscaling/tasks/configure_heat.yml delete mode 100644 roles/telemetry_autoscaling/tasks/creating_stack.yml delete mode 100644 roles/telemetry_autoscaling/tasks/main.yml delete mode 100644 roles/telemetry_autoscaling/tasks/test_autoscaling.yml delete mode 100644 roles/telemetry_autoscaling/tasks/verify_autoscaling.yml diff --git a/playbooks/autoscaling_osp18.yaml b/playbooks/autoscaling_osp18.yaml index 221356c..cc7e7a3 100644 --- a/playbooks/autoscaling_osp18.yaml +++ b/playbooks/autoscaling_osp18.yaml @@ -12,9 +12,25 @@ stack_external_network: "public" tasks: # this is temperory and should be replaced later by some heat stack or some ansible + + - set_fact: + fvt_dir: "{{ playbook_dir }}/feature-verification-tests" + when: "{{ not fvt_dir is defined }}" + tags: + always + + - debug: var=fvt_dir + + - command: + cmd: git clone http://github.com/infrawatch/feature-verification-tests -b efoley-add_telemetry_autoscaling {{ fvt_dir }} + creates: "{{ fvt_dir }}" + tags: + - setup + - ansible.builtin.command: cmd: git clone http://github.com/openstack-k8s-operators/install_yamls chdir: "{{ playbook_dir }}" + creates: "{{ playbook_dir }}/install_yamls" tags: - setup @@ -53,25 +69,25 @@ # NOTE: the tags are for testing/development, eventually, the role will just be imported, and main.yml will run through the tasks - import_role: - name: '../roles/telemetry_autoscaling' + name: '{{ fvt_dir }}roles/telemetry_autoscaling' tasks_from: 'verify_autoscaling' tags: - precheck - import_role: - name: '../roles/telemetry_autoscaling' + name: '{{ fvt_dir }}/roles/telemetry_autoscaling' tasks_from: configure_heat #tasks_from: creating_stack tags: - create - import_role: - name: '../roles/telemetry_autoscaling' + name: '{{ fvt_dir }}/roles/telemetry_autoscaling' tasks_from: creating_stack tags: - create - import_role: - name: '../roles/telemetry_autoscaling' + name: '{{ fvt_dir }}/roles/telemetry_autoscaling' tasks_from: test_autoscaling tags: - test diff --git a/roles/telemetry_autoscaling/README.md b/roles/telemetry_autoscaling/README.md deleted file mode 100644 index 225dd44..0000000 --- a/roles/telemetry_autoscaling/README.md +++ /dev/null @@ -1,38 +0,0 @@ -Role Name -========= - -A brief description of the role goes here. - -Requirements ------------- - -Any pre-requisites that may not be covered by Ansible itself or the role should be mentioned here. For instance, if the role uses the EC2 module, it may be a good idea to mention in this section that the boto package is required. - -Role Variables --------------- - -A description of the settable variables for this role should go here, including any variables that are in defaults/main.yml, vars/main.yml, and any variables that can/should be set via parameters to the role. Any variables that are read from other roles and/or the global scope (ie. hostvars, group vars, etc.) should be mentioned here as well. 
- -Dependencies ------------- - -A list of other roles hosted on Galaxy should go here, plus any details in regards to parameters that may need to be set for other roles, or variables that are used from other roles. - -Example Playbook ----------------- - -Including an example of how to use your role (for instance, with variables passed in as parameters) is always nice for users too: - - - hosts: servers - roles: - - { role: username.rolename, x: 42 } - -License -------- - -BSD - -Author Information ------------------- - -An optional section for the role authors to include contact information, or a website (HTML is not allowed). diff --git a/roles/telemetry_autoscaling/defaults/main.yml b/roles/telemetry_autoscaling/defaults/main.yml deleted file mode 100644 index 0bd5b91..0000000 --- a/roles/telemetry_autoscaling/defaults/main.yml +++ /dev/null @@ -1,5 +0,0 @@ ---- -# defaults file for telemetry_autoscaling -openstack_cmd: "openstack" -stack_name: vnf -metrics_backend: gnocchi diff --git a/roles/telemetry_autoscaling/meta/main.yml b/roles/telemetry_autoscaling/meta/main.yml deleted file mode 100644 index c572acc..0000000 --- a/roles/telemetry_autoscaling/meta/main.yml +++ /dev/null @@ -1,52 +0,0 @@ -galaxy_info: - author: your name - description: your role description - company: your company (optional) - - # If the issue tracker for your role is not on github, uncomment the - # next line and provide a value - # issue_tracker_url: http://example.com/issue/tracker - - # Choose a valid license ID from https://spdx.org - some suggested licenses: - # - BSD-3-Clause (default) - # - MIT - # - GPL-2.0-or-later - # - GPL-3.0-only - # - Apache-2.0 - # - CC-BY-4.0 - license: license (GPL-2.0-or-later, MIT, etc) - - min_ansible_version: 2.1 - - # If this a Container Enabled role, provide the minimum Ansible Container version. - # min_ansible_container_version: - - # - # Provide a list of supported platforms, and for each platform a list of versions. - # If you don't wish to enumerate all versions for a particular platform, use 'all'. - # To view available platforms and versions (or releases), visit: - # https://galaxy.ansible.com/api/v1/platforms/ - # - # platforms: - # - name: Fedora - # versions: - # - all - # - 25 - # - name: SomePlatform - # versions: - # - all - # - 1.0 - # - 7 - # - 99.99 - - galaxy_tags: [] - # List tags for your role here, one per line. A tag is a keyword that describes - # and categorizes the role. Users find roles by searching for tags. Be sure to - # remove the '[]' above, if you add tags to this list. - # - # NOTE: A tag is limited to a single word comprised of alphanumeric characters. - # Maximum 20 tags per role. - -dependencies: [] - # List your role dependencies here, one per line. Be sure to remove the '[]' above, - # if you add dependencies to this list. diff --git a/roles/telemetry_autoscaling/tasks/configure_heat.yml b/roles/telemetry_autoscaling/tasks/configure_heat.yml deleted file mode 100644 index 2e7099a..0000000 --- a/roles/telemetry_autoscaling/tasks/configure_heat.yml +++ /dev/null @@ -1,271 +0,0 @@ ---- -# RE: alarm definitions, it might be cleaner to add two of each alarm, and block off the entire resource, one for gnocchi and one for prom. -# instead of having if/else blocks inside the resource. -# -# TODO(efoley) Consider a heat stack to create the pre-reqs for the autoscaling example. -# i.e. 
create the sec group, network, etc -# TODO(efoley): Move the HOT templates into templates/ dir for the role -- name: Create the generic archive policy for autoscaling - when: metrics_backend == "gnocchi" - ansible.builtin.shell: | - # source ~/overcloudrc; - {{ openstack_cmd }} metric archive-policy create generic \ - --back-window 0 \ - --definition timespan:'4:00:00',granularity:'0:01:00',points:240 \ - --aggregation-method 'rate:mean' \ - --aggregation-method 'mean'; - register: result - failed_when: result.rc >= 1 - -- name: Verify that the archive policy was created - when: metrics_backend == "gnocchi" - ansible.builtin.shell: | - # source ~/overcloudrc; - {{ openstack_cmd }} metric archive-policy show generic; - register: result - failed_when: result.rc >= 1 - -- name: Create "vnf" directory under templates - ansible.builtin.shell: | - mkdir -p $HOME/templates/autoscaling/vnf/ - - # TODO: Pass the parameters correctly. When I tried to pass the parameters - # into the template.yaml file, they weren't passed to the instance initially. -- name: Configure heat template for automatically scaling instances - ansible.builtin.copy: - dest: ~/templates/autoscaling/vnf/instance.yaml - content: | - heat_template_version: wallaby - description: Template to control scaling of VNF instance - - parameters: - metadata: - type: json - image: - type: string - description: image used to create instance - default: {{ stack_image | default("workload_image_1") }} - flavor: - type: string - description: instance flavor to be used - default: {{ stack_flavor | default("workload_flavor_1") }} - key_name: - type: string - description: keypair to be used - default: workload_key_1 - network: - type: string - description: project network to attach instance to - default: {{ stack_network | default("workload_internal_net_1") }} - external_network: - type: string - description: network used for floating IPs - default: {{ stack_external_network | default("public") }} - server_name_prefix: - type: string - description: a prefix for each server name. 
- default: "" - security_group: - type: string - description: the security group for the instances - default: basic - - resources: - vnf: - type: OS::Nova::Server - properties: - {% if metrics_backend == "prometheus" -%} - name: - list_join: ["", [{get_param: server_name_prefix}, {get_param: OS::stack_name}]] - {% endif -%} - flavor: {get_param: flavor} - #key_name: {get_param: key_name} - image: { get_param: image } - metadata: { get_param: metadata } - networks: - - port: { get_resource: port } - - port: - type: OS::Neutron::Port - properties: - network: {get_param: network} - security_groups: - - { get_param: security_group } - - floating_ip: - type: OS::Neutron::FloatingIP - properties: - floating_network: {get_param: external_network } - - floating_ip_assoc: - type: OS::Neutron::FloatingIPAssociation - properties: - floatingip_id: { get_resource: floating_ip } - port_id: { get_resource: port } - -- name: Create the resource to reference in the heat template - ansible.builtin.copy: - dest: ~/templates/autoscaling/vnf/resources.yaml - content: | - resource_registry: - "OS::Nova::Server::VNF": ./instance.yaml - # parameters: - # image: cirros - # flavor: m1.small - # network: private - # security_group: basic - -- name: Create the deployment template for heat to control instance scaling - ansible.builtin.copy: - dest: ~/templates/autoscaling/vnf/template.yaml - content: | - heat_template_version: wallaby - description: Example auto scale group, policy and alarm - parameters: - server_name_prefix: - description: A prefix for servers created by this stack. Can be used in queries. - type: string - default: autoscaling_server_ - #image: - # type: string - # description: image used to create instance - # default: "{{ stack_network| default( 'workload_image_1') }}" - #flavor: - # type: string - # description: instance flavor to be used - # default: "{{ stack_flavor | default('workload_flavor_1') }}" - #key_name: - # type: string - # description: keypair to be used - # default: workload_key_1 - #network: - # type: string - # description: project network to attach instance to - # default: "{{ stack_network | default('workload_internal_net_1') }}" - #external_network: - # type: string - # description: network used for floating IPs - # default: public - - resources: - scaleup_group: - type: OS::Heat::AutoScalingGroup - properties: - max_size: 3 - min_size: 1 - desired_capacity: 1 - resource: - type: OS::Nova::Server::VNF - # resource definieiton for the resource to be created by the group. - # this passes in parameters to the resource. - # So I can put a name_prefix here and it'll be set as the name... - # But the parameter is needed in the VNF. - # Can I set the name in an OS::Nova::Server? 
- # This sets the property/metadata for the VNF - # name should be set here, but maybe not as metadata - # network: { get_param: network } - # flavor: { get_param: flavor } - # image: { get_param: image } - properties: - server_name_prefix: { get_param: server_name_prefix } - metadata: {"metering.server_group": {get_param: "OS::stack_id"}} - - - scaleup_policy: - type: OS::Heat::ScalingPolicy - properties: - adjustment_type: change_in_capacity - auto_scaling_group_id: { get_resource: scaleup_group } - cooldown: 60 - scaling_adjustment: 1 - - scaledown_policy: - type: OS::Heat::ScalingPolicy - properties: - adjustment_type: change_in_capacity - auto_scaling_group_id: { get_resource: scaleup_group } - cooldown: 60 - scaling_adjustment: -1 - - cpu_alarm_high: - {% if metrics_backend == "gnocchi" -%} - type: OS::Aodh::GnocchiAggregationByResourcesAlarm - {% endif -%} - {% if metrics_backend == "prometheus" -%} - type: OS::Aodh::PrometheusAlarm - {% endif -%} - properties: - description: Scale up instance if CPU > 50% - {% if metrics_backend == "gnocchi" -%} - metric: cpu - aggregation_method: rate:mean - granularity: 300 - evaluation_periods: 1 - resource_type: instance - threshold: 30000000000.0 - {% endif -%} - {% if metrics_backend == "prometheus" -%} - threshold: 50 - {% endif -%} - comparison_operator: gt - alarm_actions: - - str_replace: - template: trust+url - params: - url: {get_attr: [scaleup_policy, signal_url]} - query: - {% if metrics_backend == "gnocchi" -%} - list_join: - - '' - - - {'=': {server_group: {get_param: "OS::stack_id"}}} - {% endif -%} - {% if metrics_backend == "prometheus" -%} - str_replace: - template: "(rate(ceilometer_cpu{resource_name=~'server_name_prefix.*'}[150s]))/10000000" - params: - server_name_prefix: {get_param: server_name_prefix} - {%- endif %} - - cpu_alarm_low: - {% if metrics_backend == "gnocchi" -%} - type: OS::Aodh::GnocchiAggregationByResourcesAlarm - {% endif -%} - {% if metrics_backend == "prometheus" -%} - type: OS::Aodh::PrometheusAlarm - {% endif -%} - properties: - description: Scale down instance if CPU < 20% - {% if metrics_backend == "gnocchi" -%} - metric: cpu - aggregation_method: rate:mean - granularity: 300 - evaluation_periods: 1 - resource_type: instance - threshold: 12000000000.0 - {% endif -%} - threshold: 20 - comparison_operator: lt - alarm_actions: - - str_replace: - template: trust+url - params: - url: {get_attr: [scaledown_policy, signal_url]} - query: - {% if metrics_backend == "gnocchi" -%} - list_join: - - '' - - - {'=': {server_group: {get_param: "OS::stack_id"}}} - {% endif -%} - {% if metrics_backend == "prometheus" -%} - str_replace: - template: "(rate(ceilometer_cpu{resource_name=~'server_name_prefix.*'}[150s]))/10000000" - params: - server_name_prefix: {get_param: server_name_prefix} - {% endif %} - - outputs: - scaleup_policy_signal_url: - value: {get_attr: [scaleup_policy, alarm_url]} - - scaledown_policy_signal_url: - value: {get_attr: [scaledown_policy, alarm_url]} diff --git a/roles/telemetry_autoscaling/tasks/creating_stack.yml b/roles/telemetry_autoscaling/tasks/creating_stack.yml deleted file mode 100644 index fa853e3..0000000 --- a/roles/telemetry_autoscaling/tasks/creating_stack.yml +++ /dev/null @@ -1,131 +0,0 @@ ---- -- name: Copy the templates to openstackclient pod - ansible.builtin.shell: | - oc cp $HOME/templates/autoscaling/vnf/template.yaml openstackclient:/tmp - oc cp $HOME/templates/autoscaling/vnf/resources.yaml openstackclient:/tmp - oc cp $HOME/templates/autoscaling/vnf/instance.yaml 
openstackclient:/tmp - - # template location was $HOME/templates/autoscaling/vnf/ -- name: "[temp] Set the location to copy the template to" - ansible.builtin.set_fact: - template_location: '/tmp' - - # I need to pass parameters to the stack create - # TODO: Define parameters as vars, and add them to the role. - # TODO: Update pre-checks to make sure the image, flavor, key, network and external_network exist. - # Temperority use the install_yamls/devinstall make edpm_deploy_instance to make these resources. - # This maketarget uses the devsetup/scripts/edpm-deploy-instance.sh script - # TODO: Either update the test to create the required resources, or add checks to make sure they exist. -- ansible.builtin.set_fact: - stack_image: cirros - stack_flavor: m1.small - #stack_keyname: - stack_network: private - stack_external_network: public -- name: Create the stack - ansible.builtin.shell: | - #source ~/overcloudrc; - {{ openstack_cmd }} stack create \ - -t {{ template_location }}/template.yaml \ - -e {{ template_location }}/resources.yaml \ - {{ stack_name }}; - register: result - failed_when: result.rc >= 1 - -- name: Print the result - ansible.builtin.debug: - var: result - - #- name: Wait for 60 sec - # ansible.builtin.pause: - # minutes: 1 - -- name: Verify that the stack was created successfully - ansible.builtin.shell: | - # source ~/overcloudrc; - {{ openstack_cmd }} stack show {{ stack_name }}; - register: result - until: '"CREATE_COMPLETE" in result.stdout' - timeout: 30 - retries: 20 - -- name: Verify that the stack resources are created - ansible.builtin.shell: | - # source ~/overcloudrc; - export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); - {{ openstack_cmd }} stack resource list $STACK_ID; - register: result - timeout: 30 - retries: 20 - until: '"CREATE_COMPLETE" in result.stdout' - # failed_when: '"CREATE_COMPLETE" not in result.stdout' - -- name: Verify that ceilometer_cpu metric exist - ansible.builtin.shell: | - # source ~/overcloudrc; - {{ openstack_cmd }} metric list - register: result - failed_when: result.rc >= 1 - -- name: Print the result - ansible.builtin.debug: - var: result - -- name: Verify that an instance was launched by the stack creation - ansible.builtin.shell: | - # source ~/overcloudrc; - export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); - {{ openstack_cmd }} server list --long | grep $STACK_ID; - register: result - failed_when: result.rc >= 1 - -- name: Verify that the alarms were created for the stack - ansible.builtin.shell: | - # source ~/overcloudrc; - {{ openstack_cmd }} alarm list - register: result - failed_when: result.rc >= 1 - -- name: Note the physical_resource_id values for the cpu_alarm_low resource - ansible.builtin.shell: | - # source ~/overcloudrc; - export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); - {{ openstack_cmd }} stack resource list $STACK_ID |grep -i cpu_alarm_low | awk '{print $4}' - register: physical_resource_id_low - -- name: Note the physical_resource_id values for the cpu_alarm_high resource - ansible.builtin.shell: | - # source ~/overcloudrc; - export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); - {{ openstack_cmd }} stack resource list $STACK_ID |grep -i cpu_alarm_high | awk '{print $4}' - register: physical_resource_id_high - -- name: Verify physical_resource_id match the alarm id for cpu_alarm_low - ansible.builtin.shell: | - # source ~/overcloudrc; - {{ openstack_cmd }} alarm list |grep -i 
cpu_alarm_low | awk '{print $2}' - register: alarm_id_low - failed_when: - - physical_resource_id_low.stdout != alarm_id_low.stdout - -- name: Verify physical_resource_id match the alarm id for cpu_alarm_high - ansible.builtin.shell: | - # source ~/overcloudrc; - {{ openstack_cmd }} alarm list |grep -i cpu_alarm_high | awk '{print $2}' - register: alarm_id_high - failed_when: - - physical_resource_id_high.stdout != alarm_id_high.stdout - - # TODO: get alt check for prom -- name: Verify that metric resources exist for the stack - when: metrics_backend == "gnocchi" - ansible.builtin.shell: | - # source ~/overcloudrc; - export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); - {{ openstack_cmd }} metric resource search \ - --sort-column launched_at -c id \ - -c display_name -c launched_at \ - -c deleted_at --type instance \ - server_group="$STACK_ID" - register: result - failed_when: result.rc >= 1 diff --git a/roles/telemetry_autoscaling/tasks/main.yml b/roles/telemetry_autoscaling/tasks/main.yml deleted file mode 100644 index ab4eee9..0000000 --- a/roles/telemetry_autoscaling/tasks/main.yml +++ /dev/null @@ -1,15 +0,0 @@ ---- -# tasks file for telemetry_autoscaling - # TODO: Update the pre-checks to include the resources that the stack expects to exist - # i.e. networks, flavor, image, security group -- name: Check pre-reqs for autoscaling - ansible.builtin.include_tasks: verify_autoscaling.yml - -- name: Create the heat templates - ansible.builtin.include_tasks: configure_heat.yml - -- name: Launch the stack - ansible.builtin.include_tasks: creating_stack.yml - -- name: Run the autoscaling tests - ansible.builtin.include_tasks: test_autoscaling.yml diff --git a/roles/telemetry_autoscaling/tasks/test_autoscaling.yml b/roles/telemetry_autoscaling/tasks/test_autoscaling.yml deleted file mode 100644 index 3f15570..0000000 --- a/roles/telemetry_autoscaling/tasks/test_autoscaling.yml +++ /dev/null @@ -1,112 +0,0 @@ ---- - # NOTE: The format of the output appears to have changed; There are now 2 - # IP addresses, and the second one is the one associated with the floating IP -- name: Register instance IP - ansible.builtin.shell: | - # source ~/overcloudrc; - export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value) - {{ openstack_cmd }} server list --long -c Networks -c 'Properties' | \ - grep -i $STACK_ID | \ - awk -F'=' '{print $2}' | \ - awk -F'|' '{print $1}' | \ - awk -F',' '{print $2}' - register: vnf_instance_ip - -- name: Show the IP - ansible.builtin.debug: - var: vnf_instance_ip - -- when: vnf_instance_ip.stdout | length == 0 - fail: - msg: "bad vnf_instance_ip" - -- name: Verfiy the number of instances before scaling - ansible.builtin.shell: | - # source ~/overcloudrc; - {{ openstack_cmd }} server list --long | grep -i 'metering.server_group' | wc -l - register: instance_count1 - -- shell: | - cat {{ ansible_env.HOME }}/.ssh/known_hosts | grep "{{ item | trim }}" - with_items: "{{ vnf_instance_ip.stdout_lines }}" - ignore_errors: true - -- name: Remove the existing hostkey, if there is one for the target IP - ansible.builtin.lineinfile: - dest: '{{ ansible_env.HOME }}/.ssh/known_hosts' - state: absent - regexp: "{{ item |trim }}" - with_items: "{{ vnf_instance_ip.stdout_lines }}" - -- shell: | - cat {{ ansible_env.HOME }}/.ssh/known_hosts | grep "{{ item | trim }}" - with_items: "{{ vnf_instance_ip.stdout_lines }}" - ignore_errors: true - - # NOTE: Disabling strict host key checking so that ssh doesn't give an error - # when the host 
key changes i.e. if a new VM has been assigned a - # previously-used IP address, which will happen during local testing but not - # in CI - # NOTE: the with_items is because I was capturing both IPs initially, and - # using both in case the order was not consistent. - # Disabling hostkey checking didn't work. I need to remove the key from the known_hosts file before trying to tun this. - # The key removal should move to some pre/pre-run stage. -- name: Test automatic scaling up of instances - ansible.builtin.shell: | - sshpass -p gocubsgo ssh -o StrictHostKeyChecking=False cirros@{{ item | trim }} "sudo yes > /dev/null &" - register: busy_process - with_items: "{{ vnf_instance_ip.stdout_lines }}" - -- name: Verify that the alarm has been triggered - ansible.builtin.shell: | - # source ~/overcloudrc; - {{ openstack_cmd }} alarm list -c state -c name -f value| \ - grep -i "cpu_alarm_high" | \ - awk '{print $2}' - retries: 100 - delay: 5 - register: result - until: result.stdout == "alarm" - -- name: Verify that the Orchestration service has scaled up the instances - ansible.builtin.shell: | - # source ~/overcloudrc; - {{ openstack_cmd }} server list --long|grep -i metering.server_group | wc -l - retries: 100 - delay: 5 - register: instance_count2 - until: instance_count2.stdout == "3" - -- name: Stop the busy process - ansible.builtin.shell: | - sshpass -p gocubsgo ssh cirros@{{ item | trim }} "sudo killall yes" - register: kill_busy_process - with_items: "{{ vnf_instance_ip.stdout_lines }}" - -- name: Test automatic scaling down of instances - ansible.builtin.pause: - minutes: 5 - -- name: Verify that the alarm has been triggered - ansible.builtin.shell: | - # source ~/overcloudrc; - {{ openstack_cmd }} alarm list -c state -c name -f value| \ - grep -i "cpu_alarm_low" | \ - awk '{print $2}' - retries: 100 - delay: 5 - register: result - until: result.stdout == "alarm" - - # TODO: the metering.server group metadata was used for gnocchi alarm - # selection. - # prom uses the instance name, so the metadata MIGHT be removed, and a new - # check for whether the scaling group has scaled down may be needed. -- name: Verify that the Orchestration service has scaled down the instances - ansible.builtin.shell: | - # source ~/overcloudrc; - {{ openstack_cmd }} server list --long|grep -i metering.server_group |wc -l - retries: 100 - delay: 5 - register: instance_count3 - until: instance_count3.stdout == "1" diff --git a/roles/telemetry_autoscaling/tasks/verify_autoscaling.yml b/roles/telemetry_autoscaling/tasks/verify_autoscaling.yml deleted file mode 100644 index 9ae42a3..0000000 --- a/roles/telemetry_autoscaling/tasks/verify_autoscaling.yml +++ /dev/null @@ -1,42 +0,0 @@ ---- -# This doesn't work when using prom as the metrics backend -# TODO: Add a metrics_backend option -- name: Test service API endpoint(metric) for autoscaling - when: metrics_backend == "gnocchi" - ansible.builtin.shell: | - #source ~/stackrc; - {{ openstack_cmd }} endpoint list --service metric; - register: result - failed_when: result.rc >= 1 - -- name: Test service API endpoint(alarm) for autoscaling - ansible.builtin.shell: | - #source ~/stackrc; - {{ openstack_cmd }} endpoint list --service alarming; - register: result - failed_when: result.rc >= 1 - -- name: Test service API endpoint(heat) for autoscaling - ansible.builtin.shell: | - # source ~/overcloudrc; - {{ openstack_cmd }} endpoint list --service orchestration; - register: result - failed_when: result.rc >= 1 - - # need selection criteria to decide when to run these. 
- # Need alternative for OSP18. -- name: Verify that the services are running on the overcloud - ansible.builtin.shell: | - # source ~/overcloudrc; - sudo podman ps --filter=name='heat|gnocchi|ceilometer|aodh'; - register: result - failed_when: result.rc >= 1 - ignore_errors: true - -- name: Verify that the time-series database service is available - when: metrics_backend == "gnocchi" - ansible.builtin.shell: | - # source ~/overcloudrc; - {{ openstack_cmd }} metric status --fit-width; - register: result - failed_when: result.rc >= 1 From 49f34468388bef7f8b7733bbf663251da53f10f9 Mon Sep 17 00:00:00 2001 From: mgirgisf Date: Wed, 15 May 2024 09:23:12 +0200 Subject: [PATCH 16/17] update clone task for fvt --- playbooks/autoscaling_osp18.yaml | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/playbooks/autoscaling_osp18.yaml b/playbooks/autoscaling_osp18.yaml index cc7e7a3..3abbb5f 100644 --- a/playbooks/autoscaling_osp18.yaml +++ b/playbooks/autoscaling_osp18.yaml @@ -10,22 +10,15 @@ stack_flavor: "m1.small" stack_network: "private" stack_external_network: "public" + # this is temperory and should be replaced later by some heat stack or some ansible + fvt_dir: "{{ playbook_dir }}/feature-verification-tests" tasks: - # this is temperory and should be replaced later by some heat stack or some ansible - - - set_fact: - fvt_dir: "{{ playbook_dir }}/feature-verification-tests" - when: "{{ not fvt_dir is defined }}" - tags: - always - - - debug: var=fvt_dir - - - command: - cmd: git clone http://github.com/infrawatch/feature-verification-tests -b efoley-add_telemetry_autoscaling {{ fvt_dir }} - creates: "{{ fvt_dir }}" - tags: - - setup + # this is temperory + - name: Clone feature-verification repository + git: + repo: http://github.com/infrawatch/feature-verification-tests + dest: "{{ fvt_dir }}" + version: efoley-add_telemetry_autoscaling - ansible.builtin.command: cmd: git clone http://github.com/openstack-k8s-operators/install_yamls @@ -69,7 +62,7 @@ # NOTE: the tags are for testing/development, eventually, the role will just be imported, and main.yml will run through the tasks - import_role: - name: '{{ fvt_dir }}roles/telemetry_autoscaling' + name: '{{ fvt_dir }}/roles/telemetry_autoscaling' tasks_from: 'verify_autoscaling' tags: - precheck From 7b4bc4244e92c260f7a20c65023b822a3a4da2ba Mon Sep 17 00:00:00 2001 From: Muneesha Yadla Date: Wed, 15 May 2024 06:52:38 -0400 Subject: [PATCH 17/17] Added pre-config file to test autoscaling on OSP18 --- playbooks/preconfig_osp18.yaml | 69 ++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 playbooks/preconfig_osp18.yaml diff --git a/playbooks/preconfig_osp18.yaml b/playbooks/preconfig_osp18.yaml new file mode 100644 index 0000000..0220079 --- /dev/null +++ b/playbooks/preconfig_osp18.yaml @@ -0,0 +1,69 @@ +#!/usr/bin/env ansible-playbook +--- +# Pre-config steps to setup autoscaling for OSP18 + +- hosts: localhost + become: no + name: Pre-config steps for Autoscaling on OSP18 + + tasks: + - name: Install openstackclient dependencies + shell: + cmd: | + oc exec -ti openstackclient -- python3 -m ensurepip --upgrade + oc exec -ti openstackclient -- python3 -m pip install --upgrade aodhclient + oc exec -ti openstackclient -- python3 -m pip install python-observabilityclient + + - name: Install Cluster Observability Operator(COO) + ansible.builtin.shell: + cmd: | + oc create -f - <= 1 + + - name: Wait for the installation to succeed + pause: + minutes: 1 + + - name: Verify COO is 
installed successfully + ansible.builtin.command: + cmd: | + oc wait --for jsonpath="{.status.phase}"=Succeeded csv --namespace=openshift-operators -l operators.coreos.com/cluster-observability-operator.openshift-operators + register: result + changed_when: false + failed_when: '"condition met" not in result.stdout' + + - name: Add content to a spec file in home dir + copy: + dest: $HOME/spec.yaml + content: | + spec: + customContainerImages: + aodhAPIImage: quay.io/podified-master-centos9/openstack-aodh-api:current-podified + aodhEvaluatorImage: quay.io/podified-master-centos9/openstack-aodh-evaluator:current-podified + aodhListenerImage: quay.io/podified-master-centos9/openstack-aodh-listener:current-podified + aodhNotifierImage: quay.io/podified-master-centos9/openstack-aodh-notifier:current-podified + heatAPIImage: quay.io/podified-master-centos9/openstack-heat-api:current-podified + heatCfnapiImage: quay.io/podified-master-centos9/openstack-heat-api-cfn:current-podified + heatEngineImage: quay.io/podified-master-centos9/openstack-heat-engine:current-podified + + - name: Update the openstackversions CR to run master images for aodh and heat + ansible.builtin.shell: + cmd: | + oc patch openstackversions openstack-galera-network-isolation --type merge --patch-file $HOME/spec.yaml + register: result + +...
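
A note on the Prometheus alarm query used in several of the patches above: the ceilometer_cpu metric is cumulative CPU time in nanoseconds, so rate() over the sampling window gives nanoseconds of CPU consumed per second, and dividing by 10000000 turns that into an approximate utilisation percentage of a single vCPU. That is why the Prometheus-side thresholds are plain percentages (50 to scale up, 20 to scale down), unlike the nanosecond-scale thresholds used with gnocchi. As a minimal sketch, assuming the default server_name_prefix of autoscaling_server_ from the template, the substituted query looks like:

    # approximate CPU utilisation (% of one vCPU) for the autoscaling group's servers
    (rate(ceilometer_cpu{resource_name=~'autoscaling_server_.*'}[150s])) / 10000000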
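
For local testing, a typical invocation of the playbooks added in this series (assuming ansible-playbook is run from the repository root and the oc client is already logged in to the target cluster) might look like:

    ansible-playbook playbooks/preconfig_osp18.yaml
    ansible-playbook playbooks/autoscaling_osp18.yaml

Variables such as stack_name or metrics_backend can be overridden with -e if the defaults from the telemetry_autoscaling role are not wanted.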