From c0df844cc3d1a0a3787644b4675fd18aab8b40f1 Mon Sep 17 00:00:00 2001 From: Emma Foley Date: Wed, 1 May 2024 14:43:32 -0400 Subject: [PATCH 01/17] Move playbook tasks to role In preparation for other refactors. This will allow for setting some default values for vars, so that the existing playbooks still run as they did previously without need to update the scripts running them. having the role will also allow to use the file/ and templates/ sub-directories, and add some additional configurability to the role. --- playbooks/autoscaling_osp18.yaml | 12 ++ playbooks/configure_heat.yaml | 178 +----------------- playbooks/creating_stack.yaml | 92 +-------- playbooks/test_autoscaling.yaml | 72 +------ playbooks/verify_autoscaling.yaml | 41 +--- roles/telemetry_autoscaling/README.md | 38 ++++ roles/telemetry_autoscaling/defaults/main.yml | 5 + roles/telemetry_autoscaling/meta/main.yml | 52 +++++ .../tasks/configure_heat.yml | 178 ++++++++++++++++++ .../tasks/creating_stack.yml | 86 +++++++++ roles/telemetry_autoscaling/tasks/main.yml | 7 + .../tasks/test_autoscaling.yml | 69 +++++++ .../tasks/verify_autoscaling.yml | 42 +++++ 13 files changed, 501 insertions(+), 371 deletions(-) create mode 100644 playbooks/autoscaling_osp18.yaml create mode 100644 roles/telemetry_autoscaling/README.md create mode 100644 roles/telemetry_autoscaling/defaults/main.yml create mode 100644 roles/telemetry_autoscaling/meta/main.yml create mode 100644 roles/telemetry_autoscaling/tasks/configure_heat.yml create mode 100644 roles/telemetry_autoscaling/tasks/creating_stack.yml create mode 100644 roles/telemetry_autoscaling/tasks/main.yml create mode 100644 roles/telemetry_autoscaling/tasks/test_autoscaling.yml create mode 100644 roles/telemetry_autoscaling/tasks/verify_autoscaling.yml diff --git a/playbooks/autoscaling_osp18.yaml b/playbooks/autoscaling_osp18.yaml new file mode 100644 index 0000000..75b7fd4 --- /dev/null +++ b/playbooks/autoscaling_osp18.yaml @@ -0,0 +1,12 @@ +--- +- hosts: localhost + become: no + name: Run the autoscaling tests + vars: + openstack_cmd: "oc rsh openstackclient openstack" + metrics_backend: "prometheus" + + tasks: + - import_role: + name: '../roles/telemetry_autoscaling' + diff --git a/playbooks/configure_heat.yaml b/playbooks/configure_heat.yaml index 8df3966..4c23e83 100644 --- a/playbooks/configure_heat.yaml +++ b/playbooks/configure_heat.yaml @@ -6,181 +6,9 @@ - hosts: undercloud become: no name: Using the heat service for autoscaling - vars: - openstack_cmd: "{{ openstack_cmd if openstack_cmd is defined else 'openstack' }}" - stack_name: "{{ stack_name if stack_name is defined else 'vnf' }}" tasks: - - - name: Create the generic archive policy for autoscaling - shell: | - source ~/overcloudrc; - {{ openstack_cmd }} metric archive-policy create generic \ - --back-window 0 \ - --definition timespan:'4:00:00',granularity:'0:01:00',points:240 \ - --aggregation-method 'rate:mean' \ - --aggregation-method 'mean'; - register: result - failed_when: result.rc >= 1 - - - name: Verify that the archive policy was created - shell: | - source ~/overcloudrc; - {{ openstack_cmd }} metric archive-policy show generic; - register: result - failed_when: result.rc >= 1 - - - name: Create "vnf" directory under templates - shell: | - mkdir -p $HOME/templates/autoscaling/vnf/ - - - name: Configure heat template for automatically scaling instances - copy: - dest: ~/templates/autoscaling/vnf/instance.yaml - content: | - heat_template_version: wallaby - description: Template to control scaling of VNF instance 
- - parameters: - metadata: - type: json - image: - type: string - description: image used to create instance - default: workload_image_1 - flavor: - type: string - description: instance flavor to be used - default: workload_flavor_1 - key_name: - type: string - description: keypair to be used - default: workload_key_1 - network: - type: string - description: project network to attach instance to - default: workload_internal_net_1 - external_network: - type: string - description: network used for floating IPs - default: public - - resources: - vnf: - type: OS::Nova::Server - properties: - flavor: {get_param: flavor} - key_name: {get_param: key_name} - image: { get_param: image } - metadata: { get_param: metadata } - networks: - - port: { get_resource: port } - - port: - type: OS::Neutron::Port - properties: - network: {get_param: network} - security_groups: - - workload_secgroup_1 - - floating_ip: - type: OS::Neutron::FloatingIP - properties: - floating_network: {get_param: external_network } - - floating_ip_assoc: - type: OS::Neutron::FloatingIPAssociation - properties: - floatingip_id: { get_resource: floating_ip } - port_id: { get_resource: port } - - - name: Create the resource to reference in the heat template - copy: - dest: ~/templates/autoscaling/vnf/resources.yaml - content: | - resource_registry: - "OS::Nova::Server::VNF": /home/stack/templates/autoscaling/vnf/instance.yaml - - - name: Create the deployment template for heat to control instance scaling - copy: - dest: ~/templates/autoscaling/vnf/template.yaml - content: | - heat_template_version: wallaby - description: Example auto scale group, policy and alarm - resources: - scaleup_group: - type: OS::Heat::AutoScalingGroup - properties: - max_size: 3 - min_size: 1 - #desired_capacity: 1 - resource: - type: OS::Nova::Server::VNF - properties: - metadata: {"metering.server_group": {get_param: "OS::stack_id"}} - - scaleup_policy: - type: OS::Heat::ScalingPolicy - properties: - adjustment_type: change_in_capacity - auto_scaling_group_id: { get_resource: scaleup_group } - cooldown: 60 - scaling_adjustment: 1 - - scaledown_policy: - type: OS::Heat::ScalingPolicy - properties: - adjustment_type: change_in_capacity - auto_scaling_group_id: { get_resource: scaleup_group } - cooldown: 60 - scaling_adjustment: -1 - - cpu_alarm_high: - type: OS::Aodh::GnocchiAggregationByResourcesAlarm - properties: - description: Scale up instance if CPU > 50% - metric: cpu - aggregation_method: rate:mean - granularity: 300 - evaluation_periods: 1 - threshold: 30000000000.0 - resource_type: instance - comparison_operator: gt - alarm_actions: - - str_replace: - template: trust+url - params: - url: {get_attr: [scaleup_policy, signal_url]} - query: - list_join: - - '' - - - {'=': {server_group: {get_param: "OS::stack_id"}}} - - cpu_alarm_low: - type: OS::Aodh::GnocchiAggregationByResourcesAlarm - properties: - description: Scale down instance if CPU < 20% - metric: cpu - aggregation_method: rate:mean - granularity: 300 - evaluation_periods: 1 - threshold: 12000000000.0 - resource_type: instance - comparison_operator: lt - alarm_actions: - - str_replace: - template: trust+url - params: - url: {get_attr: [scaledown_policy, signal_url]} - query: - list_join: - - '' - - - {'=': {server_group: {get_param: "OS::stack_id"}}} - - outputs: - scaleup_policy_signal_url: - value: {get_attr: [scaleup_policy, alarm_url]} - - scaledown_policy_signal_url: - value: {get_attr: [scaledown_policy, alarm_url]} + - import_role: + name: '../roles/telemetry_autoscaling' + 
tasks_from: configure_heat ... diff --git a/playbooks/creating_stack.yaml b/playbooks/creating_stack.yaml index 6a2011e..860c9a1 100644 --- a/playbooks/creating_stack.yaml +++ b/playbooks/creating_stack.yaml @@ -6,95 +6,9 @@ - hosts: undercloud become: no name: Creating the stack deployment for autoscaling - vars: - openstack_cmd: "{{ openstack_cmd if openstack_cmd is defined else 'openstack' }}" - stack_name: "{{ stack_name if stack_name is defined else 'vnf' }}" tasks: - - - name: Create the stack - shell: | - source ~/overcloudrc; - {{ openstack_cmd }} stack create \ - -t $HOME/templates/autoscaling/vnf/template.yaml \ - -e $HOME/templates/autoscaling/vnf/resources.yaml \ - {{ stack_name }}; - register: result - failed_when: result.rc >= 1 - - - name: Wait for 60 sec - pause: - minutes: 1 - - - name: Verify that the stack was created successfully - shell: | - source ~/overcloudrc; - {{ openstack_cmd }} stack show {{ stack_name }} -c id -c stack_status; - register: result - failed_when: '"CREATE_COMPLETE" not in result.stdout' - - - name: Verify that the stack resources are created - shell: | - source ~/overcloudrc; - export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); - {{ openstack_cmd }} stack resource list $STACK_ID; - register: result - failed_when: '"CREATE_COMPLETE" not in result.stdout' - - - name: Verify that an instance was launched by the stack creation - shell: | - source ~/overcloudrc; - export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); - {{ openstack_cmd }} server list --long | grep $STACK_ID; - register: result - failed_when: result.rc >= 1 - - - name: Verify that the alarms were created for the stack - shell: | - source ~/overcloudrc; - {{ openstack_cmd }} alarm list - register: result - failed_when: result.rc >= 1 - - - name: Note the physical_resource_id values for the cpu_alarm_low resource - shell: | - source ~/overcloudrc; - export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); - {{ openstack_cmd }} stack resource list $STACK_ID |grep -i cpu_alarm_low | awk '{print $4}' - register: physical_resource_id_low - - - name: Note the physical_resource_id values for the cpu_alarm_high resource - shell: | - source ~/overcloudrc; - export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); - {{ openstack_cmd }} stack resource list $STACK_ID |grep -i cpu_alarm_high | awk '{print $4}' - register: physical_resource_id_high - - - name: Verify physical_resource_id match the alarm id for cpu_alarm_low - shell: | - source ~/overcloudrc; - {{ openstack_cmd }} alarm list |grep -i cpu_alarm_low | awk '{print $2}' - register: alarm_id_low - failed_when: - - physical_resource_id_low.stdout != alarm_id_low.stdout - - - name: Verify physical_resource_id match the alarm id for cpu_alarm_high - shell: | - source ~/overcloudrc; - {{ openstack_cmd }} alarm list |grep -i cpu_alarm_high | awk '{print $2}' - register: alarm_id_high - failed_when: - - physical_resource_id_high.stdout != alarm_id_high.stdout - - - name: Verify that metric resources exist for the stack - shell: | - source ~/overcloudrc; - export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); - {{ openstack_cmd }} metric resource search \ - --sort-column launched_at -c id \ - -c display_name -c launched_at \ - -c deleted_at --type instance \ - server_group="$STACK_ID" - register: result - failed_when: result.rc >= 1 + - include_role: + name: '../roles/telemetry_autoscaling' + tasks_from: 
creating_stack ... diff --git a/playbooks/test_autoscaling.yaml b/playbooks/test_autoscaling.yaml index 9a768fa..06ead50 100644 --- a/playbooks/test_autoscaling.yaml +++ b/playbooks/test_autoscaling.yaml @@ -6,74 +6,8 @@ - hosts: undercloud become: no name: Test automatic scaling of instances - vars: - openstack_cmd: "{{ openstack_cmd if openstack_cmd is defined else 'openstack' }}" - stack_name: "{{ stack_name if stack_name is defined else 'vnf' }}" tasks: - - - name: register instance IP - shell: | - source ~/overcloudrc; - export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value) - {{ openstack_cmd }} server list --long -c Networks -c 'Properties' | \ - grep -i $STACK_ID | \ - awk -F'=' '{print $2}' | \ - awk -F',' '{print $1}' - register: vnf_instance_ip - - - name: Verfiy the number of instances before scaling - shell: | - source ~/overcloudrc; - {{ openstack_cmd }} server list --long|grep -i metering.server_group |wc -l - register: instance_count1 - - - name: Test automatic scaling up of instances - shell: | - sshpass -p gocubsgo ssh cirros@{{ vnf_instance_ip.stdout }} "sudo yes > /dev/null &" - register: result - - - name: Verify that the alarm has been triggered - shell: | - source ~/overcloudrc; - {{ openstack_cmd }} alarm list -c state -c name -f value| \ - grep -i "cpu_alarm_high" | \ - awk '{print $2}' - retries: 100 - delay: 5 - register: result - until: result.stdout == "alarm" - - - name: Verify that the Orchestration service has scaled up the instances - shell: | - source ~/overcloudrc; - {{ openstack_cmd }} server list --long|grep -i metering.server_group | wc -l - retries: 100 - delay: 5 - register: instance_count2 - until: instance_count2.stdout == "3" - - - name: Test automatic scaling down of instances - pause: - minutes: 5 - - - name: Verify that the alarm has been triggered - shell: | - source ~/overcloudrc; - {{ openstack_cmd }} alarm list -c state -c name -f value| \ - grep -i "cpu_alarm_low" | \ - awk '{print $2}' - retries: 100 - delay: 5 - register: result - until: result.stdout == "alarm" - - - name: Verify that the Orchestration service has scaled down the instances - shell: | - source ~/overcloudrc; - {{ openstack_cmd }} server list --long|grep -i metering.server_group |wc -l - retries: 100 - delay: 5 - register: instance_count3 - until: instance_count3.stdout == "1" - + - import_role: + name: '../roles/telemetry_autoscaling' + tasks_from: test_autoscaling ... 
diff --git a/playbooks/verify_autoscaling.yaml b/playbooks/verify_autoscaling.yaml index fe2de50..e88a0d3 100644 --- a/playbooks/verify_autoscaling.yaml +++ b/playbooks/verify_autoscaling.yaml @@ -6,43 +6,8 @@ - hosts: undercloud become: no name: Verifying the overcloud deployment for autoscaling - vars: - openstack_cmd: "{{ openstack_cmd if openstack_cmd is defined else 'openstack' }}" - stack_name: "{{ stack_name if stack_name is defined else 'vnf' }}" tasks: - - - name: test service API endpoint(metric) for autoscaling - shell: | - source ~/stackrc; - {{ openstack_cmd }} endpoint list --service metric; - register: result - failed_when: result.rc >= 1 - - - name: test service API endpoint(alarm) for autoscaling - shell: | - source ~/stackrc; - {{ openstack_cmd }} endpoint list --service alarming; - register: result - failed_when: result.rc >= 1 - - - name: test service API endpoint(heat) for autoscaling - shell: | - source ~/overcloudrc; - {{ openstack_cmd }} endpoint list --service orchestration; - register: result - failed_when: result.rc >= 1 - - - name: Verify that the services are running on the overcloud - shell: | - source ~/overcloudrc; - sudo podman ps --filter=name='heat|gnocchi|ceilometer|aodh'; - register: result - failed_when: result.rc >= 1 - - - name: Verify that the time-series database service is available - shell: | - source ~/overcloudrc; - {{ openstack_cmd }} metric status --fit-width; - register: result - failed_when: result.rc >= 1 + - import_role: + name: '../roles/telemetry_autoscaling' + tasks_from: verify_autoscaling ... diff --git a/roles/telemetry_autoscaling/README.md b/roles/telemetry_autoscaling/README.md new file mode 100644 index 0000000..225dd44 --- /dev/null +++ b/roles/telemetry_autoscaling/README.md @@ -0,0 +1,38 @@ +Role Name +========= + +A brief description of the role goes here. + +Requirements +------------ + +Any pre-requisites that may not be covered by Ansible itself or the role should be mentioned here. For instance, if the role uses the EC2 module, it may be a good idea to mention in this section that the boto package is required. + +Role Variables +-------------- + +A description of the settable variables for this role should go here, including any variables that are in defaults/main.yml, vars/main.yml, and any variables that can/should be set via parameters to the role. Any variables that are read from other roles and/or the global scope (ie. hostvars, group vars, etc.) should be mentioned here as well. + +Dependencies +------------ + +A list of other roles hosted on Galaxy should go here, plus any details in regards to parameters that may need to be set for other roles, or variables that are used from other roles. + +Example Playbook +---------------- + +Including an example of how to use your role (for instance, with variables passed in as parameters) is always nice for users too: + + - hosts: servers + roles: + - { role: username.rolename, x: 42 } + +License +------- + +BSD + +Author Information +------------------ + +An optional section for the role authors to include contact information, or a website (HTML is not allowed). 
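For reference, a usage example specific to this role, mirroring the refactored playbooks in this repository: the undercloud target and relative role path are taken from those playbooks, and the variable values are simply the defaults from defaults/main.yml, so treat them as illustrative rather than required.

- hosts: undercloud
  become: no
  name: Run the telemetry autoscaling tests
  vars:
    openstack_cmd: "openstack"
    stack_name: "vnf"
    metrics_backend: "gnocchi"
  tasks:
    - import_role:
        name: '../roles/telemetry_autoscaling'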
diff --git a/roles/telemetry_autoscaling/defaults/main.yml b/roles/telemetry_autoscaling/defaults/main.yml new file mode 100644 index 0000000..0bd5b91 --- /dev/null +++ b/roles/telemetry_autoscaling/defaults/main.yml @@ -0,0 +1,5 @@ +--- +# defaults file for telemetry_autoscaling +openstack_cmd: "openstack" +stack_name: vnf +metrics_backend: gnocchi diff --git a/roles/telemetry_autoscaling/meta/main.yml b/roles/telemetry_autoscaling/meta/main.yml new file mode 100644 index 0000000..c572acc --- /dev/null +++ b/roles/telemetry_autoscaling/meta/main.yml @@ -0,0 +1,52 @@ +galaxy_info: + author: your name + description: your role description + company: your company (optional) + + # If the issue tracker for your role is not on github, uncomment the + # next line and provide a value + # issue_tracker_url: http://example.com/issue/tracker + + # Choose a valid license ID from https://spdx.org - some suggested licenses: + # - BSD-3-Clause (default) + # - MIT + # - GPL-2.0-or-later + # - GPL-3.0-only + # - Apache-2.0 + # - CC-BY-4.0 + license: license (GPL-2.0-or-later, MIT, etc) + + min_ansible_version: 2.1 + + # If this a Container Enabled role, provide the minimum Ansible Container version. + # min_ansible_container_version: + + # + # Provide a list of supported platforms, and for each platform a list of versions. + # If you don't wish to enumerate all versions for a particular platform, use 'all'. + # To view available platforms and versions (or releases), visit: + # https://galaxy.ansible.com/api/v1/platforms/ + # + # platforms: + # - name: Fedora + # versions: + # - all + # - 25 + # - name: SomePlatform + # versions: + # - all + # - 1.0 + # - 7 + # - 99.99 + + galaxy_tags: [] + # List tags for your role here, one per line. A tag is a keyword that describes + # and categorizes the role. Users find roles by searching for tags. Be sure to + # remove the '[]' above, if you add tags to this list. + # + # NOTE: A tag is limited to a single word comprised of alphanumeric characters. + # Maximum 20 tags per role. + +dependencies: [] + # List your role dependencies here, one per line. Be sure to remove the '[]' above, + # if you add dependencies to this list. 
diff --git a/roles/telemetry_autoscaling/tasks/configure_heat.yml b/roles/telemetry_autoscaling/tasks/configure_heat.yml new file mode 100644 index 0000000..58db9e5 --- /dev/null +++ b/roles/telemetry_autoscaling/tasks/configure_heat.yml @@ -0,0 +1,178 @@ +--- +- set_fact: + metrics_backend: "gnocchi" + when: not (metrics_backend is defined) + +- name: Create the generic archive policy for autoscaling + when: metrics_backend == "gnocchi" + shell: | + # source ~/overcloudrc; + {{ openstack_cmd }} metric archive-policy create generic \ + --back-window 0 \ + --definition timespan:'4:00:00',granularity:'0:01:00',points:240 \ + --aggregation-method 'rate:mean' \ + --aggregation-method 'mean'; + register: result + failed_when: result.rc >= 1 + +- name: Verify that the archive policy was created + when: metrics_backend == "gnocchi" + shell: | + # source ~/overcloudrc; + {{ openstack_cmd }} metric archive-policy show generic; + register: result + failed_when: result.rc >= 1 + +- name: Create "vnf" directory under templates + shell: | + mkdir -p $HOME/templates/autoscaling/vnf/ + +- name: Configure heat template for automatically scaling instances + copy: + dest: ~/templates/autoscaling/vnf/instance.yaml + content: | + heat_template_version: wallaby + description: Template to control scaling of VNF instance + + parameters: + metadata: + type: json + image: + type: string + description: image used to create instance + default: workload_image_1 + flavor: + type: string + description: instance flavor to be used + default: workload_flavor_1 + key_name: + type: string + description: keypair to be used + default: workload_key_1 + network: + type: string + description: project network to attach instance to + default: workload_internal_net_1 + external_network: + type: string + description: network used for floating IPs + default: public + + resources: + vnf: + type: OS::Nova::Server + properties: + flavor: {get_param: flavor} + key_name: {get_param: key_name} + image: { get_param: image } + metadata: { get_param: metadata } + networks: + - port: { get_resource: port } + + port: + type: OS::Neutron::Port + properties: + network: {get_param: network} + security_groups: + - workload_secgroup_1 + + floating_ip: + type: OS::Neutron::FloatingIP + properties: + floating_network: {get_param: external_network } + + floating_ip_assoc: + type: OS::Neutron::FloatingIPAssociation + properties: + floatingip_id: { get_resource: floating_ip } + port_id: { get_resource: port } + +- name: Create the resource to reference in the heat template + copy: + dest: ~/templates/autoscaling/vnf/resources.yaml + content: | + resource_registry: + "OS::Nova::Server::VNF": /home/stack/templates/autoscaling/vnf/instance.yaml + +- name: Create the deployment template for heat to control instance scaling + copy: + dest: ~/templates/autoscaling/vnf/template.yaml + content: | + heat_template_version: wallaby + description: Example auto scale group, policy and alarm + resources: + scaleup_group: + type: OS::Heat::AutoScalingGroup + properties: + max_size: 3 + min_size: 1 + #desired_capacity: 1 + resource: + type: OS::Nova::Server::VNF + properties: + metadata: {"metering.server_group": {get_param: "OS::stack_id"}} + + scaleup_policy: + type: OS::Heat::ScalingPolicy + properties: + adjustment_type: change_in_capacity + auto_scaling_group_id: { get_resource: scaleup_group } + cooldown: 60 + scaling_adjustment: 1 + + scaledown_policy: + type: OS::Heat::ScalingPolicy + properties: + adjustment_type: change_in_capacity + auto_scaling_group_id: { 
get_resource: scaleup_group } + cooldown: 60 + scaling_adjustment: -1 + + cpu_alarm_high: + type: OS::Aodh::GnocchiAggregationByResourcesAlarm + properties: + description: Scale up instance if CPU > 50% + metric: cpu + aggregation_method: rate:mean + granularity: 300 + evaluation_periods: 1 + threshold: 30000000000.0 + resource_type: instance + comparison_operator: gt + alarm_actions: + - str_replace: + template: trust+url + params: + url: {get_attr: [scaleup_policy, signal_url]} + query: + list_join: + - '' + - - {'=': {server_group: {get_param: "OS::stack_id"}}} + + cpu_alarm_low: + type: OS::Aodh::GnocchiAggregationByResourcesAlarm + properties: + description: Scale down instance if CPU < 20% + metric: cpu + aggregation_method: rate:mean + granularity: 300 + evaluation_periods: 1 + threshold: 12000000000.0 + resource_type: instance + comparison_operator: lt + alarm_actions: + - str_replace: + template: trust+url + params: + url: {get_attr: [scaledown_policy, signal_url]} + query: + list_join: + - '' + - - {'=': {server_group: {get_param: "OS::stack_id"}}} + + outputs: + scaleup_policy_signal_url: + value: {get_attr: [scaleup_policy, alarm_url]} + + scaledown_policy_signal_url: + value: {get_attr: [scaledown_policy, alarm_url]} diff --git a/roles/telemetry_autoscaling/tasks/creating_stack.yml b/roles/telemetry_autoscaling/tasks/creating_stack.yml new file mode 100644 index 0000000..b5235d2 --- /dev/null +++ b/roles/telemetry_autoscaling/tasks/creating_stack.yml @@ -0,0 +1,86 @@ +--- +- name: Create the stack + shell: | + #source ~/overcloudrc; + {{ openstack_cmd }} stack create \ + -t $HOME/templates/autoscaling/vnf/template.yaml \ + -e $HOME/templates/autoscaling/vnf/resources.yaml \ + {{ stack_name }}; + register: result + failed_when: result.rc >= 1 + +- name: Wait for 60 sec + pause: + minutes: 1 + +- name: Verify that the stack was created successfully + shell: | + # source ~/overcloudrc; + {{ openstack_cmd }} stack show {{ stack_name }} -c id -c stack_status; + register: result + failed_when: '"CREATE_COMPLETE" not in result.stdout' + +- name: Verify that the stack resources are created + shell: | + # source ~/overcloudrc; + export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); + {{ openstack_cmd }} stack resource list $STACK_ID; + register: result + failed_when: '"CREATE_COMPLETE" not in result.stdout' + +- name: Verify that an instance was launched by the stack creation + shell: | + # source ~/overcloudrc; + export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); + {{ openstack_cmd }} server list --long | grep $STACK_ID; + register: result + failed_when: result.rc >= 1 + +- name: Verify that the alarms were created for the stack + shell: | + # source ~/overcloudrc; + {{ openstack_cmd }} alarm list + register: result + failed_when: result.rc >= 1 + +- name: Note the physical_resource_id values for the cpu_alarm_low resource + shell: | + # source ~/overcloudrc; + export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); + {{ openstack_cmd }} stack resource list $STACK_ID |grep -i cpu_alarm_low | awk '{print $4}' + register: physical_resource_id_low + +- name: Note the physical_resource_id values for the cpu_alarm_high resource + shell: | + # source ~/overcloudrc; + export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); + {{ openstack_cmd }} stack resource list $STACK_ID |grep -i cpu_alarm_high | awk '{print $4}' + register: physical_resource_id_high + +- name: Verify 
physical_resource_id match the alarm id for cpu_alarm_low + shell: | + # source ~/overcloudrc; + {{ openstack_cmd }} alarm list |grep -i cpu_alarm_low | awk '{print $2}' + register: alarm_id_low + failed_when: + - physical_resource_id_low.stdout != alarm_id_low.stdout + +- name: Verify physical_resource_id match the alarm id for cpu_alarm_high + shell: | + # source ~/overcloudrc; + {{ openstack_cmd }} alarm list |grep -i cpu_alarm_high | awk '{print $2}' + register: alarm_id_high + failed_when: + - physical_resource_id_high.stdout != alarm_id_high.stdout + +- name: Verify that metric resources exist for the stack + shell: | + # source ~/overcloudrc; + export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); + {{ openstack_cmd }} metric resource search \ + --sort-column launched_at -c id \ + -c display_name -c launched_at \ + -c deleted_at --type instance \ + server_group="$STACK_ID" + register: result + failed_when: result.rc >= 1 diff --git a/roles/telemetry_autoscaling/tasks/main.yml b/roles/telemetry_autoscaling/tasks/main.yml new file mode 100644 index 0000000..d4c7e17 --- /dev/null +++ b/roles/telemetry_autoscaling/tasks/main.yml @@ -0,0 +1,7 @@ +--- +# tasks file for telemetry_autoscaling +- include_tasks: verify_autoscaling.yml +- include_tasks: configure_heat.yml +- include_tasks: creating_stack.yml +- include_tasks: test_autoscaling.yml + diff --git a/roles/telemetry_autoscaling/tasks/test_autoscaling.yml b/roles/telemetry_autoscaling/tasks/test_autoscaling.yml new file mode 100644 index 0000000..98951cb --- /dev/null +++ b/roles/telemetry_autoscaling/tasks/test_autoscaling.yml @@ -0,0 +1,69 @@ +--- +- name: register instance IP + shell: | + # source ~/overcloudrc; + export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value) + {{ openstack_cmd }} server list --long -c Networks -c 'Properties' | \ + grep -i $STACK_ID | \ + awk -F'=' '{print $2}' | \ + awk '{print $1}' + #awk -F',' '{print $1}' + register: vnf_instance_ip + +- debug: var=vnf_instance_ip + +- name: Verfiy the number of instances before scaling + shell: | + # source ~/overcloudrc; + {{ openstack_cmd }} server list --long | grep -i 'metering.server_group' | wc -l + register: instance_count1 + +- name: Test automatic scaling up of instances + shell: | + sshpass -p gocubsgo ssh cirros@{{ item }} "sudo yes > /dev/null &" + register: result + with_items: "{{ vnf_instance_ip.stdout_lines }}" + +- name: Verify that the alarm has been triggered + shell: | + # source ~/overcloudrc; + {{ openstack_cmd }} alarm list -c state -c name -f value| \ + grep -i "cpu_alarm_high" | \ + awk '{print $2}' + retries: 100 + delay: 5 + register: result + until: result.stdout == "alarm" + +- name: Verify that the Orchestration service has scaled up the instances + shell: | + # source ~/overcloudrc; + {{ openstack_cmd }} server list --long|grep -i metering.server_group | wc -l + retries: 100 + delay: 5 + register: instance_count2 + until: instance_count2.stdout == "3" + +- name: Test automatic scaling down of instances + pause: + minutes: 5 + +- name: Verify that the alarm has been triggered + shell: | + # source ~/overcloudrc; + {{ openstack_cmd }} alarm list -c state -c name -f value| \ + grep -i "cpu_alarm_low" | \ + awk '{print $2}' + retries: 100 + delay: 5 + register: result + until: result.stdout == "alarm" + +- name: Verify that the Orchestration service has scaled down the instances + shell: | + # source ~/overcloudrc; + {{ openstack_cmd }} server list --long|grep -i metering.server_group 
|wc -l + retries: 100 + delay: 5 + register: instance_count3 + until: instance_count3.stdout == "1" diff --git a/roles/telemetry_autoscaling/tasks/verify_autoscaling.yml b/roles/telemetry_autoscaling/tasks/verify_autoscaling.yml new file mode 100644 index 0000000..f3f90d0 --- /dev/null +++ b/roles/telemetry_autoscaling/tasks/verify_autoscaling.yml @@ -0,0 +1,42 @@ +--- +# This doesn't work when using prom as the metrics backend +# TODO: Add a metrics_backend option +- name: test service API endpoint(metric) for autoscaling + when: metrics_backend == "gnocchi" + shell: | + #source ~/stackrc; + {{ openstack_cmd }} endpoint list --service metric; + register: result + failed_when: result.rc >= 1 + +- name: test service API endpoint(alarm) for autoscaling + shell: | + #source ~/stackrc; + {{ openstack_cmd }} endpoint list --service alarming; + register: result + failed_when: result.rc >= 1 + +- name: test service API endpoint(heat) for autoscaling + shell: | + # source ~/overcloudrc; + {{ openstack_cmd }} endpoint list --service orchestration; + register: result + failed_when: result.rc >= 1 + + # need selection criteria to decide when to run these. + # Need alternative for OSP18. +- name: Verify that the services are running on the overcloud + shell: | + # source ~/overcloudrc; + sudo podman ps --filter=name='heat|gnocchi|ceilometer|aodh'; + register: result + failed_when: result.rc >= 1 + ignore_errors: true + +- name: Verify that the time-series database service is available + when: metrics_backend == "gnocchi" + shell: | + # source ~/overcloudrc; + {{ openstack_cmd }} metric status --fit-width; + register: result + failed_when: result.rc >= 1 From 0cc659a5ae05ba1cf086037ad630cd83285ba33b Mon Sep 17 00:00:00 2001 From: Emma Foley Date: Wed, 1 May 2024 15:08:59 -0400 Subject: [PATCH 02/17] [ansible-lint] Lint the role * remove trailing whitespace * use fqcn for modules * resolve "no-free-form" * name[casing]: All names should start with an uppercase letter. * name[missing]: All tasks should be named. 
* yaml[empty-lines]: Too many blank lines (1 > 0) --- .../tasks/configure_heat.yml | 16 +++++------- .../tasks/creating_stack.yml | 26 +++++++++---------- roles/telemetry_autoscaling/tasks/main.yml | 14 +++++++--- .../tasks/test_autoscaling.yml | 24 +++++++++-------- .../tasks/verify_autoscaling.yml | 16 ++++++------ 5 files changed, 50 insertions(+), 46 deletions(-) diff --git a/roles/telemetry_autoscaling/tasks/configure_heat.yml b/roles/telemetry_autoscaling/tasks/configure_heat.yml index 58db9e5..c203168 100644 --- a/roles/telemetry_autoscaling/tasks/configure_heat.yml +++ b/roles/telemetry_autoscaling/tasks/configure_heat.yml @@ -1,11 +1,7 @@ --- -- set_fact: - metrics_backend: "gnocchi" - when: not (metrics_backend is defined) - - name: Create the generic archive policy for autoscaling when: metrics_backend == "gnocchi" - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; {{ openstack_cmd }} metric archive-policy create generic \ --back-window 0 \ @@ -17,18 +13,18 @@ - name: Verify that the archive policy was created when: metrics_backend == "gnocchi" - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; {{ openstack_cmd }} metric archive-policy show generic; register: result failed_when: result.rc >= 1 - name: Create "vnf" directory under templates - shell: | + ansible.builtin.shell: | mkdir -p $HOME/templates/autoscaling/vnf/ - name: Configure heat template for automatically scaling instances - copy: + ansible.builtin.copy: dest: ~/templates/autoscaling/vnf/instance.yaml content: | heat_template_version: wallaby @@ -88,14 +84,14 @@ port_id: { get_resource: port } - name: Create the resource to reference in the heat template - copy: + ansible.builtin.copy: dest: ~/templates/autoscaling/vnf/resources.yaml content: | resource_registry: "OS::Nova::Server::VNF": /home/stack/templates/autoscaling/vnf/instance.yaml - name: Create the deployment template for heat to control instance scaling - copy: + ansible.builtin.copy: dest: ~/templates/autoscaling/vnf/template.yaml content: | heat_template_version: wallaby diff --git a/roles/telemetry_autoscaling/tasks/creating_stack.yml b/roles/telemetry_autoscaling/tasks/creating_stack.yml index b5235d2..14fc325 100644 --- a/roles/telemetry_autoscaling/tasks/creating_stack.yml +++ b/roles/telemetry_autoscaling/tasks/creating_stack.yml @@ -1,6 +1,6 @@ --- - name: Create the stack - shell: | + ansible.builtin.shell: | #source ~/overcloudrc; {{ openstack_cmd }} stack create \ -t $HOME/templates/autoscaling/vnf/template.yaml \ @@ -10,18 +10,18 @@ failed_when: result.rc >= 1 - name: Wait for 60 sec - pause: + ansible.builtin.pause: minutes: 1 - name: Verify that the stack was created successfully - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; {{ openstack_cmd }} stack show {{ stack_name }} -c id -c stack_status; register: result failed_when: '"CREATE_COMPLETE" not in result.stdout' - name: Verify that the stack resources are created - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); {{ openstack_cmd }} stack resource list $STACK_ID; @@ -29,7 +29,7 @@ failed_when: '"CREATE_COMPLETE" not in result.stdout' - name: Verify that an instance was launched by the stack creation - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); {{ openstack_cmd }} server list --long | grep $STACK_ID; @@ -37,44 +37,44 @@ failed_when: result.rc >= 1 - name: Verify 
that the alarms were created for the stack - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; {{ openstack_cmd }} alarm list register: result failed_when: result.rc >= 1 - name: Note the physical_resource_id values for the cpu_alarm_low resource - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); {{ openstack_cmd }} stack resource list $STACK_ID |grep -i cpu_alarm_low | awk '{print $4}' register: physical_resource_id_low - name: Note the physical_resource_id values for the cpu_alarm_high resource - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); {{ openstack_cmd }} stack resource list $STACK_ID |grep -i cpu_alarm_high | awk '{print $4}' register: physical_resource_id_high - name: Verify physical_resource_id match the alarm id for cpu_alarm_low - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; {{ openstack_cmd }} alarm list |grep -i cpu_alarm_low | awk '{print $2}' register: alarm_id_low - failed_when: + failed_when: - physical_resource_id_low.stdout != alarm_id_low.stdout - name: Verify physical_resource_id match the alarm id for cpu_alarm_high - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; {{ openstack_cmd }} alarm list |grep -i cpu_alarm_high | awk '{print $2}' register: alarm_id_high - failed_when: + failed_when: - physical_resource_id_high.stdout != alarm_id_high.stdout - name: Verify that metric resources exist for the stack - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); {{ openstack_cmd }} metric resource search \ diff --git a/roles/telemetry_autoscaling/tasks/main.yml b/roles/telemetry_autoscaling/tasks/main.yml index d4c7e17..1d137d0 100644 --- a/roles/telemetry_autoscaling/tasks/main.yml +++ b/roles/telemetry_autoscaling/tasks/main.yml @@ -1,7 +1,13 @@ --- # tasks file for telemetry_autoscaling -- include_tasks: verify_autoscaling.yml -- include_tasks: configure_heat.yml -- include_tasks: creating_stack.yml -- include_tasks: test_autoscaling.yml +- name: Check pre-reqs for autoscaling + ansible.builtin.include_tasks: verify_autoscaling.yml +- name: Create the heat templates + ansible.builtin.include_tasks: configure_heat.yml + +- name: Launch the stack + ansible.builtin.include_tasks: creating_stack.yml + +- name: Run the autoscaling tests + ansible.builtin.include_tasks: test_autoscaling.yml diff --git a/roles/telemetry_autoscaling/tasks/test_autoscaling.yml b/roles/telemetry_autoscaling/tasks/test_autoscaling.yml index 98951cb..021edf0 100644 --- a/roles/telemetry_autoscaling/tasks/test_autoscaling.yml +++ b/roles/telemetry_autoscaling/tasks/test_autoscaling.yml @@ -1,6 +1,6 @@ --- -- name: register instance IP - shell: | +- name: Register instance IP + ansible.builtin.shell: | # source ~/overcloudrc; export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value) {{ openstack_cmd }} server list --long -c Networks -c 'Properties' | \ @@ -10,22 +10,24 @@ #awk -F',' '{print $1}' register: vnf_instance_ip -- debug: var=vnf_instance_ip +- name: Show the IP + ansible.builtin.debug: + var: vnf_instance_ip - name: Verfiy the number of instances before scaling - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; {{ openstack_cmd }} server list --long | grep -i 'metering.server_group' | wc -l register: instance_count1 - name: Test automatic 
scaling up of instances - shell: | + ansible.builtin.shell: | sshpass -p gocubsgo ssh cirros@{{ item }} "sudo yes > /dev/null &" register: result with_items: "{{ vnf_instance_ip.stdout_lines }}" - name: Verify that the alarm has been triggered - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; {{ openstack_cmd }} alarm list -c state -c name -f value| \ grep -i "cpu_alarm_high" | \ @@ -35,8 +37,8 @@ register: result until: result.stdout == "alarm" -- name: Verify that the Orchestration service has scaled up the instances - shell: | +- name: Verify that the Orchestration service has scaled up the instances + ansible.builtin.shell: | # source ~/overcloudrc; {{ openstack_cmd }} server list --long|grep -i metering.server_group | wc -l retries: 100 @@ -45,11 +47,11 @@ until: instance_count2.stdout == "3" - name: Test automatic scaling down of instances - pause: + ansible.builtin.pause: minutes: 5 - name: Verify that the alarm has been triggered - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; {{ openstack_cmd }} alarm list -c state -c name -f value| \ grep -i "cpu_alarm_low" | \ @@ -60,7 +62,7 @@ until: result.stdout == "alarm" - name: Verify that the Orchestration service has scaled down the instances - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; {{ openstack_cmd }} server list --long|grep -i metering.server_group |wc -l retries: 100 diff --git a/roles/telemetry_autoscaling/tasks/verify_autoscaling.yml b/roles/telemetry_autoscaling/tasks/verify_autoscaling.yml index f3f90d0..9ae42a3 100644 --- a/roles/telemetry_autoscaling/tasks/verify_autoscaling.yml +++ b/roles/telemetry_autoscaling/tasks/verify_autoscaling.yml @@ -1,23 +1,23 @@ --- # This doesn't work when using prom as the metrics backend # TODO: Add a metrics_backend option -- name: test service API endpoint(metric) for autoscaling +- name: Test service API endpoint(metric) for autoscaling when: metrics_backend == "gnocchi" - shell: | + ansible.builtin.shell: | #source ~/stackrc; {{ openstack_cmd }} endpoint list --service metric; register: result failed_when: result.rc >= 1 -- name: test service API endpoint(alarm) for autoscaling - shell: | +- name: Test service API endpoint(alarm) for autoscaling + ansible.builtin.shell: | #source ~/stackrc; {{ openstack_cmd }} endpoint list --service alarming; register: result failed_when: result.rc >= 1 -- name: test service API endpoint(heat) for autoscaling - shell: | +- name: Test service API endpoint(heat) for autoscaling + ansible.builtin.shell: | # source ~/overcloudrc; {{ openstack_cmd }} endpoint list --service orchestration; register: result @@ -26,7 +26,7 @@ # need selection criteria to decide when to run these. # Need alternative for OSP18. 
- name: Verify that the services are running on the overcloud - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; sudo podman ps --filter=name='heat|gnocchi|ceilometer|aodh'; register: result @@ -35,7 +35,7 @@ - name: Verify that the time-series database service is available when: metrics_backend == "gnocchi" - shell: | + ansible.builtin.shell: | # source ~/overcloudrc; {{ openstack_cmd }} metric status --fit-width; register: result From 24313c9dcfc6c7a7769a18150b7ba66683ff21e4 Mon Sep 17 00:00:00 2001 From: Emma Foley Date: Sun, 5 May 2024 10:45:44 -0400 Subject: [PATCH 03/17] Get OSP18 tests running --- playbooks/autoscaling_osp18.yaml | 27 +++- .../tasks/configure_heat.yml | 117 ++++++++++++++++-- .../tasks/creating_stack.yml | 29 ++++- .../tasks/test_autoscaling.yml | 32 ++++- 4 files changed, 187 insertions(+), 18 deletions(-) diff --git a/playbooks/autoscaling_osp18.yaml b/playbooks/autoscaling_osp18.yaml index 75b7fd4..ca39423 100644 --- a/playbooks/autoscaling_osp18.yaml +++ b/playbooks/autoscaling_osp18.yaml @@ -2,11 +2,36 @@ - hosts: localhost become: no name: Run the autoscaling tests + # NOTE: These vars should be eventually moved into a vars file. vars: openstack_cmd: "oc rsh openstackclient openstack" metrics_backend: "prometheus" - + stack_image: "cirros" + stack_flavor: "m1.small" + stack_network: "private" + stack_external_network: "public" tasks: + # NOTE: the tags are for testing/development, eventually, the role will just be imported, and main.yml will run through the tasks + - import_role: + name: '../name/telemetry_autoscaling' + tasks_from: 'verify_autoscaling' + tags: + - precheck - import_role: name: '../roles/telemetry_autoscaling' + tasks_from: configure_heat + #tasks_from: creating_stack + tags: + - create + - import_role: + name: '../roles/telemetry_autoscaling' + tasks_from: creating_stack + tags: + - create + + - import_role: + name: '../roles/telemetry_autoscaling' + tasks_from: test_autoscaling + tags: + - test diff --git a/roles/telemetry_autoscaling/tasks/configure_heat.yml b/roles/telemetry_autoscaling/tasks/configure_heat.yml index c203168..2e7099a 100644 --- a/roles/telemetry_autoscaling/tasks/configure_heat.yml +++ b/roles/telemetry_autoscaling/tasks/configure_heat.yml @@ -1,4 +1,10 @@ --- +# RE: alarm definitions, it might be cleaner to add two of each alarm, and block off the entire resource, one for gnocchi and one for prom. +# instead of having if/else blocks inside the resource. +# +# TODO(efoley) Consider a heat stack to create the pre-reqs for the autoscaling example. +# i.e. create the sec group, network, etc +# TODO(efoley): Move the HOT templates into templates/ dir for the role - name: Create the generic archive policy for autoscaling when: metrics_backend == "gnocchi" ansible.builtin.shell: | @@ -23,6 +29,8 @@ ansible.builtin.shell: | mkdir -p $HOME/templates/autoscaling/vnf/ + # TODO: Pass the parameters correctly. When I tried to pass the parameters + # into the template.yaml file, they weren't passed to the instance initially. 
- name: Configure heat template for automatically scaling instances ansible.builtin.copy: dest: ~/templates/autoscaling/vnf/instance.yaml @@ -36,11 +44,11 @@ image: type: string description: image used to create instance - default: workload_image_1 + default: {{ stack_image | default("workload_image_1") }} flavor: type: string description: instance flavor to be used - default: workload_flavor_1 + default: {{ stack_flavor | default("workload_flavor_1") }} key_name: type: string description: keypair to be used @@ -48,18 +56,30 @@ network: type: string description: project network to attach instance to - default: workload_internal_net_1 + default: {{ stack_network | default("workload_internal_net_1") }} external_network: type: string description: network used for floating IPs - default: public + default: {{ stack_external_network | default("public") }} + server_name_prefix: + type: string + description: a prefix for each server name. + default: "" + security_group: + type: string + description: the security group for the instances + default: basic resources: vnf: type: OS::Nova::Server properties: + {% if metrics_backend == "prometheus" -%} + name: + list_join: ["", [{get_param: server_name_prefix}, {get_param: OS::stack_name}]] + {% endif -%} flavor: {get_param: flavor} - key_name: {get_param: key_name} + #key_name: {get_param: key_name} image: { get_param: image } metadata: { get_param: metadata } networks: @@ -70,7 +90,7 @@ properties: network: {get_param: network} security_groups: - - workload_secgroup_1 + - { get_param: security_group } floating_ip: type: OS::Neutron::FloatingIP @@ -88,7 +108,12 @@ dest: ~/templates/autoscaling/vnf/resources.yaml content: | resource_registry: - "OS::Nova::Server::VNF": /home/stack/templates/autoscaling/vnf/instance.yaml + "OS::Nova::Server::VNF": ./instance.yaml + # parameters: + # image: cirros + # flavor: m1.small + # network: private + # security_group: basic - name: Create the deployment template for heat to control instance scaling ansible.builtin.copy: @@ -96,18 +121,56 @@ content: | heat_template_version: wallaby description: Example auto scale group, policy and alarm + parameters: + server_name_prefix: + description: A prefix for servers created by this stack. Can be used in queries. + type: string + default: autoscaling_server_ + #image: + # type: string + # description: image used to create instance + # default: "{{ stack_network| default( 'workload_image_1') }}" + #flavor: + # type: string + # description: instance flavor to be used + # default: "{{ stack_flavor | default('workload_flavor_1') }}" + #key_name: + # type: string + # description: keypair to be used + # default: workload_key_1 + #network: + # type: string + # description: project network to attach instance to + # default: "{{ stack_network | default('workload_internal_net_1') }}" + #external_network: + # type: string + # description: network used for floating IPs + # default: public + resources: scaleup_group: type: OS::Heat::AutoScalingGroup properties: max_size: 3 min_size: 1 - #desired_capacity: 1 + desired_capacity: 1 resource: type: OS::Nova::Server::VNF + # resource definieiton for the resource to be created by the group. + # this passes in parameters to the resource. + # So I can put a name_prefix here and it'll be set as the name... + # But the parameter is needed in the VNF. + # Can I set the name in an OS::Nova::Server? 
+ # This sets the property/metadata for the VNF + # name should be set here, but maybe not as metadata + # network: { get_param: network } + # flavor: { get_param: flavor } + # image: { get_param: image } properties: + server_name_prefix: { get_param: server_name_prefix } metadata: {"metering.server_group": {get_param: "OS::stack_id"}} + scaleup_policy: type: OS::Heat::ScalingPolicy properties: @@ -125,15 +188,25 @@ scaling_adjustment: -1 cpu_alarm_high: + {% if metrics_backend == "gnocchi" -%} type: OS::Aodh::GnocchiAggregationByResourcesAlarm + {% endif -%} + {% if metrics_backend == "prometheus" -%} + type: OS::Aodh::PrometheusAlarm + {% endif -%} properties: description: Scale up instance if CPU > 50% + {% if metrics_backend == "gnocchi" -%} metric: cpu aggregation_method: rate:mean granularity: 300 evaluation_periods: 1 - threshold: 30000000000.0 resource_type: instance + threshold: 30000000000.0 + {% endif -%} + {% if metrics_backend == "prometheus" -%} + threshold: 50 + {% endif -%} comparison_operator: gt alarm_actions: - str_replace: @@ -141,20 +214,36 @@ params: url: {get_attr: [scaleup_policy, signal_url]} query: + {% if metrics_backend == "gnocchi" -%} list_join: - '' - - {'=': {server_group: {get_param: "OS::stack_id"}}} + {% endif -%} + {% if metrics_backend == "prometheus" -%} + str_replace: + template: "(rate(ceilometer_cpu{resource_name=~'server_name_prefix.*'}[150s]))/10000000" + params: + server_name_prefix: {get_param: server_name_prefix} + {%- endif %} cpu_alarm_low: + {% if metrics_backend == "gnocchi" -%} type: OS::Aodh::GnocchiAggregationByResourcesAlarm + {% endif -%} + {% if metrics_backend == "prometheus" -%} + type: OS::Aodh::PrometheusAlarm + {% endif -%} properties: description: Scale down instance if CPU < 20% + {% if metrics_backend == "gnocchi" -%} metric: cpu aggregation_method: rate:mean granularity: 300 evaluation_periods: 1 - threshold: 12000000000.0 resource_type: instance + threshold: 12000000000.0 + {% endif -%} + threshold: 20 comparison_operator: lt alarm_actions: - str_replace: @@ -162,9 +251,17 @@ params: url: {get_attr: [scaledown_policy, signal_url]} query: + {% if metrics_backend == "gnocchi" -%} list_join: - '' - - {'=': {server_group: {get_param: "OS::stack_id"}}} + {% endif -%} + {% if metrics_backend == "prometheus" -%} + str_replace: + template: "(rate(ceilometer_cpu{resource_name=~'server_name_prefix.*'}[150s]))/10000000" + params: + server_name_prefix: {get_param: server_name_prefix} + {% endif %} outputs: scaleup_policy_signal_url: diff --git a/roles/telemetry_autoscaling/tasks/creating_stack.yml b/roles/telemetry_autoscaling/tasks/creating_stack.yml index 14fc325..45ccb33 100644 --- a/roles/telemetry_autoscaling/tasks/creating_stack.yml +++ b/roles/telemetry_autoscaling/tasks/creating_stack.yml @@ -1,10 +1,33 @@ --- +- name: Copy the templates to openstackclient pod + ansible.builtin.shell: | + oc cp $HOME/templates/autoscaling/vnf/template.yaml openstackclient:/tmp + oc cp $HOME/templates/autoscaling/vnf/resources.yaml openstackclient:/tmp + oc cp $HOME/templates/autoscaling/vnf/instance.yaml openstackclient:/tmp + + # template location was $HOME/templates/autoscaling/vnf/ +- name: "[temp] Set the location to copy the template to" + ansible.builtin.set_fact: + template_location: '/tmp' + + # I need to pass parameters to the stack create + # TODO: Define parameters as vars, and add them to the role. + # TODO: Update pre-checks to make sure the image, flavor, key, network and external_network exist. 
+ # Temperority use the install_yamls/devinstall make edpm_deploy_instance to make these resources. + # This maketarget uses the devsetup/scripts/edpm-deploy-instance.sh script + # TODO: Either update the test to create the required resources, or add checks to make sure they exist. +- ansible.builtin.set_fact: + stack_image: cirros + stack_flavor: m1.small + #stack_keyname: + stack_network: private + stack_external_network: public - name: Create the stack ansible.builtin.shell: | #source ~/overcloudrc; {{ openstack_cmd }} stack create \ - -t $HOME/templates/autoscaling/vnf/template.yaml \ - -e $HOME/templates/autoscaling/vnf/resources.yaml \ + -t {{ template_location }}/template.yaml \ + -e {{ template_location }}/resources.yaml \ {{ stack_name }}; register: result failed_when: result.rc >= 1 @@ -73,7 +96,9 @@ failed_when: - physical_resource_id_high.stdout != alarm_id_high.stdout + # TODO: get alt check for prom - name: Verify that metric resources exist for the stack + when: metrics_backend == "gnocchi" ansible.builtin.shell: | # source ~/overcloudrc; export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); diff --git a/roles/telemetry_autoscaling/tasks/test_autoscaling.yml b/roles/telemetry_autoscaling/tasks/test_autoscaling.yml index 021edf0..d9a144a 100644 --- a/roles/telemetry_autoscaling/tasks/test_autoscaling.yml +++ b/roles/telemetry_autoscaling/tasks/test_autoscaling.yml @@ -1,29 +1,41 @@ --- + # NOTE: The format of the output appears to have changed; There are now 2 + # IP addresses, and the second one is the one associated with the floating IP - name: Register instance IP ansible.builtin.shell: | # source ~/overcloudrc; export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value) {{ openstack_cmd }} server list --long -c Networks -c 'Properties' | \ grep -i $STACK_ID | \ - awk -F'=' '{print $2}' | \ - awk '{print $1}' - #awk -F',' '{print $1}' + awk -F'=' '{print $2}' | \ + awk -F'|' '{print $1}' | \ + awk -F',' '{print $2}' register: vnf_instance_ip - name: Show the IP ansible.builtin.debug: var: vnf_instance_ip +- when: vnf_instance_ip.stdout | length == 0 + fail: + msg: "bad vnf_instance_ip" + - name: Verfiy the number of instances before scaling ansible.builtin.shell: | # source ~/overcloudrc; {{ openstack_cmd }} server list --long | grep -i 'metering.server_group' | wc -l register: instance_count1 + # NOTE: Disabling strict host key checking so that ssh doesn't give an error + # when the host key changes i.e. if a new VM has been assigned a + # previously-used IP address, which will happen during local testing but not + # in CI + # NOTE: the with_items is because I was capturing both IPs initially, and + # using both in case the order was not consistent. 
- name: Test automatic scaling up of instances ansible.builtin.shell: | - sshpass -p gocubsgo ssh cirros@{{ item }} "sudo yes > /dev/null &" - register: result + sshpass -p gocubsgo ssh -o StrictHostKeyChecking=False cirros@{{ item | trim }} "sudo yes > /dev/null &" + register: busy_process with_items: "{{ vnf_instance_ip.stdout_lines }}" - name: Verify that the alarm has been triggered @@ -46,6 +58,12 @@ register: instance_count2 until: instance_count2.stdout == "3" +- name: Stop the busy process + ansible.builtin.shell: | + sshpass -p gocubsgo ssh cirros@{{ item | trim }} "sudo killall yes" + register: kill_busy_process + with_items: "{{ vnf_instance_ip.stdout_lines }}" + - name: Test automatic scaling down of instances ansible.builtin.pause: minutes: 5 @@ -61,6 +79,10 @@ register: result until: result.stdout == "alarm" + # TODO: the metering.server group metadata was used for gnocchi alarm + # selection. + # prom uses the instance name, so the metadata MIGHT be removed, and a new + # check for whether the scaling group has scaled down may be needed. - name: Verify that the Orchestration service has scaled down the instances ansible.builtin.shell: | # source ~/overcloudrc; From 2d5a006c53780fd973be8635f9f235771af1b907 Mon Sep 17 00:00:00 2001 From: Emma Foley Date: Tue, 7 May 2024 08:33:07 -0400 Subject: [PATCH 04/17] Fix syntax error --- playbooks/autoscaling_osp18.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/playbooks/autoscaling_osp18.yaml b/playbooks/autoscaling_osp18.yaml index ca39423..9e5f7eb 100644 --- a/playbooks/autoscaling_osp18.yaml +++ b/playbooks/autoscaling_osp18.yaml @@ -13,7 +13,7 @@ tasks: # NOTE: the tags are for testing/development, eventually, the role will just be imported, and main.yml will run through the tasks - import_role: - name: '../name/telemetry_autoscaling' + name: '../roles/telemetry_autoscaling' tasks_from: 'verify_autoscaling' tags: - precheck From 13ae82647b516be0e58073c4658a7c7dfe51127b Mon Sep 17 00:00:00 2001 From: Emma Foley Date: Tue, 7 May 2024 08:34:29 -0400 Subject: [PATCH 05/17] [test_autoscaling] Add a task to update the known_hosts file To remove previous entries for the IP address, since the VMs will reuse the IP addresses, and this will cause the ssh commands to fail, due to looking like a main-in-the-middle attack. --- .../tasks/test_autoscaling.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/roles/telemetry_autoscaling/tasks/test_autoscaling.yml b/roles/telemetry_autoscaling/tasks/test_autoscaling.yml index d9a144a..3f15570 100644 --- a/roles/telemetry_autoscaling/tasks/test_autoscaling.yml +++ b/roles/telemetry_autoscaling/tasks/test_autoscaling.yml @@ -26,12 +26,31 @@ {{ openstack_cmd }} server list --long | grep -i 'metering.server_group' | wc -l register: instance_count1 +- shell: | + cat {{ ansible_env.HOME }}/.ssh/known_hosts | grep "{{ item | trim }}" + with_items: "{{ vnf_instance_ip.stdout_lines }}" + ignore_errors: true + +- name: Remove the existing hostkey, if there is one for the target IP + ansible.builtin.lineinfile: + dest: '{{ ansible_env.HOME }}/.ssh/known_hosts' + state: absent + regexp: "{{ item |trim }}" + with_items: "{{ vnf_instance_ip.stdout_lines }}" + +- shell: | + cat {{ ansible_env.HOME }}/.ssh/known_hosts | grep "{{ item | trim }}" + with_items: "{{ vnf_instance_ip.stdout_lines }}" + ignore_errors: true + # NOTE: Disabling strict host key checking so that ssh doesn't give an error # when the host key changes i.e. 
if a new VM has been assigned a
   # previously-used IP address, which will happen during local testing but not
   # in CI
   # NOTE: the with_items is because I was capturing both IPs initially, and
   # using both in case the order was not consistent.
+  # Disabling hostkey checking didn't work. I need to remove the key from the known_hosts file before trying to run this.
+  # The key removal should move to some pre/pre-run stage.
 - name: Test automatic scaling up of instances
   ansible.builtin.shell: |
     sshpass -p gocubsgo ssh -o StrictHostKeyChecking=False cirros@{{ item | trim }} "sudo yes > /dev/null &"

From 11b74f70755bb742d3e64a3acbcbb2289edd027b Mon Sep 17 00:00:00 2001
From: Emma Foley
Date: Tue, 7 May 2024 14:49:31 -0400
Subject: [PATCH 06/17] Pre-create resources using install_yamls

---
 playbooks/autoscaling_osp18.yaml           | 7 +++++++
 roles/telemetry_autoscaling/tasks/main.yml | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/playbooks/autoscaling_osp18.yaml b/playbooks/autoscaling_osp18.yaml
index 9e5f7eb..c5a34f8 100644
--- a/playbooks/autoscaling_osp18.yaml
+++ b/playbooks/autoscaling_osp18.yaml
@@ -11,6 +11,13 @@
     stack_network: "private"
     stack_external_network: "public"
   tasks:
+    # this is temporary and should be replaced later by some heat stack or some ansible
+    - community.general.make:
+        chdir: "{{ ansible_env.HOME }}/install_yamls/devsetup"
+        target: edpm_deploy_instance
+      tags:
+        - setup
+
     # NOTE: the tags are for testing/development, eventually, the role will just be imported, and main.yml will run through the tasks
     - import_role:
         name: '../roles/telemetry_autoscaling'
diff --git a/roles/telemetry_autoscaling/tasks/main.yml b/roles/telemetry_autoscaling/tasks/main.yml
index 1d137d0..ab4eee9 100644
--- a/roles/telemetry_autoscaling/tasks/main.yml
+++ b/roles/telemetry_autoscaling/tasks/main.yml
@@ -1,5 +1,7 @@
 ---
 # tasks file for telemetry_autoscaling
+  # TODO: Update the pre-checks to include the resources that the stack expects to exist
+  # i.e.
networks, flavor, image, security group - name: Check pre-reqs for autoscaling ansible.builtin.include_tasks: verify_autoscaling.yml From d576e6dca2d55f30dceba8a71e589362fdf36820 Mon Sep 17 00:00:00 2001 From: Emma Foley Date: Tue, 7 May 2024 15:07:33 -0400 Subject: [PATCH 07/17] Add task to clone install_yamls --- playbooks/autoscaling_osp18.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/playbooks/autoscaling_osp18.yaml b/playbooks/autoscaling_osp18.yaml index c5a34f8..840a246 100644 --- a/playbooks/autoscaling_osp18.yaml +++ b/playbooks/autoscaling_osp18.yaml @@ -12,8 +12,13 @@ stack_external_network: "public" tasks: # this is temperory and should be replaced later by some heat stack or some ansible + - ansible.builtin.command: + cmd: git clone http://github.com/openstack-k8s-operators/install_yamls + chdir: "{{ playbook_dir }}" + tags: + - setup - community.general.make: - chdir: "{{ ansible_env.HOME }}/install_yamls/devsetup" + chdir: "{{ playbook_dir }}/install_yamls/devsetup" target: edpm_deploy_instance tags: - setup From ac0ec95bc4815418f52fe7b221a1f6370c40cac3 Mon Sep 17 00:00:00 2001 From: Emma Foley Date: Thu, 9 May 2024 12:38:27 -0400 Subject: [PATCH 08/17] patch the openstack cloud to use the expected versions of aodh and heat --- playbooks/autoscaling_osp18.yaml | 28 ++++++++++++++++++++++++++++ playbooks/patch.yaml | 9 +++++++++ 2 files changed, 37 insertions(+) create mode 100644 playbooks/patch.yaml diff --git a/playbooks/autoscaling_osp18.yaml b/playbooks/autoscaling_osp18.yaml index 840a246..4760463 100644 --- a/playbooks/autoscaling_osp18.yaml +++ b/playbooks/autoscaling_osp18.yaml @@ -17,12 +17,40 @@ chdir: "{{ playbook_dir }}" tags: - setup + - community.general.make: chdir: "{{ playbook_dir }}/install_yamls/devsetup" target: edpm_deploy_instance tags: - setup + - name: Patch the openstackversions to use the master containers for aodh and heat + ansible.builtin.shell: + cmd: | + oc patch openstackversions openstack-galera-network-isolation --type merge --patch-file patch.yaml + tags: + - setup + + - name: patch observabilityclient into openstackclient + shell: + cmd: | + oc exec openstackclient -- python3 -m ensurepip --upgrade + oc exec openstackclient -- python3 -m pip install --upgrade aodhclient + oc exec openstackclient -- python3 -m pip install python-observabilityclient + tags: + - setup + + - name: Wait until the oscp is resolved the changes to continue + ansible.builtin.shell: + cmd: | + oc get oscp | grep "Setup complete" + retries: 24 + timeout: 5 + until: output.stdout_lines | length == 1 + register: output + tags: + - setup + # NOTE: the tags are for testing/development, eventually, the role will just be imported, and main.yml will run through the tasks - import_role: name: '../roles/telemetry_autoscaling' diff --git a/playbooks/patch.yaml b/playbooks/patch.yaml new file mode 100644 index 0000000..a0b2e2b --- /dev/null +++ b/playbooks/patch.yaml @@ -0,0 +1,9 @@ +spec: + customContainerImages: + aodhAPIImage: quay.io/podified-master-centos9/openstack-aodh-api:current-podified + aodhEvaluatorImage: quay.io/podified-master-centos9/openstack-aodh-evaluator:current-podified + aodhListenerImage: quay.io/podified-master-centos9/openstack-aodh-listener:current-podified + aodhNotifierImage: quay.io/podified-master-centos9/openstack-aodh-notifier:current-podified + heatAPIImage: quay.io/podified-master-centos9/openstack-heat-api:current-podified + heatCfnapiImage: quay.io/podified-master-centos9/openstack-heat-api-cfn:current-podified 
+ heatEngineImage: quay.io/podified-master-centos9/openstack-heat-engine:current-podified From 921375b6e23c14e74f3a8047d0a167909cf4e0a4 Mon Sep 17 00:00:00 2001 From: Emma Foley Date: Fri, 10 May 2024 12:00:43 -0400 Subject: [PATCH 09/17] Update test for stack creation --- .../tasks/creating_stack.yml | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/roles/telemetry_autoscaling/tasks/creating_stack.yml b/roles/telemetry_autoscaling/tasks/creating_stack.yml index 45ccb33..43c6330 100644 --- a/roles/telemetry_autoscaling/tasks/creating_stack.yml +++ b/roles/telemetry_autoscaling/tasks/creating_stack.yml @@ -32,16 +32,18 @@ register: result failed_when: result.rc >= 1 -- name: Wait for 60 sec - ansible.builtin.pause: - minutes: 1 + #- name: Wait for 60 sec + # ansible.builtin.pause: + # minutes: 1 - name: Verify that the stack was created successfully ansible.builtin.shell: | # source ~/overcloudrc; {{ openstack_cmd }} stack show {{ stack_name }} -c id -c stack_status; register: result - failed_when: '"CREATE_COMPLETE" not in result.stdout' + until: '"CREATE_COMPLETE" in result.stdout' + timeout: 30 + retries: 20 - name: Verify that the stack resources are created ansible.builtin.shell: | @@ -49,7 +51,10 @@ export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); {{ openstack_cmd }} stack resource list $STACK_ID; register: result - failed_when: '"CREATE_COMPLETE" not in result.stdout' + timeout: 30 + retries: 20 + until: '"CREATE_COMPLETE" in result.stdout' + # failed_when: '"CREATE_COMPLETE" not in result.stdout' - name: Verify that an instance was launched by the stack creation ansible.builtin.shell: | From 9d6729de975df7157d4d02455fb5603a4635d3f3 Mon Sep 17 00:00:00 2001 From: mgirgisf Date: Mon, 13 May 2024 10:30:01 +0200 Subject: [PATCH 10/17] debug stack failure --- roles/telemetry_autoscaling/tasks/creating_stack.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/roles/telemetry_autoscaling/tasks/creating_stack.yml b/roles/telemetry_autoscaling/tasks/creating_stack.yml index 43c6330..5a55328 100644 --- a/roles/telemetry_autoscaling/tasks/creating_stack.yml +++ b/roles/telemetry_autoscaling/tasks/creating_stack.yml @@ -35,7 +35,19 @@ #- name: Wait for 60 sec # ansible.builtin.pause: # minutes: 1 +- name: Debug that the stack resources + block: + - name: Verify that the alarms were created for the stack + ansible.builtin.shell: | + {{ openstack_cmd }} alarm list + register: result + failed_when: result.rc >= 1 + - name: Verify that the stack resources are created + ansible.builtin.shell: | + export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); + {{ openstack_cmd }} stack resource list $STACK_ID; + - name: Verify that the stack was created successfully ansible.builtin.shell: | # source ~/overcloudrc; From b7af4946d74c8845c6328a68ed759c3ee325583c Mon Sep 17 00:00:00 2001 From: mgirgisf Date: Mon, 13 May 2024 12:54:58 +0200 Subject: [PATCH 11/17] debug the stack resource --- .../tasks/creating_stack.yml | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/roles/telemetry_autoscaling/tasks/creating_stack.yml b/roles/telemetry_autoscaling/tasks/creating_stack.yml index 5a55328..b4a128b 100644 --- a/roles/telemetry_autoscaling/tasks/creating_stack.yml +++ b/roles/telemetry_autoscaling/tasks/creating_stack.yml @@ -36,8 +36,12 @@ # ansible.builtin.pause: # minutes: 1 - name: Debug that the stack resources - block: - - name: Verify that the alarms were created 
for the stack + block: + - name: Wait for 60 sec + ansible.builtin.pause: + minutes: 1 + + - name: show that the alarms created for the stack ansible.builtin.shell: | {{ openstack_cmd }} alarm list register: result @@ -47,6 +51,15 @@ ansible.builtin.shell: | export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); {{ openstack_cmd }} stack resource list $STACK_ID; + + - name: Verify that the stack was created successfully + ansible.builtin.shell: | + # source ~/overcloudrc; + {{ openstack_cmd }} stack show {{ stack_name }}; + register: result + until: '"CREATE_COMPLETE" in result.stdout' + timeout: 30 + retries: 20 - name: Verify that the stack was created successfully ansible.builtin.shell: | From e6950d7666c7e6a58c51aeecc4f289be8794f5e1 Mon Sep 17 00:00:00 2001 From: mgirgisf Date: Tue, 14 May 2024 11:43:26 +0200 Subject: [PATCH 12/17] update query --- .../tasks/configure_heat.yml | 4 +-- .../tasks/creating_stack.yml | 31 +++---------------- 2 files changed, 7 insertions(+), 28 deletions(-) diff --git a/roles/telemetry_autoscaling/tasks/configure_heat.yml b/roles/telemetry_autoscaling/tasks/configure_heat.yml index 2e7099a..8ccfce0 100644 --- a/roles/telemetry_autoscaling/tasks/configure_heat.yml +++ b/roles/telemetry_autoscaling/tasks/configure_heat.yml @@ -221,7 +221,7 @@ {% endif -%} {% if metrics_backend == "prometheus" -%} str_replace: - template: "(rate(ceilometer_cpu{resource_name=~'server_name_prefix.*'}[150s]))/10000000" + template: "(rate(ceilometer_cpu{resource_name=~'stack_name.*'}[1m])) * 100" params: server_name_prefix: {get_param: server_name_prefix} {%- endif %} @@ -258,7 +258,7 @@ {% endif -%} {% if metrics_backend == "prometheus" -%} str_replace: - template: "(rate(ceilometer_cpu{resource_name=~'server_name_prefix.*'}[150s]))/10000000" + template: "(rate(ceilometer_cpu{resource_name=~'stack_name.*'}[1m])) * 100" params: server_name_prefix: {get_param: server_name_prefix} {% endif %} diff --git a/roles/telemetry_autoscaling/tasks/creating_stack.yml b/roles/telemetry_autoscaling/tasks/creating_stack.yml index b4a128b..15bbd97 100644 --- a/roles/telemetry_autoscaling/tasks/creating_stack.yml +++ b/roles/telemetry_autoscaling/tasks/creating_stack.yml @@ -32,39 +32,18 @@ register: result failed_when: result.rc >= 1 +- name: Print the result + ansible.builtin.debug: + var: result + #- name: Wait for 60 sec # ansible.builtin.pause: # minutes: 1 -- name: Debug that the stack resources - block: - - name: Wait for 60 sec - ansible.builtin.pause: - minutes: 1 - - - name: show that the alarms created for the stack - ansible.builtin.shell: | - {{ openstack_cmd }} alarm list - register: result - failed_when: result.rc >= 1 - - - name: Verify that the stack resources are created - ansible.builtin.shell: | - export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); - {{ openstack_cmd }} stack resource list $STACK_ID; - - - name: Verify that the stack was created successfully - ansible.builtin.shell: | - # source ~/overcloudrc; - {{ openstack_cmd }} stack show {{ stack_name }}; - register: result - until: '"CREATE_COMPLETE" in result.stdout' - timeout: 30 - retries: 20 - name: Verify that the stack was created successfully ansible.builtin.shell: | # source ~/overcloudrc; - {{ openstack_cmd }} stack show {{ stack_name }} -c id -c stack_status; + {{ openstack_cmd }} stack show {{ stack_name }}; register: result until: '"CREATE_COMPLETE" in result.stdout' timeout: 30 From e65feeac0d54696884414a7b202632fcaff8ad76 Mon Sep 17 00:00:00 2001 
From: mgirgisf Date: Tue, 14 May 2024 12:38:06 +0200 Subject: [PATCH 13/17] update query --- .../telemetry_autoscaling/tasks/creating_stack.yml | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/roles/telemetry_autoscaling/tasks/creating_stack.yml b/roles/telemetry_autoscaling/tasks/creating_stack.yml index 15bbd97..fa853e3 100644 --- a/roles/telemetry_autoscaling/tasks/creating_stack.yml +++ b/roles/telemetry_autoscaling/tasks/creating_stack.yml @@ -35,7 +35,7 @@ - name: Print the result ansible.builtin.debug: var: result - + #- name: Wait for 60 sec # ansible.builtin.pause: # minutes: 1 @@ -59,6 +59,17 @@ retries: 20 until: '"CREATE_COMPLETE" in result.stdout' # failed_when: '"CREATE_COMPLETE" not in result.stdout' + +- name: Verify that ceilometer_cpu metric exist + ansible.builtin.shell: | + # source ~/overcloudrc; + {{ openstack_cmd }} metric list + register: result + failed_when: result.rc >= 1 + +- name: Print the result + ansible.builtin.debug: + var: result - name: Verify that an instance was launched by the stack creation ansible.builtin.shell: | From 2f613ef477d7f6799c6f2ef70eeccf52205c574f Mon Sep 17 00:00:00 2001 From: Emma Foley Date: Tue, 14 May 2024 12:01:48 -0400 Subject: [PATCH 14/17] Update python-observabilityclient version and alarm query When updating the python-observabilityclient, the flag was omitted, and package wasn't re-installed. The alarm query needed to be reverted, since it was revised when debugging. --- playbooks/autoscaling_osp18.yaml | 2 +- roles/telemetry_autoscaling/tasks/configure_heat.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/playbooks/autoscaling_osp18.yaml b/playbooks/autoscaling_osp18.yaml index 4760463..221356c 100644 --- a/playbooks/autoscaling_osp18.yaml +++ b/playbooks/autoscaling_osp18.yaml @@ -36,7 +36,7 @@ cmd: | oc exec openstackclient -- python3 -m ensurepip --upgrade oc exec openstackclient -- python3 -m pip install --upgrade aodhclient - oc exec openstackclient -- python3 -m pip install python-observabilityclient + oc exec openstackclient -- python3 -m pip install --upgrade python-observabilityclient tags: - setup diff --git a/roles/telemetry_autoscaling/tasks/configure_heat.yml b/roles/telemetry_autoscaling/tasks/configure_heat.yml index 8ccfce0..2e7099a 100644 --- a/roles/telemetry_autoscaling/tasks/configure_heat.yml +++ b/roles/telemetry_autoscaling/tasks/configure_heat.yml @@ -221,7 +221,7 @@ {% endif -%} {% if metrics_backend == "prometheus" -%} str_replace: - template: "(rate(ceilometer_cpu{resource_name=~'stack_name.*'}[1m])) * 100" + template: "(rate(ceilometer_cpu{resource_name=~'server_name_prefix.*'}[150s]))/10000000" params: server_name_prefix: {get_param: server_name_prefix} {%- endif %} @@ -258,7 +258,7 @@ {% endif -%} {% if metrics_backend == "prometheus" -%} str_replace: - template: "(rate(ceilometer_cpu{resource_name=~'stack_name.*'}[1m])) * 100" + template: "(rate(ceilometer_cpu{resource_name=~'server_name_prefix.*'}[150s]))/10000000" params: server_name_prefix: {get_param: server_name_prefix} {% endif %} From 27d15dda14c7df7e298ae10d5d883cdfe0331ab5 Mon Sep 17 00:00:00 2001 From: Emma Foley Date: Tue, 14 May 2024 15:32:03 -0400 Subject: [PATCH 15/17] Remove the telemetry_autoscaling role and use it from fvt instead The telemetry_autoscaling role has been moved to http://github.com/infrawatch/feature-verification-tests --- playbooks/autoscaling_osp18.yaml | 24 +- roles/telemetry_autoscaling/README.md | 38 --- roles/telemetry_autoscaling/defaults/main.yml | 
5 - roles/telemetry_autoscaling/meta/main.yml | 52 ---- .../tasks/configure_heat.yml | 271 ------------------ .../tasks/creating_stack.yml | 131 --------- roles/telemetry_autoscaling/tasks/main.yml | 15 - .../tasks/test_autoscaling.yml | 112 -------- .../tasks/verify_autoscaling.yml | 42 --- 9 files changed, 20 insertions(+), 670 deletions(-) delete mode 100644 roles/telemetry_autoscaling/README.md delete mode 100644 roles/telemetry_autoscaling/defaults/main.yml delete mode 100644 roles/telemetry_autoscaling/meta/main.yml delete mode 100644 roles/telemetry_autoscaling/tasks/configure_heat.yml delete mode 100644 roles/telemetry_autoscaling/tasks/creating_stack.yml delete mode 100644 roles/telemetry_autoscaling/tasks/main.yml delete mode 100644 roles/telemetry_autoscaling/tasks/test_autoscaling.yml delete mode 100644 roles/telemetry_autoscaling/tasks/verify_autoscaling.yml diff --git a/playbooks/autoscaling_osp18.yaml b/playbooks/autoscaling_osp18.yaml index 221356c..cc7e7a3 100644 --- a/playbooks/autoscaling_osp18.yaml +++ b/playbooks/autoscaling_osp18.yaml @@ -12,9 +12,25 @@ stack_external_network: "public" tasks: # this is temperory and should be replaced later by some heat stack or some ansible + + - set_fact: + fvt_dir: "{{ playbook_dir }}/feature-verification-tests" + when: "{{ not fvt_dir is defined }}" + tags: + always + + - debug: var=fvt_dir + + - command: + cmd: git clone http://github.com/infrawatch/feature-verification-tests -b efoley-add_telemetry_autoscaling {{ fvt_dir }} + creates: "{{ fvt_dir }}" + tags: + - setup + - ansible.builtin.command: cmd: git clone http://github.com/openstack-k8s-operators/install_yamls chdir: "{{ playbook_dir }}" + creates: "{{ playbook_dir }}/install_yamls" tags: - setup @@ -53,25 +69,25 @@ # NOTE: the tags are for testing/development, eventually, the role will just be imported, and main.yml will run through the tasks - import_role: - name: '../roles/telemetry_autoscaling' + name: '{{ fvt_dir }}roles/telemetry_autoscaling' tasks_from: 'verify_autoscaling' tags: - precheck - import_role: - name: '../roles/telemetry_autoscaling' + name: '{{ fvt_dir }}/roles/telemetry_autoscaling' tasks_from: configure_heat #tasks_from: creating_stack tags: - create - import_role: - name: '../roles/telemetry_autoscaling' + name: '{{ fvt_dir }}/roles/telemetry_autoscaling' tasks_from: creating_stack tags: - create - import_role: - name: '../roles/telemetry_autoscaling' + name: '{{ fvt_dir }}/roles/telemetry_autoscaling' tasks_from: test_autoscaling tags: - test diff --git a/roles/telemetry_autoscaling/README.md b/roles/telemetry_autoscaling/README.md deleted file mode 100644 index 225dd44..0000000 --- a/roles/telemetry_autoscaling/README.md +++ /dev/null @@ -1,38 +0,0 @@ -Role Name -========= - -A brief description of the role goes here. - -Requirements ------------- - -Any pre-requisites that may not be covered by Ansible itself or the role should be mentioned here. For instance, if the role uses the EC2 module, it may be a good idea to mention in this section that the boto package is required. - -Role Variables --------------- - -A description of the settable variables for this role should go here, including any variables that are in defaults/main.yml, vars/main.yml, and any variables that can/should be set via parameters to the role. Any variables that are read from other roles and/or the global scope (ie. hostvars, group vars, etc.) should be mentioned here as well. 
- -Dependencies ------------- - -A list of other roles hosted on Galaxy should go here, plus any details in regards to parameters that may need to be set for other roles, or variables that are used from other roles. - -Example Playbook ----------------- - -Including an example of how to use your role (for instance, with variables passed in as parameters) is always nice for users too: - - - hosts: servers - roles: - - { role: username.rolename, x: 42 } - -License -------- - -BSD - -Author Information ------------------- - -An optional section for the role authors to include contact information, or a website (HTML is not allowed). diff --git a/roles/telemetry_autoscaling/defaults/main.yml b/roles/telemetry_autoscaling/defaults/main.yml deleted file mode 100644 index 0bd5b91..0000000 --- a/roles/telemetry_autoscaling/defaults/main.yml +++ /dev/null @@ -1,5 +0,0 @@ ---- -# defaults file for telemetry_autoscaling -openstack_cmd: "openstack" -stack_name: vnf -metrics_backend: gnocchi diff --git a/roles/telemetry_autoscaling/meta/main.yml b/roles/telemetry_autoscaling/meta/main.yml deleted file mode 100644 index c572acc..0000000 --- a/roles/telemetry_autoscaling/meta/main.yml +++ /dev/null @@ -1,52 +0,0 @@ -galaxy_info: - author: your name - description: your role description - company: your company (optional) - - # If the issue tracker for your role is not on github, uncomment the - # next line and provide a value - # issue_tracker_url: http://example.com/issue/tracker - - # Choose a valid license ID from https://spdx.org - some suggested licenses: - # - BSD-3-Clause (default) - # - MIT - # - GPL-2.0-or-later - # - GPL-3.0-only - # - Apache-2.0 - # - CC-BY-4.0 - license: license (GPL-2.0-or-later, MIT, etc) - - min_ansible_version: 2.1 - - # If this a Container Enabled role, provide the minimum Ansible Container version. - # min_ansible_container_version: - - # - # Provide a list of supported platforms, and for each platform a list of versions. - # If you don't wish to enumerate all versions for a particular platform, use 'all'. - # To view available platforms and versions (or releases), visit: - # https://galaxy.ansible.com/api/v1/platforms/ - # - # platforms: - # - name: Fedora - # versions: - # - all - # - 25 - # - name: SomePlatform - # versions: - # - all - # - 1.0 - # - 7 - # - 99.99 - - galaxy_tags: [] - # List tags for your role here, one per line. A tag is a keyword that describes - # and categorizes the role. Users find roles by searching for tags. Be sure to - # remove the '[]' above, if you add tags to this list. - # - # NOTE: A tag is limited to a single word comprised of alphanumeric characters. - # Maximum 20 tags per role. - -dependencies: [] - # List your role dependencies here, one per line. Be sure to remove the '[]' above, - # if you add dependencies to this list. diff --git a/roles/telemetry_autoscaling/tasks/configure_heat.yml b/roles/telemetry_autoscaling/tasks/configure_heat.yml deleted file mode 100644 index 2e7099a..0000000 --- a/roles/telemetry_autoscaling/tasks/configure_heat.yml +++ /dev/null @@ -1,271 +0,0 @@ ---- -# RE: alarm definitions, it might be cleaner to add two of each alarm, and block off the entire resource, one for gnocchi and one for prom. -# instead of having if/else blocks inside the resource. -# -# TODO(efoley) Consider a heat stack to create the pre-reqs for the autoscaling example. -# i.e. 
create the sec group, network, etc -# TODO(efoley): Move the HOT templates into templates/ dir for the role -- name: Create the generic archive policy for autoscaling - when: metrics_backend == "gnocchi" - ansible.builtin.shell: | - # source ~/overcloudrc; - {{ openstack_cmd }} metric archive-policy create generic \ - --back-window 0 \ - --definition timespan:'4:00:00',granularity:'0:01:00',points:240 \ - --aggregation-method 'rate:mean' \ - --aggregation-method 'mean'; - register: result - failed_when: result.rc >= 1 - -- name: Verify that the archive policy was created - when: metrics_backend == "gnocchi" - ansible.builtin.shell: | - # source ~/overcloudrc; - {{ openstack_cmd }} metric archive-policy show generic; - register: result - failed_when: result.rc >= 1 - -- name: Create "vnf" directory under templates - ansible.builtin.shell: | - mkdir -p $HOME/templates/autoscaling/vnf/ - - # TODO: Pass the parameters correctly. When I tried to pass the parameters - # into the template.yaml file, they weren't passed to the instance initially. -- name: Configure heat template for automatically scaling instances - ansible.builtin.copy: - dest: ~/templates/autoscaling/vnf/instance.yaml - content: | - heat_template_version: wallaby - description: Template to control scaling of VNF instance - - parameters: - metadata: - type: json - image: - type: string - description: image used to create instance - default: {{ stack_image | default("workload_image_1") }} - flavor: - type: string - description: instance flavor to be used - default: {{ stack_flavor | default("workload_flavor_1") }} - key_name: - type: string - description: keypair to be used - default: workload_key_1 - network: - type: string - description: project network to attach instance to - default: {{ stack_network | default("workload_internal_net_1") }} - external_network: - type: string - description: network used for floating IPs - default: {{ stack_external_network | default("public") }} - server_name_prefix: - type: string - description: a prefix for each server name. 
- default: "" - security_group: - type: string - description: the security group for the instances - default: basic - - resources: - vnf: - type: OS::Nova::Server - properties: - {% if metrics_backend == "prometheus" -%} - name: - list_join: ["", [{get_param: server_name_prefix}, {get_param: OS::stack_name}]] - {% endif -%} - flavor: {get_param: flavor} - #key_name: {get_param: key_name} - image: { get_param: image } - metadata: { get_param: metadata } - networks: - - port: { get_resource: port } - - port: - type: OS::Neutron::Port - properties: - network: {get_param: network} - security_groups: - - { get_param: security_group } - - floating_ip: - type: OS::Neutron::FloatingIP - properties: - floating_network: {get_param: external_network } - - floating_ip_assoc: - type: OS::Neutron::FloatingIPAssociation - properties: - floatingip_id: { get_resource: floating_ip } - port_id: { get_resource: port } - -- name: Create the resource to reference in the heat template - ansible.builtin.copy: - dest: ~/templates/autoscaling/vnf/resources.yaml - content: | - resource_registry: - "OS::Nova::Server::VNF": ./instance.yaml - # parameters: - # image: cirros - # flavor: m1.small - # network: private - # security_group: basic - -- name: Create the deployment template for heat to control instance scaling - ansible.builtin.copy: - dest: ~/templates/autoscaling/vnf/template.yaml - content: | - heat_template_version: wallaby - description: Example auto scale group, policy and alarm - parameters: - server_name_prefix: - description: A prefix for servers created by this stack. Can be used in queries. - type: string - default: autoscaling_server_ - #image: - # type: string - # description: image used to create instance - # default: "{{ stack_network| default( 'workload_image_1') }}" - #flavor: - # type: string - # description: instance flavor to be used - # default: "{{ stack_flavor | default('workload_flavor_1') }}" - #key_name: - # type: string - # description: keypair to be used - # default: workload_key_1 - #network: - # type: string - # description: project network to attach instance to - # default: "{{ stack_network | default('workload_internal_net_1') }}" - #external_network: - # type: string - # description: network used for floating IPs - # default: public - - resources: - scaleup_group: - type: OS::Heat::AutoScalingGroup - properties: - max_size: 3 - min_size: 1 - desired_capacity: 1 - resource: - type: OS::Nova::Server::VNF - # resource definieiton for the resource to be created by the group. - # this passes in parameters to the resource. - # So I can put a name_prefix here and it'll be set as the name... - # But the parameter is needed in the VNF. - # Can I set the name in an OS::Nova::Server? 
- # This sets the property/metadata for the VNF - # name should be set here, but maybe not as metadata - # network: { get_param: network } - # flavor: { get_param: flavor } - # image: { get_param: image } - properties: - server_name_prefix: { get_param: server_name_prefix } - metadata: {"metering.server_group": {get_param: "OS::stack_id"}} - - - scaleup_policy: - type: OS::Heat::ScalingPolicy - properties: - adjustment_type: change_in_capacity - auto_scaling_group_id: { get_resource: scaleup_group } - cooldown: 60 - scaling_adjustment: 1 - - scaledown_policy: - type: OS::Heat::ScalingPolicy - properties: - adjustment_type: change_in_capacity - auto_scaling_group_id: { get_resource: scaleup_group } - cooldown: 60 - scaling_adjustment: -1 - - cpu_alarm_high: - {% if metrics_backend == "gnocchi" -%} - type: OS::Aodh::GnocchiAggregationByResourcesAlarm - {% endif -%} - {% if metrics_backend == "prometheus" -%} - type: OS::Aodh::PrometheusAlarm - {% endif -%} - properties: - description: Scale up instance if CPU > 50% - {% if metrics_backend == "gnocchi" -%} - metric: cpu - aggregation_method: rate:mean - granularity: 300 - evaluation_periods: 1 - resource_type: instance - threshold: 30000000000.0 - {% endif -%} - {% if metrics_backend == "prometheus" -%} - threshold: 50 - {% endif -%} - comparison_operator: gt - alarm_actions: - - str_replace: - template: trust+url - params: - url: {get_attr: [scaleup_policy, signal_url]} - query: - {% if metrics_backend == "gnocchi" -%} - list_join: - - '' - - - {'=': {server_group: {get_param: "OS::stack_id"}}} - {% endif -%} - {% if metrics_backend == "prometheus" -%} - str_replace: - template: "(rate(ceilometer_cpu{resource_name=~'server_name_prefix.*'}[150s]))/10000000" - params: - server_name_prefix: {get_param: server_name_prefix} - {%- endif %} - - cpu_alarm_low: - {% if metrics_backend == "gnocchi" -%} - type: OS::Aodh::GnocchiAggregationByResourcesAlarm - {% endif -%} - {% if metrics_backend == "prometheus" -%} - type: OS::Aodh::PrometheusAlarm - {% endif -%} - properties: - description: Scale down instance if CPU < 20% - {% if metrics_backend == "gnocchi" -%} - metric: cpu - aggregation_method: rate:mean - granularity: 300 - evaluation_periods: 1 - resource_type: instance - threshold: 12000000000.0 - {% endif -%} - threshold: 20 - comparison_operator: lt - alarm_actions: - - str_replace: - template: trust+url - params: - url: {get_attr: [scaledown_policy, signal_url]} - query: - {% if metrics_backend == "gnocchi" -%} - list_join: - - '' - - - {'=': {server_group: {get_param: "OS::stack_id"}}} - {% endif -%} - {% if metrics_backend == "prometheus" -%} - str_replace: - template: "(rate(ceilometer_cpu{resource_name=~'server_name_prefix.*'}[150s]))/10000000" - params: - server_name_prefix: {get_param: server_name_prefix} - {% endif %} - - outputs: - scaleup_policy_signal_url: - value: {get_attr: [scaleup_policy, alarm_url]} - - scaledown_policy_signal_url: - value: {get_attr: [scaledown_policy, alarm_url]} diff --git a/roles/telemetry_autoscaling/tasks/creating_stack.yml b/roles/telemetry_autoscaling/tasks/creating_stack.yml deleted file mode 100644 index fa853e3..0000000 --- a/roles/telemetry_autoscaling/tasks/creating_stack.yml +++ /dev/null @@ -1,131 +0,0 @@ ---- -- name: Copy the templates to openstackclient pod - ansible.builtin.shell: | - oc cp $HOME/templates/autoscaling/vnf/template.yaml openstackclient:/tmp - oc cp $HOME/templates/autoscaling/vnf/resources.yaml openstackclient:/tmp - oc cp $HOME/templates/autoscaling/vnf/instance.yaml 
openstackclient:/tmp - - # template location was $HOME/templates/autoscaling/vnf/ -- name: "[temp] Set the location to copy the template to" - ansible.builtin.set_fact: - template_location: '/tmp' - - # I need to pass parameters to the stack create - # TODO: Define parameters as vars, and add them to the role. - # TODO: Update pre-checks to make sure the image, flavor, key, network and external_network exist. - # Temperority use the install_yamls/devinstall make edpm_deploy_instance to make these resources. - # This maketarget uses the devsetup/scripts/edpm-deploy-instance.sh script - # TODO: Either update the test to create the required resources, or add checks to make sure they exist. -- ansible.builtin.set_fact: - stack_image: cirros - stack_flavor: m1.small - #stack_keyname: - stack_network: private - stack_external_network: public -- name: Create the stack - ansible.builtin.shell: | - #source ~/overcloudrc; - {{ openstack_cmd }} stack create \ - -t {{ template_location }}/template.yaml \ - -e {{ template_location }}/resources.yaml \ - {{ stack_name }}; - register: result - failed_when: result.rc >= 1 - -- name: Print the result - ansible.builtin.debug: - var: result - - #- name: Wait for 60 sec - # ansible.builtin.pause: - # minutes: 1 - -- name: Verify that the stack was created successfully - ansible.builtin.shell: | - # source ~/overcloudrc; - {{ openstack_cmd }} stack show {{ stack_name }}; - register: result - until: '"CREATE_COMPLETE" in result.stdout' - timeout: 30 - retries: 20 - -- name: Verify that the stack resources are created - ansible.builtin.shell: | - # source ~/overcloudrc; - export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); - {{ openstack_cmd }} stack resource list $STACK_ID; - register: result - timeout: 30 - retries: 20 - until: '"CREATE_COMPLETE" in result.stdout' - # failed_when: '"CREATE_COMPLETE" not in result.stdout' - -- name: Verify that ceilometer_cpu metric exist - ansible.builtin.shell: | - # source ~/overcloudrc; - {{ openstack_cmd }} metric list - register: result - failed_when: result.rc >= 1 - -- name: Print the result - ansible.builtin.debug: - var: result - -- name: Verify that an instance was launched by the stack creation - ansible.builtin.shell: | - # source ~/overcloudrc; - export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); - {{ openstack_cmd }} server list --long | grep $STACK_ID; - register: result - failed_when: result.rc >= 1 - -- name: Verify that the alarms were created for the stack - ansible.builtin.shell: | - # source ~/overcloudrc; - {{ openstack_cmd }} alarm list - register: result - failed_when: result.rc >= 1 - -- name: Note the physical_resource_id values for the cpu_alarm_low resource - ansible.builtin.shell: | - # source ~/overcloudrc; - export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); - {{ openstack_cmd }} stack resource list $STACK_ID |grep -i cpu_alarm_low | awk '{print $4}' - register: physical_resource_id_low - -- name: Note the physical_resource_id values for the cpu_alarm_high resource - ansible.builtin.shell: | - # source ~/overcloudrc; - export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); - {{ openstack_cmd }} stack resource list $STACK_ID |grep -i cpu_alarm_high | awk '{print $4}' - register: physical_resource_id_high - -- name: Verify physical_resource_id match the alarm id for cpu_alarm_low - ansible.builtin.shell: | - # source ~/overcloudrc; - {{ openstack_cmd }} alarm list |grep -i 
cpu_alarm_low | awk '{print $2}' - register: alarm_id_low - failed_when: - - physical_resource_id_low.stdout != alarm_id_low.stdout - -- name: Verify physical_resource_id match the alarm id for cpu_alarm_high - ansible.builtin.shell: | - # source ~/overcloudrc; - {{ openstack_cmd }} alarm list |grep -i cpu_alarm_high | awk '{print $2}' - register: alarm_id_high - failed_when: - - physical_resource_id_high.stdout != alarm_id_high.stdout - - # TODO: get alt check for prom -- name: Verify that metric resources exist for the stack - when: metrics_backend == "gnocchi" - ansible.builtin.shell: | - # source ~/overcloudrc; - export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value); - {{ openstack_cmd }} metric resource search \ - --sort-column launched_at -c id \ - -c display_name -c launched_at \ - -c deleted_at --type instance \ - server_group="$STACK_ID" - register: result - failed_when: result.rc >= 1 diff --git a/roles/telemetry_autoscaling/tasks/main.yml b/roles/telemetry_autoscaling/tasks/main.yml deleted file mode 100644 index ab4eee9..0000000 --- a/roles/telemetry_autoscaling/tasks/main.yml +++ /dev/null @@ -1,15 +0,0 @@ ---- -# tasks file for telemetry_autoscaling - # TODO: Update the pre-checks to include the resources that the stack expects to exist - # i.e. networks, flavor, image, security group -- name: Check pre-reqs for autoscaling - ansible.builtin.include_tasks: verify_autoscaling.yml - -- name: Create the heat templates - ansible.builtin.include_tasks: configure_heat.yml - -- name: Launch the stack - ansible.builtin.include_tasks: creating_stack.yml - -- name: Run the autoscaling tests - ansible.builtin.include_tasks: test_autoscaling.yml diff --git a/roles/telemetry_autoscaling/tasks/test_autoscaling.yml b/roles/telemetry_autoscaling/tasks/test_autoscaling.yml deleted file mode 100644 index 3f15570..0000000 --- a/roles/telemetry_autoscaling/tasks/test_autoscaling.yml +++ /dev/null @@ -1,112 +0,0 @@ ---- - # NOTE: The format of the output appears to have changed; There are now 2 - # IP addresses, and the second one is the one associated with the floating IP -- name: Register instance IP - ansible.builtin.shell: | - # source ~/overcloudrc; - export STACK_ID=$({{ openstack_cmd }} stack show {{ stack_name }} -c id -f value) - {{ openstack_cmd }} server list --long -c Networks -c 'Properties' | \ - grep -i $STACK_ID | \ - awk -F'=' '{print $2}' | \ - awk -F'|' '{print $1}' | \ - awk -F',' '{print $2}' - register: vnf_instance_ip - -- name: Show the IP - ansible.builtin.debug: - var: vnf_instance_ip - -- when: vnf_instance_ip.stdout | length == 0 - fail: - msg: "bad vnf_instance_ip" - -- name: Verfiy the number of instances before scaling - ansible.builtin.shell: | - # source ~/overcloudrc; - {{ openstack_cmd }} server list --long | grep -i 'metering.server_group' | wc -l - register: instance_count1 - -- shell: | - cat {{ ansible_env.HOME }}/.ssh/known_hosts | grep "{{ item | trim }}" - with_items: "{{ vnf_instance_ip.stdout_lines }}" - ignore_errors: true - -- name: Remove the existing hostkey, if there is one for the target IP - ansible.builtin.lineinfile: - dest: '{{ ansible_env.HOME }}/.ssh/known_hosts' - state: absent - regexp: "{{ item |trim }}" - with_items: "{{ vnf_instance_ip.stdout_lines }}" - -- shell: | - cat {{ ansible_env.HOME }}/.ssh/known_hosts | grep "{{ item | trim }}" - with_items: "{{ vnf_instance_ip.stdout_lines }}" - ignore_errors: true - - # NOTE: Disabling strict host key checking so that ssh doesn't give an error - # when the host 
key changes i.e. if a new VM has been assigned a - # previously-used IP address, which will happen during local testing but not - # in CI - # NOTE: the with_items is because I was capturing both IPs initially, and - # using both in case the order was not consistent. - # Disabling hostkey checking didn't work. I need to remove the key from the known_hosts file before trying to tun this. - # The key removal should move to some pre/pre-run stage. -- name: Test automatic scaling up of instances - ansible.builtin.shell: | - sshpass -p gocubsgo ssh -o StrictHostKeyChecking=False cirros@{{ item | trim }} "sudo yes > /dev/null &" - register: busy_process - with_items: "{{ vnf_instance_ip.stdout_lines }}" - -- name: Verify that the alarm has been triggered - ansible.builtin.shell: | - # source ~/overcloudrc; - {{ openstack_cmd }} alarm list -c state -c name -f value| \ - grep -i "cpu_alarm_high" | \ - awk '{print $2}' - retries: 100 - delay: 5 - register: result - until: result.stdout == "alarm" - -- name: Verify that the Orchestration service has scaled up the instances - ansible.builtin.shell: | - # source ~/overcloudrc; - {{ openstack_cmd }} server list --long|grep -i metering.server_group | wc -l - retries: 100 - delay: 5 - register: instance_count2 - until: instance_count2.stdout == "3" - -- name: Stop the busy process - ansible.builtin.shell: | - sshpass -p gocubsgo ssh cirros@{{ item | trim }} "sudo killall yes" - register: kill_busy_process - with_items: "{{ vnf_instance_ip.stdout_lines }}" - -- name: Test automatic scaling down of instances - ansible.builtin.pause: - minutes: 5 - -- name: Verify that the alarm has been triggered - ansible.builtin.shell: | - # source ~/overcloudrc; - {{ openstack_cmd }} alarm list -c state -c name -f value| \ - grep -i "cpu_alarm_low" | \ - awk '{print $2}' - retries: 100 - delay: 5 - register: result - until: result.stdout == "alarm" - - # TODO: the metering.server group metadata was used for gnocchi alarm - # selection. - # prom uses the instance name, so the metadata MIGHT be removed, and a new - # check for whether the scaling group has scaled down may be needed. -- name: Verify that the Orchestration service has scaled down the instances - ansible.builtin.shell: | - # source ~/overcloudrc; - {{ openstack_cmd }} server list --long|grep -i metering.server_group |wc -l - retries: 100 - delay: 5 - register: instance_count3 - until: instance_count3.stdout == "1" diff --git a/roles/telemetry_autoscaling/tasks/verify_autoscaling.yml b/roles/telemetry_autoscaling/tasks/verify_autoscaling.yml deleted file mode 100644 index 9ae42a3..0000000 --- a/roles/telemetry_autoscaling/tasks/verify_autoscaling.yml +++ /dev/null @@ -1,42 +0,0 @@ ---- -# This doesn't work when using prom as the metrics backend -# TODO: Add a metrics_backend option -- name: Test service API endpoint(metric) for autoscaling - when: metrics_backend == "gnocchi" - ansible.builtin.shell: | - #source ~/stackrc; - {{ openstack_cmd }} endpoint list --service metric; - register: result - failed_when: result.rc >= 1 - -- name: Test service API endpoint(alarm) for autoscaling - ansible.builtin.shell: | - #source ~/stackrc; - {{ openstack_cmd }} endpoint list --service alarming; - register: result - failed_when: result.rc >= 1 - -- name: Test service API endpoint(heat) for autoscaling - ansible.builtin.shell: | - # source ~/overcloudrc; - {{ openstack_cmd }} endpoint list --service orchestration; - register: result - failed_when: result.rc >= 1 - - # need selection criteria to decide when to run these. 
- # Need alternative for OSP18. -- name: Verify that the services are running on the overcloud - ansible.builtin.shell: | - # source ~/overcloudrc; - sudo podman ps --filter=name='heat|gnocchi|ceilometer|aodh'; - register: result - failed_when: result.rc >= 1 - ignore_errors: true - -- name: Verify that the time-series database service is available - when: metrics_backend == "gnocchi" - ansible.builtin.shell: | - # source ~/overcloudrc; - {{ openstack_cmd }} metric status --fit-width; - register: result - failed_when: result.rc >= 1 From 49f34468388bef7f8b7733bbf663251da53f10f9 Mon Sep 17 00:00:00 2001 From: mgirgisf Date: Wed, 15 May 2024 09:23:12 +0200 Subject: [PATCH 16/17] update clone task for fvt --- playbooks/autoscaling_osp18.yaml | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/playbooks/autoscaling_osp18.yaml b/playbooks/autoscaling_osp18.yaml index cc7e7a3..3abbb5f 100644 --- a/playbooks/autoscaling_osp18.yaml +++ b/playbooks/autoscaling_osp18.yaml @@ -10,22 +10,15 @@ stack_flavor: "m1.small" stack_network: "private" stack_external_network: "public" + # this is temperory and should be replaced later by some heat stack or some ansible + fvt_dir: "{{ playbook_dir }}/feature-verification-tests" tasks: - # this is temperory and should be replaced later by some heat stack or some ansible - - - set_fact: - fvt_dir: "{{ playbook_dir }}/feature-verification-tests" - when: "{{ not fvt_dir is defined }}" - tags: - always - - - debug: var=fvt_dir - - - command: - cmd: git clone http://github.com/infrawatch/feature-verification-tests -b efoley-add_telemetry_autoscaling {{ fvt_dir }} - creates: "{{ fvt_dir }}" - tags: - - setup + # this is temperory + - name: Clone feature-verification repository + git: + repo: http://github.com/infrawatch/feature-verification-tests + dest: "{{ fvt_dir }}" + version: efoley-add_telemetry_autoscaling - ansible.builtin.command: cmd: git clone http://github.com/openstack-k8s-operators/install_yamls @@ -69,7 +62,7 @@ # NOTE: the tags are for testing/development, eventually, the role will just be imported, and main.yml will run through the tasks - import_role: - name: '{{ fvt_dir }}roles/telemetry_autoscaling' + name: '{{ fvt_dir }}/roles/telemetry_autoscaling' tasks_from: 'verify_autoscaling' tags: - precheck From 7b4bc4244e92c260f7a20c65023b822a3a4da2ba Mon Sep 17 00:00:00 2001 From: Muneesha Yadla Date: Wed, 15 May 2024 06:52:38 -0400 Subject: [PATCH 17/17] Added pre-config file to test autoscaling on OSP18 --- playbooks/preconfig_osp18.yaml | 69 ++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 playbooks/preconfig_osp18.yaml diff --git a/playbooks/preconfig_osp18.yaml b/playbooks/preconfig_osp18.yaml new file mode 100644 index 0000000..0220079 --- /dev/null +++ b/playbooks/preconfig_osp18.yaml @@ -0,0 +1,69 @@ +#!/usr/bin/env ansible-playbook +--- +# Pre-config steps to setup autoscaling for OSP18 + +- hosts: localhost + become: no + name: Pre-config steps for Autoscaling on OSP18 + + tasks: + - name: Install openstackclient dependencies + shell: + cmd: | + oc exec -ti openstackclient -- python3 -m ensurepip --upgrade + oc exec -ti openstackclient -- python3 -m pip install --upgrade aodhclient + oc exec -ti openstackclient -- python3 -m pip install python-observabilityclient + + - name: Install Cluster Observability Operator(COO) + ansible.builtin.shell: + cmd: | + oc create -f - <= 1 + + - name: Wait for the installation to succeed + pause: + minutes: 1 + + - name: Verify COO is 
installed successfully + ansible.builtin.command: + cmd: | + oc wait --for jsonpath="{.status.phase}"=Succeeded csv --namespace=openshift-operators -l operators.coreos.com/cluster-observability-operator.openshift-operators + register: result + changed_when: false + failed_when: '"condition met" not in result.stdout' + + - name: Add content to a spec file in home dir + copy: + dest: $HOME/spec.yaml + content: | + spec: + customContainerImages: + aodhAPIImage: quay.io/podified-master-centos9/openstack-aodh-api:current-podified + aodhEvaluatorImage: quay.io/podified-master-centos9/openstack-aodh-evaluator:current-podified + aodhListenerImage: quay.io/podified-master-centos9/openstack-aodh-listener:current-podified + aodhNotifierImage: quay.io/podified-master-centos9/openstack-aodh-notifier:current-podified + heatAPIImage: quay.io/podified-master-centos9/openstack-heat-api:current-podified + heatCfnapiImage: quay.io/podified-master-centos9/openstack-heat-api-cfn:current-podified + heatEngineImage: quay.io/podified-master-centos9/openstack-heat-engine:current-podified + + - name: Update the openstackversions CR to run master images for aodh and heat + ansible.builtin.shell: + cmd: | + oc patch openstackversions openstack-galera-network-isolation --type merge --patch-file $HOME/spec.yaml + register: result + +...
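
A note on the Prometheus alarm query used in several of the patches above: the ceilometer_cpu metric is cumulative CPU time in nanoseconds, so rate() over the sampling window gives nanoseconds of CPU consumed per second, and dividing by 10000000 turns that into an approximate utilisation percentage of a single vCPU. That is why the Prometheus-side thresholds are plain percentages (50 to scale up, 20 to scale down), unlike the nanosecond-scale thresholds used with gnocchi. As a minimal sketch, assuming the default server_name_prefix of autoscaling_server_ from the template, the substituted query looks like:

    # approximate CPU utilisation (% of one vCPU) for the autoscaling group's servers
    (rate(ceilometer_cpu{resource_name=~'autoscaling_server_.*'}[150s])) / 10000000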
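
For local testing, a typical invocation of the playbooks added in this series (assuming ansible-playbook is run from the repository root and the oc client is already logged in to the target cluster) might look like:

    ansible-playbook playbooks/preconfig_osp18.yaml
    ansible-playbook playbooks/autoscaling_osp18.yaml

Variables such as stack_name or metrics_backend can be overridden with -e if the defaults from the telemetry_autoscaling role are not wanted.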