diff --git a/Makefile b/Makefile index 00197fe3..ed738906 100644 --- a/Makefile +++ b/Makefile @@ -5,6 +5,7 @@ FILE_LIST = LICENSE \ README.md \ + crm_cfg \ icons \ man \ ra \ diff --git a/Makefile-tester b/Makefile-tester index bcf9878b..e105d3cb 100644 --- a/Makefile-tester +++ b/Makefile-tester @@ -5,8 +5,11 @@ FILE_LIST = LICENSE \ README.md \ - test \ + crm_cfg \ man-tester \ + man-tester-client \ + test \ + tools \ PKG = SAPHanaSR-tester SPECFILE = ${PKG}.spec diff --git a/SAPHanaSR-angi.spec b/SAPHanaSR-angi.spec index ae238c17..4f1954d5 100644 --- a/SAPHanaSR-angi.spec +++ b/SAPHanaSR-angi.spec @@ -1,9 +1,9 @@ # -# spec file for package SAPHanaSR +# spec file for package SAPHanaSR-angi # # Copyright (c) 2013-2014 SUSE Linux Products GmbH, Nuernberg, Germany. # Copyright (c) 2014-2016 SUSE Linux GmbH, Nuernberg, Germany. -# Copyright (c) 2017-2023 SUSE LLC. +# Copyright (c) 2017-2024 SUSE LLC. # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -21,7 +21,7 @@ License: GPL-2.0 Group: Productivity/Clustering/HA AutoReqProv: on Summary: Resource agents to control the HANA database in system replication setup -Version: 1.2.5 +Version: 1.2.6 Release: 0 Url: https://www.suse.com/c/fail-safe-operation-of-sap-hana-suse-extends-its-high-availability-solution/ @@ -78,6 +78,7 @@ mkdir -p %{buildroot}/usr/bin mkdir -p %{buildroot}%{_docdir}/%{name} mkdir -p %{buildroot}/usr/share/%{name}/icons mkdir -p %{buildroot}/usr/share/%{name}/samples +mkdir -p %{buildroot}/usr/share/%{name}/samples/crm_cfg/angi-ScaleUp mkdir -p %{buildroot}/usr/lib/ocf/resource.d/suse mkdir -p %{buildroot}/usr/lib/%{name} mkdir -p %{buildroot}%{_mandir}/man7 @@ -95,6 +96,9 @@ install -m 0644 srHook/susCostOpt.py %{buildroot}/usr/share/%{name}/ install -m 0644 srHook/susChkSrv.py %{buildroot}/usr/share/%{name}/ install -m 0444 srHook/global.ini_* %{buildroot}/usr/share/%{name}/samples +# crm config templates +install -m 0644 crm_cfg/angi-ScaleUp/[0-9]*_* %{buildroot}/usr/share/%{name}/samples/crm_cfg/angi-ScaleUp + # icons for SAPHanaSR-monitor install -m 0444 icons/* %{buildroot}/usr/share/%{name}/icons @@ -111,6 +115,7 @@ install -m 0555 tools/SAPHanaSR-replay-archive-legacy %{buildroot}/usr/bin install -m 0555 tools/SAPHanaSR-filter-legacy %{buildroot}/usr/bin install -m 0555 tools/SAPHanaSR-hookHelper %{buildroot}/usr/bin install -m 0555 tools/SAPHanaSR-manageProvider %{buildroot}/usr/bin +install -m 0555 tools/SAPHanaSR-upgrade-to-angi-demo %{buildroot}/usr/share/%{name}/samples install -m 0444 tools/SAPHanaSRTools.pm %{buildroot}/usr/lib/%{name} install -m 0444 tools/saphana_sr_tools.py %{buildroot}/usr/lib/%{name} diff --git a/SAPHanaSR-tester.spec b/SAPHanaSR-tester.spec index 03e49b61..12b2e430 100644 --- a/SAPHanaSR-tester.spec +++ b/SAPHanaSR-tester.spec @@ -1,7 +1,8 @@ # -# spec file for package SAPHanaSR +# spec file for package SAPHanaSR-tester # -# Copyright (c) 2023 SUSE LLC. +# Author: Fabian Herschel +# Copyright (c) 2023-2024 SUSE LLC. 
# # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -19,7 +20,7 @@ License: GPL-2.0 Group: Productivity/Clustering/HA AutoReqProv: on Summary: Test suite for SAPHanaSR clusters -Version: 1.2.8 +Version: 1.2.13 Release: 0 Url: https://www.suse.com/c/fail-safe-operation-of-sap-hana-suse-extends-its-high-availability-solution/ @@ -31,6 +32,11 @@ BuildRoot: %{_tmppath}/%{name}-%{version}-build Requires: python3 +%package client +Group: Productivity/Clustering/HA +Summary: Test suite for SAPHanaSR clusters - SAPHanaSR-tester-client is to be installed on all SAPHanaSR classic nodes +Conflicts: SAPHanaSR-angi + %description SAPHanaSR-tester is a suite for semi-automated tests of SAPHanaSR clusters. First focussed test-scenarios are angi-ScaleUp and angi-ScaleOut (e.g. for ERP systems). @@ -44,17 +50,22 @@ Authors: -------- Fabian Herschel +%description client +SAPHanaSR-tester-client is to be installed on all SAPHanaSR classic nodes to allow SAPHanaSR-tester to check the cluster attributes with the same method. + %prep tar xf %{S:0} #%define crmscr_path /usr/share/crmsh/scripts/ %build gzip man-tester/* +gzip man-tester-client/* %install mkdir -p %{buildroot}/usr/bin #mkdir -p %{buildroot}%{_docdir}/%{name} mkdir -p %{buildroot}/usr/share/%{name} +mkdir -p %{buildroot}/usr/share/%{name}/samples/crm_cfg/angi-ScaleUp mkdir -p %{buildroot}/usr/lib/%{name} mkdir -p %{buildroot}%{_mandir}/man5 mkdir -p %{buildroot}%{_mandir}/man7 @@ -71,27 +82,44 @@ install -m 0755 test/callTest* %{buildroot}/usr/bin install -m 0755 test/loopTests* %{buildroot}/usr/bin install -m 0755 test/sct_* %{buildroot}/usr/bin +# client files +install -m 0755 tools/SAPHanaSR-showAttr %{buildroot}/usr/bin +mkdir -p %{buildroot}/usr/lib/SAPHanaSR-angi +install -m 0755 tools/saphana_sr_tools.py %{buildroot}/usr/lib/SAPHanaSR-angi + # test definitions pwd ls test/json -cp -va test/json %{buildroot}/usr/share/%{name} -cp -va test/www %{buildroot}/usr/share/%{name} +cp -a test/json %{buildroot}/usr/share/%{name} +cp -a test/www %{buildroot}/usr/share/%{name} +install -m 0644 crm_cfg/angi-ScaleUp/[0-9]*_* %{buildroot}/usr/share/%{name}/samples/crm_cfg/angi-ScaleUp # manual pages install -m 0444 man-tester/*.5.gz %{buildroot}%{_mandir}/man5 install -m 0444 man-tester/*.7.gz %{buildroot}%{_mandir}/man7 install -m 0444 man-tester/*.8.gz %{buildroot}%{_mandir}/man8 +# man pages for client package +install -m 0444 man-tester-client/*.7.gz %{buildroot}%{_mandir}/man7 + %files %defattr(-,root,root) /usr/share/%{name} %dir /usr/lib/%{name} /usr/lib/%{name}/saphana_sr_*.py -/usr/bin/* - +/usr/bin/SAPHanaSR-testCluster +/usr/bin/SAPHanaSR-checkJson +/usr/bin/sct_* +/usr/bin/callTest* +/usr/bin/loopTests* +/usr/bin/cs_ssh +/usr/bin/SAPHanaSR-testCluster-html %license LICENSE -#%dir %{_docdir}/%{name} %doc README.md %doc %{_mandir}/man*/* +%files client +/usr/bin/SAPHanaSR-showAttr +/usr/lib/SAPHanaSR-angi + %changelog diff --git a/crm_cfg/angi-ScaleUp/010_basics_crm.txt b/crm_cfg/angi-ScaleUp/010_basics_crm.txt new file mode 100644 index 00000000..54848fd7 --- /dev/null +++ b/crm_cfg/angi-ScaleUp/010_basics_crm.txt @@ -0,0 +1,15 @@ +# +# base config (1) +# +property cib-bootstrap-options: \ + cluster-name=SAP@@SID@@_@@INO@@ \ + stonith-enabled=true \ + stonith-action=reboot \ + stonith-timeout=90 \ + priority-fencing-delay=30 +rsc_defaults rsc-options: \ + migration-threshold=5000 \ + resource-stickiness=1000 +op_defaults op-options: \ + 
timeout=600 \ + record-pending=true diff --git a/crm_cfg/angi-ScaleUp/020_resource_sbd_crm.txt b/crm_cfg/angi-ScaleUp/020_resource_sbd_crm.txt new file mode 100644 index 00000000..2f508730 --- /dev/null +++ b/crm_cfg/angi-ScaleUp/020_resource_sbd_crm.txt @@ -0,0 +1,5 @@ +# +# primitive sbd (2) +# +primitive rsc_stonith_sbd stonith:external/sbd \ + params pcmk_delay_base=15 diff --git a/crm_cfg/angi-ScaleUp/030_clone_top_crm.txt b/crm_cfg/angi-ScaleUp/030_clone_top_crm.txt new file mode 100644 index 00000000..f8d21785 --- /dev/null +++ b/crm_cfg/angi-ScaleUp/030_clone_top_crm.txt @@ -0,0 +1,14 @@ +# +# clone TOP (3) +# +primitive rsc_SAPHanaTop_@@SID@@_HDB@@INO@@ ocf:suse:SAPHanaTopology \ + op monitor interval=50 timeout=600 \ + op start interval=0 timeout=3600 \ + op stop interval=0 timeout=300 \ + params SID=@@SID@@ InstanceNumber=@@INO@@ +# +clone cln_SAPHanaTop_@@SID@@_HDB@@INO@@ rsc_SAPHanaTop_@@SID@@_HDB@@INO@@ \ + meta clone-node-max=1 interleave=true +# +#################################################################### +# diff --git a/crm_cfg/angi-ScaleUp/040_clone_fil_crm.txt b/crm_cfg/angi-ScaleUp/040_clone_fil_crm.txt new file mode 100644 index 00000000..f7ea0bca --- /dev/null +++ b/crm_cfg/angi-ScaleUp/040_clone_fil_crm.txt @@ -0,0 +1,14 @@ +# +# clone FIL (4) +# +primitive rsc_SAPHanaFil_@@SID@@_HDB@@INO@@ ocf:suse:SAPHanaFilesystem \ + op monitor interval=60 timeout=60 \ + op start interval=0 timeout=10 \ + op stop interval=0 timeout=20 on-fail=fence \ + params SID=@@SID@@ InstanceNumber=@@INO@@ +# +clone cln_SAPHanaFil_@@SID@@_HDB@@INO@@ rsc_SAPHanaFil_@@SID@@_HDB@@INO@@ \ + meta clone-node-max=1 interleave=true +# +#################################################################### +# diff --git a/crm_cfg/angi-ScaleUp/050_clone_con_crm.txt b/crm_cfg/angi-ScaleUp/050_clone_con_crm.txt new file mode 100644 index 00000000..c3915c35 --- /dev/null +++ b/crm_cfg/angi-ScaleUp/050_clone_con_crm.txt @@ -0,0 +1,17 @@ +# +# promotable clone (5) +# +primitive rsc_SAPHanaCon_@@SID@@_HDB@@INO@@ ocf:suse:SAPHanaController \ + op start interval=0 timeout=3600 \ + op stop interval=0 timeout=3600 \ + op promote interval=0 timeout=900 \ + op monitor interval=60 role=Promoted timeout=700 \ + op monitor interval=61 role=Unpromoted timeout=700 \ + params SID=@@SID@@ InstanceNumber=@@INO@@ PREFER_SITE_TAKEOVER=yes DUPLICATE_PRIMARY_TIMEOUT=600 AUTOMATED_REGISTER=yes \ + meta priority=100 +# +clone mst_SAPHanaCon_@@SID@@_HDB@@INO@@ rsc_SAPHanaCon_@@SID@@_HDB@@INO@@ \ + meta clone-max=2 clone-node-max=1 interleave=true promotable=true maintenance=true +# +#################################################################### +# diff --git a/crm_cfg/angi-ScaleUp/060_resource_ip_crm.txt b/crm_cfg/angi-ScaleUp/060_resource_ip_crm.txt new file mode 100644 index 00000000..0ff18cb4 --- /dev/null +++ b/crm_cfg/angi-ScaleUp/060_resource_ip_crm.txt @@ -0,0 +1,7 @@ +# +# primitive IP (6) +# +primitive rsc_ip_@@SID@@ IPaddr2 \ + params ip=@@IP@@ \ + op monitor interval=60 timeout=20 \ + op_params on_fail=restart diff --git a/crm_cfg/angi-ScaleUp/070_constraints_crm.txt b/crm_cfg/angi-ScaleUp/070_constraints_crm.txt new file mode 100644 index 00000000..085fa311 --- /dev/null +++ b/crm_cfg/angi-ScaleUp/070_constraints_crm.txt @@ -0,0 +1,6 @@ +# +# constraints (7) +# +colocation col_ip_prim_SAPHana_@@SID@@_HDB@@INO@@ 2000: rsc_ip_@@SID@@:Started mst_SAPHanaCon_@@SID@@_HDB@@INO@@:Promoted +# +order ord_SAPHana_@@SID@@_HDB@@INO@@ Optional: cln_SAPHanaTop_@@SID@@_HDB@@INO@@ mst_SAPHanaCon_@@SID@@_HDB@@INO@@ diff 
--git a/crm_cfg/angi-ScaleUp/alternative-names.txt b/crm_cfg/angi-ScaleUp/alternative-names.txt new file mode 100644 index 00000000..53c9f9fc --- /dev/null +++ b/crm_cfg/angi-ScaleUp/alternative-names.txt @@ -0,0 +1,42 @@ +# +# sle15 hana ScaleUp perfopt: +# +crm-bs.txt +crm-sbd.txt +crm-saphanatop.txt +crm-saphana.txt +crm-vip.txt +crm-cs.txt + +# +# sle15 hana ScaleUp costopt: +# +crm-bs.txt +crm-saphanatop.txt +crm-saphana.txt +crm-vip.txt +crm-cs.txt +crm-si.txt +crm-con.txt + +# +# sle15 hana ScaleOut +# +crm-bs.txt +crm-fencing.txt +crm-saphanatop.txt +crm-saphanacon.txt +crm-vip.txt +crm-cs.txt + +# +# sle15 hana ScaleOut Multi-Target +# +crm-bs.txt +crm-fencing.txt +crm-saphanatop.txt +crm-saphanacon.txt +crm-vip.txt +crm-cs.txt + + diff --git a/man-tester-client/SAPHanaSR-tester-client.7 b/man-tester-client/SAPHanaSR-tester-client.7 new file mode 100644 index 00000000..ec5ba38d --- /dev/null +++ b/man-tester-client/SAPHanaSR-tester-client.7 @@ -0,0 +1,68 @@ +.\" Version: 1.001 +.\" +.TH SAPHanaSR-tester-client 7 "27 Jan 2024" "" "SAPHanaSR" +.\" +.SH NAME +SAPHanaSR-tester-client \- Client for functional testing SAPHanaSR clusters. +.PP +.\" +.SH DESCRIPTION +.\" +SAPHanaSR-tester-client is a package that allows you to use SAPHanaSR-tester for +testing classical SAPHanaSR and SAPHanaSR-ScaleOut HA clusters. + +.B SAPHanaSR-tester is shipped as technology preview. + +The package provides a specific version of SAPHanaSR-showAttr, compatible to +SAPHanaSR-angi and SAPHanaSR-tester. +.PP +.\" +.SH EXAMPLES +.\" +\fB*\fR Checking the SAPHanaSR-showAttr. +.PP +The specific SAPHanaSR-showAttr is called to show the output format needed for +testing. See manual page SAPHanaSR-showAttr(8) from package SAPHanaSR-angi for +details. +.PP +.RS 2 +# /usr/bin/SAPHanaSR-showAttr --format=tester +.RE +.PP +.\" +.SH FILES +.\" +.TP +/usr/bin/SAPHanaSR-showAttr +specific SAPHanaSR-showAttr, compatible to SAPHanaSR-angi and SAPHanaSR-tester +.PP +.\" +.SH REQUIREMENTS +See SAPHanaSR-tester(7), SAPHanaSR-tests-classic-ScaleUp(7) and SAPHanaSR(7). +.\" +.SH BUGS +The SAPHanaSR-tester-client is under development. Syntax and formats are +subject to change. +.br +In case of any problem, please use your favourite SAP support process to open +a request for the component BC-OP-LNX-SUSE. +Please report any other feedback and suggestions to feedback@suse.com. +.PP +.\" +.SH SEE ALSO +\fBSAPHanaSR-tester\fP(7) , \fBSAPHanaSR-tests-classic-ScaleUp\fP(7) , +\fBSAPHanaSR-showAttr\fP(8), \fBSAPHanaSR\fP(7) +.PP +.\" +.SH AUTHORS +F.Herschel, L.Pinne. +.PP +.\" +.SH COPYRIGHT +(c) 2024 SUSE LLC +.br +The package SAPHanaSR-tester comes with ABSOLUTELY NO WARRANTY. +.br +For details see the GNU General Public License at +http://www.gnu.org/licenses/gpl.html +.\" diff --git a/man-tester/SAPHanaSR-testCluster.8 b/man-tester/SAPHanaSR-testCluster.8 index 8762f45c..385ac46d 100644 --- a/man-tester/SAPHanaSR-testCluster.8 +++ b/man-tester/SAPHanaSR-testCluster.8 @@ -1,6 +1,6 @@ .\" Version: 1.001 .\" -.TH SAPHanaSR-testCluster 8 "20 Nov 2023" "" "SAPHanaSR-angi" +.TH SAPHanaSR-testCluster 8 "03 Apr 2024" "" "SAPHanaSR-angi" .\" .SH NAME SAPHanaSR-testCluster \- Run functional tests for SAPHanaSR clusters. @@ -23,6 +23,7 @@ and test automation frameworks. .PP .\" .SH OPTIONS +.\" .TP --help print help message and exit. @@ -37,7 +38,7 @@ print failed checks per loop. print and log debug messages. .TP --logFile \fIFILE\fP -path to defaults file. +path to log file. .TP --printTestProperties print test properties and exit.
@@ -132,7 +133,7 @@ functional tests for SAPHanaSR-angi scale-out ERP scenarios, without HANA host a functional tests for SAPHanaSR-angi scale-out BW scenarios, with HANA host auto-autofailover. Not yet implemented. .TP /usr/share/SAPHanaSR-tester/json/classic-ScaleUp/ -not yet implemented. +functional tests for SAPHanaSR classic scale-up scenarios. .TP /usr/share/SAPHanaSR-tester/json/classic-ScaleOut/ not yet implemented. @@ -173,7 +174,7 @@ F.Herschel, L.Pinne. .PP .\" .SH COPYRIGHT -(c) 2023 SUSE LLC +(c) 2023-2024 SUSE LLC .br The package SAPHanaSR-tester comes with ABSOLUTELY NO WARRANTY. .br diff --git a/man-tester/SAPHanaSR-tester.7 b/man-tester/SAPHanaSR-tester.7 index 9cb443d3..8ad40cce 100644 --- a/man-tester/SAPHanaSR-tester.7 +++ b/man-tester/SAPHanaSR-tester.7 @@ -1,6 +1,6 @@ .\" Version: 1.001 .\" -.TH SAPHanaSR-tester 7 "20 Nov 2023" "" "SAPHanaSR-angi" +.TH SAPHanaSR-tester 7 "29 Feb 2024" "" "SAPHanaSR-angi" .\" .SH NAME SAPHanaSR-tester \- Functional testing for SAPHanaSR clusters. @@ -80,8 +80,8 @@ See manual page SAPHanaSR-testCluster(8) and SAPHanaSR-tests-description(7). The SAPHanaSR-tester is installed on an node outside the Linux cluster. Password-less login for user root is prepared. The tester properties are adapted to the SAPHanaSR scale-up cluster. Example scale-up cluster nodes are -node1 and node2, SID is Q42, instance number is 85, database user key is -TESTER. +node1 and node2, SID is Q42, instance number is 85, HANA´s virtual IP address is +192.168.178.85, database user key is TESTER. .PP .RS 2 # zypper in SAPHanaSR-tester @@ -108,7 +108,9 @@ TESTER. .br "mstResource": "mst_SAPHanaCon_Q42_HDB85", .br - "clnResource": "cln_SAPHanaTop_Q42_HDB85" + "clnResource": "cln_SAPHanaTop_Q42_HDB85", +.br + "ipAddr": "192.168.178.85", .br "userKey": "TESTER" .br @@ -183,6 +185,9 @@ auto-generated properties file for called shell scripts. No need to touch. .TP /usr/bin/SAPHanaSR-testCluster program to run a predefined testcase. +.TP +/usr/bin/SAPHanaSR-showAttr +SAPHanaSR-showAttr, compatible to SAPHanaSR-angi and SAPHanaSR-tester .PP .\" .SH REQUIREMENTS @@ -190,7 +195,10 @@ program to run a predefined testcase. For the current version of the SAPHanaSR-tester, the capabilities are limited to the following: .PP -\fB*\fP Resource agents and configuration of SAPHanaSR-angi. +\fB*\fP Resource agents and configuration of SAPHanaSR-angi is supported. +.PP +\fB*\fP Resource agents and configuration of SAPHanaSR or SAPHanaSR-ScaleOut +with additional package SAPHanaSR-tester-client is supported. .PP \fB*\fP Scale-up setups with or without multi-target replication. No scale-up multi-SID (MCOS) setups. @@ -207,7 +215,10 @@ cluster. login as database user SYSTEM into the HANA database. This is only needed for some test cases. .PP -\fB*\fP Package python3-paramiko is needed for the tester´s multi-node feature. +\fB*\fP Package python3-paramiko is needed on the tester machine for the +tester´s multi-node feature. +.PP +\fB*\fP Package ClusterTools2 is needed on the cluster nodes. .PP \fB*\fP Strict time synchronization between the cluster nodes and the tester node, e.g. NTP. All nodes have configured the same timezone. @@ -254,7 +265,7 @@ F.Herschel, L.Pinne. .PP .\" .SH COPYRIGHT -(c) 2023 SUSE LLC +(c) 2023-2024 SUSE LLC .br The package SAPHanaSR-tester comes with ABSOLUTELY NO WARRANTY. 
.br diff --git a/man-tester/SAPHanaSR-tests-angi-ScaleOut.7 b/man-tester/SAPHanaSR-tests-angi-ScaleOut.7 index 1cf36f20..0fc4d4b7 100644 --- a/man-tester/SAPHanaSR-tests-angi-ScaleOut.7 +++ b/man-tester/SAPHanaSR-tests-angi-ScaleOut.7 @@ -23,10 +23,10 @@ Predefined functional tests for scale-out ERP overwiev: Blocked manual takeover, for susTkOver.py. .TP \fBblock_sr_and_freeze_prim_master_nfs\fP -Block HANA SR and freeze HANA NFS on primary master node (not yet implemented). +Block HANA SR and freeze HANA NFS on primary master node. Not yet implemented. .TP \fBblock_sr_and_freeze_prim_site_nfs\fP -Block HANA SR and freeze HANA NFS on primary site (not yet implemented). +Block HANA SR and freeze HANA NFS on primary site. Not yet implemented. .TP \fBfree_log_area\fP Free HANA log area on primary site. @@ -38,7 +38,7 @@ Freeze HANA NFS on primary master node. Freeze HANA NFS on primary site. .TP \fBfreeze_secn_site_nfs\fP -Freeze HANA NFS on secondary site (not yet implemented). +Freeze HANA NFS on secondary site. Not yet implemented. .TP \fBkill_prim_indexserver\fP Kill primary master indexserver, for susChkSrv.py. @@ -50,7 +50,7 @@ Kill primary master instance. Kill primary master node. .TP \fBkill_prim_site\fP -Kill secondary site nodes (not yet implemented). +Kill secondary site nodes. Not yet implemented. .TP \fBkill_prim_worker_indexserver\fP Kill primary worker indexserver, for susChkSrv.py. @@ -77,26 +77,34 @@ Kill secondary worker instance. Kill secondary worker node. .TP \fBkill_secn_site\fP -Kill secondary site nodes (not yet implemented). +Kill secondary site nodes. Not yet implemented. +.TP +\fBmaintenance_cluster_bootstrap\fP +Maintenance procedure, initially configuring cluster resources. Not yet implemented. +.TP +\fBmaintenance_cluster_hana_running\fP +Maintenance procedure with stopping and restarting cluster, keep HANA running. .TP \fBmaintenance_cluster_turn_hana\fP Maintenance procedure, manually turning HANA sites. .TP \fBmaintenance_with_standby_nodes\fP -Maintenance procedure, standby+online secondary then standby+online primary (not yet implemented). +Maintenance procedure, standby+online secondary then standby+online primary. Not yet implemented. .TP \fBnop\fP No operation - check, wait and check again (stability check). .TP +\fBone_stable_hour\fP +Check regulary for one hour that there is no failure, like nop. +.TP \fBregister_prim_cold_hana\fP -Stop cluster, do manual takeover, leave former primary down and unregistered, start cluster -(not yet implemented). +Stop cluster, do manual takeover, leave former primary down and unregistered, start cluster. Not yet implemented. .TP \fBrestart_cluster\fP Stop and restart cluster and HANA .TP \fBrestart_cluster_hana_running\fP -Stop and restart cluster, keep HANA running. +Stop and restart cluster, keep HANA running. Slightly differs from maintenance. .TP \fBrestart_cluster_turn_hana\fP Stop cluster and HANA, takeover HANA, start cluster. diff --git a/man-tester/SAPHanaSR-tests-angi-ScaleUp.7 b/man-tester/SAPHanaSR-tests-angi-ScaleUp.7 index e18ccffd..067bc91b 100644 --- a/man-tester/SAPHanaSR-tests-angi-ScaleUp.7 +++ b/man-tester/SAPHanaSR-tests-angi-ScaleUp.7 @@ -20,7 +20,7 @@ Predefined functional tests for scale-up overview: Blocked manual takeover, for susTkOver.py. .TP \fBblock_sr\fP -Block HANA SR and check SFAIL attribute; unblock to recover. +Block HANA SR and check SFAIL attribute; unblock to recover, for SAPHanaSR.py. .TP \fBblock_sr_and_freeze_prim_fs\fP Block HANA SR and freeze HANA FS on primary master node. 
@@ -52,6 +52,12 @@ Kill secondary instance. \fBkill_secn_node\fP Kill secondary node. .TP +\fBmaintenance_cluster_bootstrap\fP +Maintenance procedure, initially configuring cluster resources. +.TP +\fBmaintenance_cluster_hana_running\fP +Maintenance procedure with stopping and restarting cluster, keep HANA running. +.TP \fBmaintenance_cluster_turn_hana\fP Maintenance procedure, manually turning HANA sites. .TP @@ -61,11 +67,14 @@ Maintenance procedure, standby+online secondary then standby+online primary. \fBnop\fP No operation - check, wait and check again (stability check). .TP +\fBone_stable_hour\fP +Check regularly for one hour that there is no failure, like nop. +.TP \fBregister_prim_cold_hana\fP -Stop cluster, do manual takeover, leave former primary down and unregistered, start cluster (not yet implementeed). +Stop cluster, do manual takeover, leave former primary down and unregistered, start cluster. Not yet implemented. .TP \fBrestart_cluster_hana_running\fP -Stop and restart cluster, keep HANA running. +Stop and restart cluster, keep HANA running. Slightly differs from maintenance. .TP \fBrestart_cluster\fP Stop and restart cluster and HANA. diff --git a/man-tester/SAPHanaSR-tests-classic-ScaleUp.7 b/man-tester/SAPHanaSR-tests-classic-ScaleUp.7 new file mode 100644 index 00000000..5787fb75 --- /dev/null +++ b/man-tester/SAPHanaSR-tests-classic-ScaleUp.7 @@ -0,0 +1,140 @@ +.\" Version: 1.001 +.\" +.TH SAPHanaSR-tests-classic-ScaleUp 7 "29 Feb 2024" "" "SAPHanaSR-angi" +.\" +.SH NAME +SAPHanaSR-tests-classic-ScaleUp \- Functional tests for SAPHanaSR Scale-Up. +.PP +.\" +.SH DESCRIPTION +.PP +Functional tests are shipped for scale-up scenarios. These tests can be run +out-of-the-box. The test cases are defined in dedicated files. +See manual page SAPHanaSR-tests-syntax(5) for syntax details. Details like +performed steps or expected behaviour of cluster and HANA are explained in +SAPHanaSR-tests-description(7). +.PP +Predefined functional tests for SAPHanaSR scale-up overview: +.TP +\fBblock_manual_takeover\fP +Blocked manual takeover, for susTkOver.py. +.TP +\fBblock_sr\fP +Block HANA SR and check SFAIL attribute; unblock to recover, for SAPHanaSR.py. +.TP +\fBflup\fP +Like nop but very short sleep, just checking the test engine. +.TP +\fBfree_log_area\fP +Free HANA log area on primary site. +.TP +\fBkill_prim_indexserver\fP +Kill primary indexserver, for susChkSrv.py. +.TP +\fBkill_prim_inst\fP +Kill primary instance. +.TP +\fBkill_prim_node\fP +Kill primary node. +.TP +\fBkill_secn_indexserver\fP +Kill secondary indexserver, for susChkSrv.py. +.TP +\fBkill_secn_inst\fP +Kill secondary instance. +.TP +\fBkill_secn_node\fP +Kill secondary node. +.TP +\fBmaintenance_cluster_bootstrap\fP +Maintenance procedure, initially configuring cluster resources. Not yet implemented. +.TP +\fBmaintenance_cluster_hana_running\fP +Maintenance procedure with stopping and restarting cluster, keep HANA running. +.\" former restart_cluster_hana_running +.TP +\fBmaintenance_cluster_turn_hana\fP +Maintenance procedure, manually turning HANA sites. +.TP +\fBmaintenance_cluster_with_standby_nodes\fP +Maintenance procedure, standby+online secondary then standby+online primary. +.TP +\fBnop\fP +No operation - check, wait and check again (stability check). +.TP +\fBone_stable_hour\fP +Check regularly for one hour that there is no failure, like nop. +.TP +\fBsplit_brain_prio\fP +Network split-brain with priority fencing. +.TP +\fBstandby_prim_node\fP +Set primary node standby and online again.
+.TP +\fBstandby_secn_node\fP +Set secondary node standby and online again. +.PP +.\" +.SH EXAMPLES +.PP +* List tests for SAPHanaSR scale-up scenarios +.PP +.RS 2 +# ls /usr/share/SAPHanaSR-tester/json/classic-ScaleUp/ +.RE +.PP +* Check for new SAPHanaSR-showAttr +.PP +.RS 2 +/usr/bin/SAPHanaSR-showAttr --format=tester +.RE +.PP +.\" +.SH FILES +.\" +.TP +/usr/share/SAPHanaSR-tester/json/classic-ScaleUp/ +functional tests for SAPHanaSR scale-up scenarios. +.TP +/usr/bin/sct_test_* +shell scripts for un-easy tasks on the cluster nodes. +.TP +/usr/bin/SAPHanaSR-showAttr +script SAPHanaSR-showAttr from SAPHanaSR-angi. +.PP +.\" +.SH REQUIREMENTS +.\" +* The package SAPHanaSR-tester-client is needed on the cluster nodes. +.PP +* See also the REQUIREMENTS section in SAPHanaSR-tester(7) and SAPHanaSR(7). +.PP +* Of course, HANA database and Linux cluster have certain requirements. +Please refer to the product documentation. +.PP +.\" +.SH BUGS +In case of any problem, please use your favourite SAP support process to open +a request for the component BC-OP-LNX-SUSE. +Please report any other feedback and suggestions to feedback@suse.com. +.PP +.\" +.SH SEE ALSO +\fBSAPHanaSR-tester\fP(7) , \fBSAPHanaSR-testCluster\fP(8) , +\fBSAPHanaSR-tests-description\fP(7) , \fBSAPHanaSR-tests-syntax\fP(5) , +\fBSAPHanaSR-tests-ScaleOut\fP(7) , +\fBSAPHanaSR\fP(7) , \fBSAPHanaSR-showAttr\fP(8) +.PP +.\" +.SH AUTHORS +F.Herschel, L.Pinne. +.PP +.\" +.SH COPYRIGHT +(c) 2024 SUSE LLC +.br +The package SAPHanaSR-tester comes with ABSOLUTELY NO WARRANTY. +.br +For details see the GNU General Public License at +http://www.gnu.org/licenses/gpl.html +.\" diff --git a/man-tester/SAPHanaSR-tests-description.7 b/man-tester/SAPHanaSR-tests-description.7 index 26e9848a..23ea68ed 100644 --- a/man-tester/SAPHanaSR-tests-description.7 +++ b/man-tester/SAPHanaSR-tests-description.7 @@ -65,7 +65,7 @@ Comment: Infrastructure failure, main cluster case. .RS 2 Descr: Block HANA SR and freeze HANA FS on primary node. .br -Topology: ScaleUp. +Topology: ScaleUp (angi only). .br Prereq: Cluster and HANA are up and running, all good. .br @@ -84,7 +84,7 @@ Comment: Infrastructure failure, main cluster case. Descr: Block HANA SR and freeze HANA NFS on primary master node (not yet implemented). .br -Topology: ScaleOut. +Topology: ScaleOut (angi only). .br Prereq: Cluster and HANA are up and running, all good. .br @@ -103,7 +103,7 @@ Comment: Infrastructure failure, main cluster case. Descr: Block HANA SR and freeze HANA NFS on primary site (not yet implemented). .br -Topology: ScaleOut. +Topology: ScaleOut (angi only). .br Prereq: Cluster and HANA are up and running, all good. .br @@ -151,7 +151,7 @@ Comment: Just housekeeping. .RS 2 Descr: Freeze HANA FS on primary node. .br -Topology: ScaleUp. +Topology: ScaleUp (angi only). .br Prereq: Cluster and HANA are up and running, all good. .br @@ -170,7 +170,7 @@ Comment: Infrastructure failure, main cluster case. .RS 2 Descr: Freeze HANA NFS on primary master node. .br -Topology: ScaleOut. +Topology: ScaleOut (angi only). .br Prereq: Cluster and HANA are up and running, all good. .br @@ -189,7 +189,7 @@ Comment: Infrastructure failure, main cluster case. .RS 2 Descr: Freeze HANA NFS on primary site. .br -Topology: ScaleOut. +Topology: ScaleOut (angi only). .br Prereq: Cluster and HANA are up and running, all good. .br @@ -208,7 +208,7 @@ Comment: Infrastructure failure, main cluster case. .RS 2 Freeze HANA NFS on secondary site. .br -Topology: ScaleOut. +Topology: ScaleOut (angi only). 
.br Prereq: Cluster and HANA are up and running, all good. .br @@ -431,6 +431,56 @@ No takeover. One fencing. Comment: Node failure, main cluster case. .RE .PP +\fBmaintenance_cluster_bootstrap\fP +.RS 2 +Descr: Initially configuring cluster resources in CIB. +.br +Topology: ScaleUp, ScaleOut (not yet implemented). +.br +Prereq: Cluster and HANA are up and running, all good. +.br +Test: See SAPHanaSR_basic_cluster(7), SAPHanaSR-ScaleOut_basic_cluster(7), +ocf_suse_SAPHanaController(7), ocf_suse_SAPHanaTopology(7), +ocf_suse_SAPHanaFilesystem(7), +https://documentation.suse.com/sbp/sap-15/ . +.br +TODO +.br +Expect: CIB contains the documented resource configuration. +All nodes stay online. +Cluster stopped and restarted. +Both HANA keep running. +SR stays SOK. +No takeover. No fencing. +.br +Comment: Very first admin procedure. \fBOriginal CIB will be lost.\fP +.RE +.PP +\fBmaintenance_cluster_hana_running\fP +.RS 2 +Descr: Stop and restart cluster, keep HANA running. +.br +Topology: ScaleUp, ScaleOut. +.br +Prereq: Cluster and HANA are up and running, all good. +.br +Test: See SAPHanaSR_maintenance_examples(7). +crm maintenance on; +crm cluster stop --all; +crm cluster start --all; +crm resource refresh ; +crm resource refresh ; +crm resource maintenance off; +.br +Expect: All nodes stay online. +Cluster stopped and restarted. +Both HANA keep running. +SR stays SOK. +No takeover. No fencing. +.br +Comment: Main admin procedure. +.RE +.PP \fBmaintenance_cluster_turn_hana\fP .RS 2 Descr: Maintenance procedure, manually turning HANA sites. @@ -484,6 +534,21 @@ Expect: Cluster and HANA are up and running, all good. Comment: Main cluster case. .RE .PP +\fBone_stable_hour\fP +.RS 2 +Descr: Check regulary for one hour that there is no failure. +.br +Topology: ScaleUp, ScaleOut. +.br +Prereq: Cluster and HANA are up and running, all good. +.br +Test: Wait and see, repeat every ten minutes for one hour. +.br +Expect: Cluster and HANA are up and running, all good. +.br +Comment: Main cluster case. +.RE +.PP \fBregister_prim_cold_hana\fP .RS 2 Descr: Stop cluster, do manual takeover, leave former primary down and unregistered, start cluster. @@ -507,13 +572,13 @@ Comment: Admin mistake. .RS 2 Descr: Stop and restart cluster, keep HANA running. .br -Topology: ScaleUp, ScaleOut. +Topology: ScaleUp, ScaleOut (angi only). .br Prereq: Cluster and HANA are up and running, all good. .br Test: crm maintenance on; -crm cluster run "crm cluster stop"; -crm cluster run "crm cluster start"; +crm cluster stop --all; +crm cluster start --all; crm resource refresh ; crm resource refresh ; crm resource maintenance off; @@ -524,21 +589,21 @@ Both HANA keep running. SR stays SOK. No takeover. No fencing. .br -Comment: Main admin procedure. +Comment: Sub-optimal admin procedure. .RE .PP \fBrestart_cluster\fP .RS 2 Descr: Stop and restart cluster and HANA. .br -Topology: ScaleUp, ScaleOut. +Topology: ScaleUp, ScaleOut (angi only). .br Prereq: Cluster and HANA are up and running, all good. .br -Test: crm cluster run "crm cluster stop"; +Test: crm cluster stop --all; sapcontrol ... StartSystem; sapcontrol ... StartSystem; -crm cluster run "crm cluster start"; +crm cluster start --all; .br Expect: All nodes stay online. Cluster stopped and restarted. @@ -687,8 +752,9 @@ Please report any other feedback and suggestions to feedback@suse.com. 
.SH SEE ALSO \fBSAPHanaSR-tester\fP(7) , \fBSAPHanaSR-testCluster\fP(8) , \fBSAPHanaSR-tests-syntax\fP(5) , \fBSAPHanaSR-tests-angi-ScaleUp\fP(7) , -\fBSAPHanaSR-tests-angi-ScaleOut\fP(7) , \fBSAPHanaSR-angi\fP(7) , -\fBSAPHanaSR-showAttr\fP(8) +\fBSAPHanaSR-tests-angi-ScaleOut\fP(7) , +\fBSAPHanaSR-tests-classic-ScaleUp\fP(7) , +\fBSAPHanaSR-angi\fP(7) , \fBSAPHanaSR-showAttr\fP(8) .PP .\" .SH AUTHORS diff --git a/man-tester/SAPHanaSR-tests-syntax.5 b/man-tester/SAPHanaSR-tests-syntax.5 index 79272ef2..0e84e27a 100644 --- a/man-tester/SAPHanaSR-tests-syntax.5 +++ b/man-tester/SAPHanaSR-tests-syntax.5 @@ -298,6 +298,8 @@ TODO "mstResource": "\fISTRING\fP", .br "clnResource": "\fISTRING\fP", +.br + "ipAddr": "\fIIPADDRESS\fP", .br "userKey": "\fISTRING\fP" .br diff --git a/man/SAPHanaSR-ScaleOut_basic_cluster.7 b/man/SAPHanaSR-ScaleOut_basic_cluster.7 index 87ad8e85..352f809a 100644 --- a/man/SAPHanaSR-ScaleOut_basic_cluster.7 +++ b/man/SAPHanaSR-ScaleOut_basic_cluster.7 @@ -1,6 +1,6 @@ .\" Version: 1.001 .\" -.TH SAPHanaSR-ScaleOut_basic_cluster 7 "07 Aug 2023" "" "SAPHanaSR" +.TH SAPHanaSR-ScaleOut_basic_cluster 7 "18 Mar 2024" "" "SAPHanaSR" .\" .SH NAME SAPHanaSR-ScaleOut_basic_cluster \- SAP HANA System Replication scale-out basic cluster configuration. @@ -10,7 +10,8 @@ SAPHanaSR-ScaleOut_basic_cluster \- SAP HANA System Replication scale-out basic .\" The SAP HANA System Replication scale-out scenario needs a certain basic cluster configuration. Besides this necessary settings, some additional -configurations might match specific needs. +configurations might match specific needs. Adapting a few SAP HANA settings +might be beneficial as well. .\" .\" \fB* Corosync Basics\fR .\" @@ -110,6 +111,39 @@ In case systemd-style init is used for the HANA database, it might be desired to have the SAP instance service stopping after pacemaker at system shutdown. Therefor a drop-in file for the pacemaker service might help. See examples below. +.PP +\fB* SAP HANA Basics\fR + +\fB/usr/sap/${SID}/SYS/global/hdb/custom/config/global.ini\fR +.PP +\fB[memorymanager]\fR +.br +\fBfinal_memory_release_shutdown = [ auto | on | off ]\fR +.br +\fBfinal_memory_release_crash = [ auto | on | off ]\fR +.PP +Starting with SAP HANA 2.0 SPS06, the database shutdown can be accelerated by +optimizing memory de-allocation. Please refer to SAP documentation before setting +this parameters. +.\" TODO SAP notes 3405297 ? + +\fB/usr/sap/${SID}/SYS/global/hdb/custom/config/daemon.ini +.PP +\fB[daemon]\fR +.br +\fBterminationtimeout = [ \fImillisec\fB ]\fR +.br +\fBforcedterminationtimeout = [ \fImillisec\fB ]\fR +.PP +The first parameter defines the timeout from sending SIGTERM to finally terminating +child processes when HANA is shutting down by the STOP event. Used also as maximal +delay in system restart if 'restartterminationtimeout' parameter is not set. +The second defines the timeout from sending the SIGTERM to finally terminating +child processes when HANA is shutting down by the QUIT event. See also manual page +susChkSrv.py(7). +Please refer to SAP documentation before setting this parameters. +.\" TODO check above + .PP .\" .SH EXAMPLES @@ -453,7 +487,7 @@ F.Herschel, L.Pinne. .SH COPYRIGHT (c) 2018 SUSE Linux GmbH, Germany. 
.br -(c) 2019-2023 SUSE LLC +(c) 2019-2024 SUSE LLC .br For details see the GNU General Public License at http://www.gnu.org/licenses/gpl.html diff --git a/man/SAPHanaSR-manageProvider.8 b/man/SAPHanaSR-manageProvider.8 index b5b476e9..bb4f986a 100644 --- a/man/SAPHanaSR-manageProvider.8 +++ b/man/SAPHanaSR-manageProvider.8 @@ -1,6 +1,6 @@ .\" Version: 1.001 .\" -.TH SAPHanaSR-manageProvider 8 "13 Apr 2023" "" "SAPHanaSR" +.TH SAPHanaSR-manageProvider 8 "13 Jan 2024" "" "SAPHanaSR" .\" .SH NAME SAPHanaSR-manageProvider \- adds, updates, removes or shows HADR provider sections for SAP HANA @@ -39,7 +39,7 @@ add the section given in the template file to configuration. remove the section given in the template file from configuration. .TP 4 \fB --reconfigure\fR -not implemented yet, use 'hdbnsutil -reloadHADRProviders' instead. +apply change to running HANA, like 'hdbnsutil -reloadHADRProviders'. .TP 4 \fB --show --provider\fR= show the section for given provider from configuration. @@ -56,6 +56,14 @@ Successful program execution. Usage, syntax or execution errors. .\" .SH EXAMPLES +\fB*\fR Show names of all HA/DR provider sections in global.ini. + +Might be useful to see whether sections are named lower/upper/camel-case. +SID is HA1. +.PP +.RS 2 +# su - ha1adm -c "cdcoc; grep ^\\[ha_dr_provider_\ global.ini" +.RE .PP \fB*\fR Show the global.ini section ha_dr_provider_suschksrv, if available. @@ -133,7 +141,10 @@ the SAP python script to interact with global.ini. .SH BUGS Command hangs, if it needs an input file and you do not specify the file. .br -\-\-reconfiure does not work yet. +In case of any problem, please use your favourite SAP support process to open +a request for the component BC-OP-LNX-SUSE. +Please report any other feedback and suggestions to feedback@suse.com. +.PP .\" .SH SEE ALSO \fBSAPHanaSR-ScaleOut\fP(7) , \fBSAPHanaSR\fP(7) , @@ -156,7 +167,7 @@ https://www.susecon.com/archive-2021.html F.Herschel, L.Pinne. .\" .SH COPYRIGHT -(c) 2022-2023 SUSE LLC +(c) 2022-2024 SUSE LLC .br SAPHanaSR-manageProvider comes with ABSOLUTELY NO WARRANTY. .br diff --git a/man/SAPHanaSR-upgrade-to-angi-demo.8 b/man/SAPHanaSR-upgrade-to-angi-demo.8 new file mode 100644 index 00000000..64fac6ce --- /dev/null +++ b/man/SAPHanaSR-upgrade-to-angi-demo.8 @@ -0,0 +1,284 @@ +.\" Version: 1.001 +.\" +.TH SAPHanaSR-upgrade-to-angi-demo 8 "08 Mar 2024" "" "SAPHanaSR" +.\" +.SH NAME +SAPHanaSR-upgrade-to-angi-demo \- How to upgrade from SAPHanaSR to SAPHanaSR-angi. +.PP +.\" +.SH SYNOPSIS +\fBSAPHanaSR-upgrade-to-angi-demo\fR [ --help | --version | --list | --check | --erase | --upgrade ] +.br +\fBSAPHanaSR-upgrade-to-angi-demo\fR --run \fIFUNCTION\fP [ \fIFUNCTION\fP [ ... ]] +.\" +.SH DESCRIPTION +.PP +SAPHanaSR-upgrade-to-angi-demo demonstrates how to upgrade from SAPHanaSR to +SAPHanaSR-angi. The script collects information from an Linux HA cluster with +SAPHanaSR. Based on that data it suggests step by step the commands to upgrade +the cluster. The running configuration is not changed. See also manual page +SAPHanaSR_upgrade_to_angi(7). +.PP +.B SAPHanaSR-upgrade-to-angi-demo is shipped as technology preview. +.PP +.\" +.SH OPTIONS +.TP 4 +\fB --help\fP +show help. +.TP 4 +\fB --version\fP +show version. +.TP 4 +\fB --check\fP +do some basic checks on configuration and status of Linux cluster. +.TP 4 +\fB --erase\fP +show demo for removing SAPHanaSR. +.TP 4 +\fB --list\fP +list SAPHanaSR-upgrade-to-angi-demo functions. +.TP 4 +\fB --run \fP \fIFUNCTION\fP [ \fIFUNCTION\fP [ ... 
]] +run given function(s). Not all functions will work once CIB attributes have been deleted. +.TP 4 +\fB --upgrade\fP +show demo for upgrading from SAPHanaSR to SAPHanaSR-angi. +.\" +.PP +.SH RETURN CODES +.B 0 +Successful program execution. +.br +.B >0 +Usage, syntax or execution errors. +.PP +.\" +.SH EXAMPLES +.PP +\fB*\fP Copying the script to outside the package. +.PP +The script still needs to be available on both cluster nodes after the SAPHanaSR +RPM has been removed. Needs to be done on all cluster nodes. +.PP +.RS 2 +# cp -a /usr/share/SAPHanaSR/samples/SAPHanaSR-upgrade-to-angi-demo /root/bin/ +.br +# chmod 755 /root/bin/SAPHanaSR-upgrade-to-angi-demo +.br +# SAPHanaSR-upgrade-to-angi-demo --help +.RE +.PP +\fB*\fP Example for finding the HANA primary node. +.PP +.RS 2 +# SAPHanaSR-showAttr --format=script |\\ +.br + awk -F/ '$3=="clone_state=\\"PROMOTED\\"" {print $2}' +.br +# crm_mon -1r | grep "* Masters: \\[" +.RE +.PP +\fB*\fP Checking sane state of cluster. +.PP +These steps should be performed before doing anything with the cluster, and after +something has been done. +See also SAPHanaSR_upgrade_to_angi(7) and SAPHanaSR_maintenance_examples(7). +.PP +.RS 2 +# cs_clusterstate -i +.br +# crm_mon -1r +.br +# crm configure show type:location | grep cli- +.br +# SAPHanaSR-showAttr +.RE +.PP +\fB*\fP Checking pre-requisites for the upgrade. +.PP +The script should be run on the HANA primary node before the upgrade. It can also +be run on the HANA secondary. Any error message should be investigated. +The check covers several requirements for the upgrade, but not all. On the other +hand, some of the pre-requisites are caused by limitations of the script +SAPHanaSR-upgrade-to-angi-demo itself. Those might not be needed for doing the +real upgrade manually. See also SAPHanaSR_upgrade_to_angi(7), +cs_show_error_patterns(8) and cs_show_cluster_patterns(8). +.PP +.RS 2 +# SAPHanaSR-upgrade-to-angi-demo --check +.RE +.PP +\fB*\fP Demonstrating an upgrade. +.PP +HANA´s SID and instance number will be detected, as well as the names of both +cluster nodes. The names of cluster resources and constraints will be derived +from the CIB. Based on that information, command sequences will be proposed for +performing an upgrade: +.br +- Collecting needed data. +.br +- Backing up old CIB, sudoers file and global.ini. +.br +- Setting HANA resource into maintenance mode. +.br +- Removing old rules from sudoers file on both nodes. +.br +- Removing old hook scripts from global.ini on both nodes. +.br +- Removing old cluster resources and constraints from CIB. +.br +- Removing old node attributes and SAPHanaSR properties from CIB. +.br +- Removing old SAPHanaSR package. +.br +- Adding new SAPHanaSR-angi package. +.br +- Adding new rules to sudoers file on both nodes. +.br +- Adding new hook scripts to global.ini on both nodes. +.br +- Adding new cluster resources and constraints to CIB. +.br +- Probing HANA resource status. +.br +- Setting HANA resource back to managed. +.PP +The script needs to be copied to all cluster nodes upfront. It should be called +on the HANA primary node. Before doing this, you should check and prepare +pre-requisites, see example above. The proposed commands need to be checked. +Sometimes adaptations are necessary. +See also SAPHanaSR_upgrade_to_angi(7). +.PP +.RS 2 +# SAPHanaSR-upgrade-to-angi-demo --upgrade +.RE +.PP +\fB*\fP Drafting a runbook for the manual upgrade. +.PP +The script should be used on the HANA primary node.
+Before doing this, you should check and prepare pre-requisites, see example +above. The runbook draft is stored as file "SAPHanaSR-upgrade-draft.txt". +This draft can be used for preparing details for the upgrade procedure. +The proposed commands need to be checked. Sometimes adaptations are +necessary. Of course the result needs to be checked finally as well. +See also SAPHanaSR_upgrade_to_angi(7) and tee(1). +.PP +.RS 2 +# SAPHanaSR-upgrade-to-angi-demo --upgrade | tee SAPHanaSR-upgrade-draft.txt +.br +# less SAPHanaSR-upgrade-draft.txt +.RE +.PP +\fB*\fP Demonstrating removal of package SAPHanaSR and its configuration. +.PP +Removing the old SAPHanaSR without performing the complete upgrade manually +might be useful if installation and configuration of the new SAPHanaSR-angi +should be done by deployment automation tools. +.PP +HANA´s SID and instance number will be detected, as well as the names of both +cluster nodes. The names of cluster resources and constraints will be derived +from the CIB. Based on that information, command sequences will be proposed for +performing the removal of SAPHanaSR: +.br +- Collecting needed data. +.br +- Backing up old CIB, sudoers file and global.ini. +.br +- Setting HANA resource into maintenance mode. +.br +- Removing old rules from sudoers file on both nodes. +.br +- Removing old hook scripts from global.ini on both nodes. +.br +- Removing old cluster resources and constraints from CIB. +.br +- Removing old node attributes and SAPHanaSR properties from CIB. +.br +- Removing old SAPHanaSR package. +.PP +The script needs to be copied to all cluster nodes beforehand. It should be +called on the HANA primary node. Before doing this, you should check and prepare +pre-requisites, see example above. The proposed commands need to be checked. +Sometimes adaptations are necessary. Of course the result needs to be checked +finally as well. See also SAPHanaSR_upgrade_to_angi(7). +.PP +.RS 2 +# SAPHanaSR-upgrade-to-angi-demo --erase +.RE +.PP +.\" +.SH FILES +.TP +/usr/share/SAPHanaSR-angi/samples/SAPHanaSR-upgrade-to-angi-demo +unsupported script for demonstrating the procedure on a test cluster +.TP +/etc/sudoers.d/SAPHanaSR +recommended place for sudo permissions of HADR provider hook scripts +.TP +/hana/shared/$SID/global/hdb/custom/config/global.ini +on-disk representation of HANA global system configuration +.TP +SAPHanaSR-upgrade-to-angi-demo. +directory with backup of old configuration +.PP +.\" +.SH REQUIREMENTS +.PP +* The initial configuration matches SUSE´s documented setup of the SAPHanaSR +scale-up performance-optimised scenario. +.br +* Linux cluster, HANA and system replication are in sane state before the +upgrade. All cluster nodes are online. +.br +* Package libxml2-tools is installed on both cluster nodes. Package ClusterTools2 +is recommended. +.br +* SAPHanaSR-upgrade-to-angi-demo is copied into /root/bin/ and made executable +on both cluster nodes. +.br +* SAPHanaSR-upgrade-to-angi-demo is executed on the HANA primary node. +.br +* SAPHanaSR-upgrade-to-angi-demo should be run on test systems. +.br +* A backup has been made for the cluster nodes. +.br +* The commands suggested by SAPHanaSR-upgrade-to-angi-demo have been checked before +they are applied. +.br +* For further requirements see manual page SAPHanaSR_upgrade_to_angi(7). +.PP +.\" +.SH BUGS +.br +In case of any problem, please use your favourite SAP support process to open a request for the component BC-OP-LNX-SUSE.
Please report any other feedback and suggestions to feedback@suse.com. +.PP +.\" +.SH SEE ALSO +.br +\fBSAPHanaSR-angi\fP(7) , \fBSAPHanaSR\fP(7) , +\fBocf_suse_SAPHana\fP(7) , \fBocf_suse_SAPHanaController\fP(7) , +\fBSAPHanaSR.py\fP(7) , \fBsusHanaSR.py\fP(7) , \fBSAPHanaSR_upgrade_to_angi\fP(7) , +\fBSAPHanaSR_maintenance_examples\fP(7) , \fBSAPHanaSR-showAttr\fP(8) , +\fBcrm\fP(8) , \fBcrm_mon\fP(8) , \fBcrm_attribute\fP(8) , \fBcibadmin\fP(8) , +\fBcs_wait_for_idle\fP(8) , +.br +https://documentation.suse.com/sbp/sap/ , +.br +https://www.suse.com/c/tag/towardszerodowntime/ +.PP +.\" +.SH AUTHORS +.br +A.Briel, F.Herschel, L.Pinne. +.PP +.\" +.SH COPYRIGHT +.br +(c) 2024 SUSE LLC +.br +SAPHanaSR-upgrade-to-angi-demo comes with ABSOLUTELY NO WARRANTY. +.br +For details see the GNU General Public License at +http://www.gnu.org/licenses/gpl.html +.\" diff --git a/man/SAPHanaSR_basic_cluster.7 b/man/SAPHanaSR_basic_cluster.7 index 7f656e39..85ec5347 100644 --- a/man/SAPHanaSR_basic_cluster.7 +++ b/man/SAPHanaSR_basic_cluster.7 @@ -1,6 +1,6 @@ .\" Version: 1.001 .\" -.TH SAPHanaSR 7 "08 Aug 2023" "" "SAPHanaSR_basic_cluster" +.TH SAPHanaSR 7 "18 Mar 2024" "" "SAPHanaSR_basic_cluster" .\" .SH NAME SAPHanaSR_basic_cluster \- SAP HANA System Replication scale-up basic cluster configuration. @@ -10,7 +10,8 @@ SAPHanaSR_basic_cluster \- SAP HANA System Replication scale-up basic cluster co .\" The SAP HANA System Replication scale-up scenario needs a certain basic cluster configuration. Besides this necessary settings, some additional -configurations might match specific needs. +configurations might match specific needs. Adapting a few SAP HANA settings +might be beneficial as well. .\" .\" \fB* Corosync Basics\fR .\" @@ -110,6 +111,38 @@ In case systemd-style init is used for the HANA database, it might be desired to have the SAP instance service stopping after pacemaker at system shutdown. Therefor a drop-in file for the pacemaker service might help. See examples below. +.PP +\fB* SAP HANA Basics\fR + +\fB/usr/sap/${SID}/SYS/global/hdb/custom/config/global.ini\fR +.PP +\fB[memorymanager]\fR +.br +\fBfinal_memory_release_shutdown = [ auto | on | off ]\fR +.br +\fBfinal_memory_release_crash = [ auto | on | off ]\fR +.PP +Starting with SAP HANA 2.0 SPS06, the database shutdown can be accelerated by +optimizing memory de-allocation. Please refer to SAP documentation before setting +this parameters. +.\" TODO SAP notes 3405297 ? + +\fB/usr/sap/${SID}/SYS/global/hdb/custom/config/daemon.ini +.PP +\fB[daemon]\fR +.br +\fBterminationtimeout = [ \fImillisec\fB ]\fR +.br +\fBforcedterminationtimeout = [ \fImillisec\fB ]\fR +.PP +The first parameter defines the timeout from sending SIGTERM to finally terminating +child processes when HANA is shutting down by the STOP event. Used also as maximal +delay in system restart if 'restartterminationtimeout' parameter is not set. +The second defines the timeout from sending the SIGTERM to finally terminating +child processes when HANA is shutting down by the QUIT event. +Please refer to SAP documentation before setting this parameters. +.\" TODO check above + .PP .\" .SH EXAMPLES @@ -164,7 +197,7 @@ SBD resource: .RS 2 primitive rsc_stonith_sbd stonith:external/sbd \\ .br - params pcmk_delay_max="30" + params pcmk-delay-max=30 .RE .PP @@ -177,7 +210,7 @@ Example for a priority fencing disk-based SBD resource. 
.br primitive rsc_stonith_sbd stonith:external/sbd \\ .br - params pcmk_delay_base=15 \\ + params pcmk-delay-max=15 \\ .br property cib-bootstrap-options: \\ .br @@ -416,7 +449,7 @@ A.Briel, F.Herschel, L.Pinne. .SH COPYRIGHT (c) 2018 SUSE Linux GmbH, Germany. .br -(c) 2019-2023 SUSE LLC +(c) 2019-2024 SUSE LLC .br For details see the GNU General Public License at http://www.gnu.org/licenses/gpl.html diff --git a/man/SAPHanaSR_maintenance_examples.7 b/man/SAPHanaSR_maintenance_examples.7 index 190a91ee..5db66fc2 100644 --- a/man/SAPHanaSR_maintenance_examples.7 +++ b/man/SAPHanaSR_maintenance_examples.7 @@ -1,6 +1,6 @@ .\" Version: 1.001 .\" -.TH SAPHanaSR_maintenance_examples 7 "06 Dec 2023" "" "SAPHanaSR" +.TH SAPHanaSR_maintenance_examples 7 "25 Jan 2024" "" "SAPHanaSR" .\" .SH NAME SAPHanaSR_maintenance_examples \- maintenance examples for SAPHanaController. @@ -44,6 +44,31 @@ This might be convenient when performing administrative actions or cluster tests # watch -n9 "crm_mon -1r --include=none,nodes,resources,failures;echo;SAPHanaSR-showAttr;cs_clusterstate -i|grep -v '#'" .RE .PP +\fB*\fR Overview on stopping the HANA database at one site. + +This procedure does work for scale-up and scale-out. No takeover will be done. This procedure +should be used, when it is neccessary to stop the HANA database. Stopping the HANA database +should not be done by just stopping the Linux cluster or shutting down the OS. This particularly +applies to scale-out systems. It might be good to define upfront which HANA site needs to be +stopped. In case both sites need to be stopped, it might be good to define the order. First +stopping the primary should keep system replication in sync. +.br +How long a stop will take, depends on database size, performance of underlying infrastructure, +SAP HANA version and configuration. Please refer to SAP HANA documentation for details on +tuning and stopping an HANA database. +.PP +.RS 4 +1. Checking status of Linux cluster and HANA system replication pair. +.br +2. Setting SAPHana or SAPHanaController multi-state resource into maintenance. +.br +3. Stopping HANA database at the given site by using "sapcontrol -nr -function StopSystem". +.br +4. Checking that HANA is stopped. +.RE +.PP +Note: Do not forget to end the resource maintenance after you have re-started the HANA database. +.PP \fB*\fR Initiate an administrative takeover of the HANA primary from one node to the other by using the Linux cluster. This procedure does not work for scale-out. On scale-up, it will stop the HANA primary. @@ -568,7 +593,7 @@ F.Herschel, L.Pinne. .SH COPYRIGHT (c) 2017-2018 SUSE Linux GmbH, Germany. .br -(c) 2019-2023 SUSE LLC +(c) 2019-2024 SUSE LLC .br This maintenance examples are coming with ABSOLUTELY NO WARRANTY. .br diff --git a/man/SAPHanaSR_upgrade_to_angi.7 b/man/SAPHanaSR_upgrade_to_angi.7 index 6c32d5a9..007d763d 100644 --- a/man/SAPHanaSR_upgrade_to_angi.7 +++ b/man/SAPHanaSR_upgrade_to_angi.7 @@ -1,6 +1,6 @@ .\" Version: 1.001 .\" -.TH SAPHanaSR_upgrade_to_angi 7 "14 Feb 2024" "" "SAPHanaSR" +.TH SAPHanaSR_upgrade_to_angi 7 "02 Apr 2024" "" "SAPHanaSR" .\" .SH NAME SAPHanaSR_upgrade_to_angi \- How to upgrade from SAPHanaSR or SAPHanaSR-ScaleOut to SAPHanaSR-angi. @@ -15,89 +15,125 @@ SAPHanaSR-angi is quite similar to SAPHanaSR and SAPHanaSR-ScaleOut, but not fully backward compatible. Upgrading existing clusters is possible by following a defined procedure. The upgrade should lead to the same configuration as an installation from scratch. 
+.PP The upgrade procedure depends on an initial setup as decribed in setup guides and manual pages. See REQUIREMENTS below and in manual pages SAPHanaSR(7) or -SAPHanaSR-ScaleOut(7). +SAPHanaSR-ScaleOut(7). The procedure does not necessarily need downtime for +HANA, if planned and executed carefully. Nevertheless, it should be done under +friendly conditions. .PP \fB*\fR What will be changed for SAP HANA scale-up scenarios? .PP -.RS 2 +SAPHanaSR-angi unifies HA for HANA scale-up and scale-out. Therefore it handles +scale-up as a subset of scale-out, which changes the structure of attributes. +The most significant changes are listed below. +.PP a. The SAPHana RA and its multi-state config will be replaced by the new SAPHanaController and its clone promotable config. .br b. The SAPHanaSR.py HADR provider hook script will be replaced by the new susHanaSR.py. .br -c. Tools are placed in /usr/bin/. +c. Tools are placed in /usr/bin/ instead of /usr/sbin/. .br d. Node attributes will be removed. -.br +.RS 4 hana__vhost +.br hana__site +.br hana__remoteHost +.br lpa__lpt -hana__op_mode -hana__srmode -hana__sync_state -TODO .br -e. Site and global attributes will be removed from property SAPHanaSR. +hana__op_mode .br -TODO +hana__srmode .br -f. Site and global attributes will be added to property SAPHanaSR. +hana__sync_state .br +First and second field of hana__roles +.RE +.\" e. Site and global attributes will be removed from property SAPHanaSR +.\" .br +e. Site and global attributes will be added to property SAPHanaSR. +.RS 4 hana__glob_topology +.br hana__glob_prim +.br hana__glob_sec +.br hana__site_lpt_ +.br hana__site_lss_ +.br hana__site_mns_ +.br hana__site_srr_ +.br hana__site_opMode_ +.br hana__site_srMode_ -hana__site_srHook_ +.br hana__site_srPoll_ -TODO +.br +TODO vhost remoteHost .RE .PP \fB*\fR What will be changed for SAP HANA scale-out scenarios? .PP -.RS 2 +SAPHanaSR-angi unifies HA for HANA scale-up and scale-out. The structure of +attributes stays unchanged. The most significant changes are listed below. +.PP a. The SAPHanaController RA and its multi-state config will be replaced by the new SAPHanaController and its clone promotable config. .br b. The SAPHanaSrMultiTarget.py HADR provider hook script will be replaced by the new susHanaSR.py. .br -c. Tools are placed in /usr/bin/. +c. Tools are placed in /usr/bin/ instead of /usr/sbin/. .br d. Node attributes will be removed. -.br +.RS 4 gra -gsh .br +gsh +TODO +.RE e. Site and global attributes will be removed from property SAPHanaSR. -.br +.RS 4 mts upd hana__glob_sync_state hana__glob_srHook (in case of obsolete scale-out SAPHanaSR.py) TODO -.br +.RE f. Site and global attributes will be added to property SAPHanaSR. -.br +.RS 4 hana__glob_topology +.br hana__site_lpt_ +.br hana__site_lss_ +.br hana__site_mns_ +.br hana__site_srr_ +.br hana__site_srMode_ +.br hana__site_srPoll_ +.br TODO .RE .PP -\fB*\fR How does the procedure look like at a glance? +\fB*\fR What does the upgrade procedure look like at a glance? +.PP +The upgrade procedure consists of four phases: preparing, removing, adding, +finalising. Linux cluster and HANA are kept running. However, resource +management is disabled and the system goes through fragile states during the +upgrade.
.PP .RS 2 1.1 Check for sane state of cluster, HANA and system replication @@ -132,6 +168,8 @@ TODO 4.1 Check for sane state of cluster, HANA and system replication .br 4.2 Test RA on secondary and trigger susHanaSR.py (optional) +.br +4.3 Remove ad-hoc backup from local directories .RE .PP \fB*\fR What needs to be prepared upfront? @@ -141,6 +179,7 @@ SAPHanaSR-angi. Refresh your knowledge of SAPHanaSR or SAPHanaSR-ScaleOut. .PP Next the following information needs to be collected and documented before upgrading a cluster: +.PP .RS 2 1.1 Path to config backup directory at both sites .br @@ -228,7 +267,25 @@ Needs to be done once per Linux cluster. # crm configure show rsc_SAPHana_HA1_HDB00 .RE .PP -\fB*\fR Example for removing SAPHana resource config from CIB. +\fB*\fR Example for making a backup of CIB, sudo config and global.ini. +.PP +SID is HA1, sudo config is /etc/sudoers.d/SAPHanaSR. +.PP +.RS 2 +# export BAKDIR=SAPHanaSR.$(date +%s) +.br +# mkdir ~/$BAKDIR +.br +# cp -a /hana/shared/HA1/global/hdb/custom/config/global.ini ~/$BAKDIR/ +.br +# cp -a /etc/sudoers.d/SAPHanaSR ~/$BAKDIR/SAPHanaSR.sudo +.br +# crm configure show >~/$BAKDIR/crm_configure.txt +.br +# ls -l ~/$BAKDIR/* +.RE +.PP +\fB*\fR Example for removing SAPHana resource config from CIB, scale-up. .PP First the CIB is written to file for backup. Next the cluster is told to not stop orphaned resources and the SAPHana @@ -242,7 +299,7 @@ The resource names have been determined as shown in the example above. example above. .PP .RS 2 -# crm configure show > cib.SAPHanaSR-backup +# crm configure show > SAPHanaSR-crm-backup .br # echo "property cib-bootstrap-options: stop-orphan-resources=false"|\\ crm configure load update - @@ -269,6 +326,22 @@ example above. # crm_mon -1r .RE .PP +\fB*\fR Example for removing location constraints from CIB, scale-out. +.PP +First, the same steps as for scale-up have to be done, see example above. +In addition the (anti-)location constraints for the majority maker node have to +be removed. The resource names have been determined as shown in the example above. +.PP +.RS 2 +# cibadmin --delete --xpath \\ +.br + "//rsc_location[@id='SAPHanaCon_not_on_majority_maker']" +.br +# cibadmin --delete --xpath \\ +.br + "//rsc_location[@id='SAPHanaTop_not_on_majority_maker']" +.RE +.PP \fB*\fR Example for removing all reboot-safe node attributes from CIB. .PP All reboot-safe node attributes will be removed. Needed attributes are expected @@ -283,18 +356,16 @@ See also crm_attribute(8). .br # crm configure show node1 | tr " " "\\n" |\\ .br - tail -n +6 | awk -F "=" '{print $1}' |\\ -.br -while read; do \\ + awk -F "=" 'NR>5 {print $1}' | while read; do \\ .br crm_attribute --node node1 --name $REPLY --delete; done .RE .PP -\fB*\fR Example for removing non-reboot-safe node attribute from CIB. +\fB*\fR Example for removing non-reboot-safe node attribute from CIB, scale-up. .PP The attribute hana__sync_state will be removed. Of course the CIB should be checked to see if the removal was successful. -Needs to be done for both nodes, scale-up only. +Needs to be done for both nodes. Scale-up only. Node is node1, SID is HA1. See also crm_attribute(8). .PP @@ -308,14 +379,14 @@ See also crm_attribute(8). --lifetime reboot --delete .RE .PP -\fB*\fR Example for removing all SAPHanaSR property attributes from CIB. +\fB*\fR Example for removing all SAPHanaSR property attributes from CIB, scale-out. .PP All attributes of porperty SAPHanaSR will be removed. Needed attributes are expected to be re-added by the RAs later. 
The attribute for srHook will be added by the susHanaSR.py HADR provider script and might be missing until the HANA system replication status changes. Of course the CIB should be checked to see if the removal was successful. -Needs to be done once per Linux cluster. +Needs to be done once per Linux cluster. Scale-out only. See also SAPHanaSR-showAttr(8) and SAPHanaSR.py(7) or SAPHanaSrMultiTarget.py(7) respectively. .PP @@ -324,9 +395,7 @@ respectively. .br # crm configure show SAPHanaSR |\\ .br - awk -F"=" '$1~/hana_/ {print $1}' |\\ -.br -while read; do \\ + awk -F"=" '$1~/hana_/ {print $1}' | while read; do \\ .br crm_attribute --delete --type crm_config --name $REPLY; done .RE @@ -338,9 +407,11 @@ the section is determined from global.ini. Then the currenct HADR provider section is shown. If the section is identical with the shipped template, it can be removed easily from the configuration. Finally the HADR provider hook script is removed from running HANA. Needs to be done for each HANA site. -SID is HA1, case sensitive HADR provider name is SAPHanaSR. See manual page -SAPHanaSR.py(7) or SAPHanaSrMultiTarget.py(7) for details on checking the hook -script integration. +SID is HA1, case sensitive HADR provider name is SAPHanaSR. The example is given +for scale-up SAPHanaSR.py, for scale-out SAPHanaSrMultiTarget.py might be +removed instead. The path /usr/sbin/ is used, because this step is done while +the old RPM is still installed. See manual page SAPHanaSR.py(7) or +SAPHanaSrMultiTarget.py(7) for details on checking the hook script integration. .PP .RS 2 # su - ha1adm @@ -351,11 +422,11 @@ script integration. .br ~> grep -i ha_dr_provider_saphanasr global.ini .br -~> /usr/bin/SAPHanaSR-manageProvider --sid=HA1 --show \\ +~> /usr/sbin/SAPHanaSR-manageProvider --sid=HA1 --show \\ .br --provider=SAPHanaSR .br -~> /usr/bin/SAPHanaSR-manageProvider --sid=HA1 --reconfigure \\ +~> /usr/sbin/SAPHanaSR-manageProvider --sid=HA1 --reconfigure \\ .br --remove /usr/share/SAPHanaSR/samples/global.ini .br @@ -364,7 +435,8 @@ script integration. .PP \fB*\fR Example for removing the SAPHanaSR.py hook script from sudoers. .PP -Needs to be done on each node. +Needs to be done on each node. The example is given for scale-up SAPHanaSR.py, +for scale-out SAPHanaSrMultiTarget.py might be removed instead. See manual page SAPHanaSR.py(7) for details on checking the hook script integration. .PP @@ -380,7 +452,8 @@ integration. .PP The package SAPHanaSR is removed from all cluster nodes. Related packages defined by patterns and dependencies are not touched. Needs to be done once per -Linux cluster. +Linux cluster. The example is given for scale-up SAPHanaSR, for scale-out +SAPHanaSR-ScaleOut might be removed instead. .PP .RS 2 # crm cluster run "rpm -E --force SAPHanaSR" @@ -457,28 +530,45 @@ Of course status of cluster, HANA and system replication needs to be checked. 
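.PP
\fB*\fR Example for a final check of cluster, HANA and system replication.
.PP
This is a minimal sketch only; SID is HA1, the resource names and any further
checks depend on the local setup. See also SAPHanaSR-showAttr(8), crm_mon(8).
.PP
.RS 2
# crm_mon -1r
.br
# crm configure show | grep -e SAPHana
.br
# SAPHanaSR-showAttr
.br
# su - ha1adm -c "HDBSettings.sh systemReplicationStatus.py"
.RE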
/etc/sudoers.d/SAPHanaSR recommended place for sudo permissions of HADR provider hook scripts .TP -/usr/bin/ -path to tools +/usr/sbin/ , /usr/bin/ +path to tools before the upgrade, after the upgrade .TP /hana/shared/$SID/global/hdb/custom/config/global.ini on-disk representation of HANA global system configuration .TP /usr/share/SAPHanaSR/samples/global.ini -template for classical SAPHanaSR.py entry in global.ini +template for classical scale-up SAPHanaSR.py entry in global.ini +.TP +/usr/share/SAPHanaSR-ScalOut/samples/global.ini +template for classical scale-out SAPHanaSrMultiTarget.py entry in global.ini .TP /usr/share/SAPHanaSR-angi/samples/global.ini_susHanaSR template for susHanaSR.py entry in global.ini +.TP +/usr/share/SAPHanaSR-angi/samples/SAPHanaSR-upgrade-to-angi-demo +unsupported script for demonstrating the procedure on a test cluster .PP .\" .SH REQUIREMENTS .PP -* OS, Linux cluster and HANA are matching requirements for SAPHanaSR, or SAPHanaSR-ScaleOut respectively, and SAPHanaSR-angi. +* OS, Linux cluster and HANA are matching requirements for SAPHanaSR, or +SAPHanaSR-ScaleOut respectively, and SAPHanaSR-angi. +.br +* The resource configuration matches a documented setup. Even if the general +upgrade procedure is expected to work for customised configuration, details +might need special treatment. +.br +* The whole upgrade procedure is tested carefully and documented in detail +before being applied on production. .br -* Linux cluster, HANA and system replication are in sane state before the upgrade. All cluster nodes are online. +* Linux cluster, HANA and system replication are in sane state before the +upgrade. All cluster nodes are online. .br -* The whole procedure is tested carefully and documented in detail before being applied on production. +* The HANA database is idle during the upgrade. No other changes on OS, cluster, +database or infrastructure are done in parallel to the upgrade. .br -* Linux cluster, HANA and system replication are checked and in sane state before set back into production. +* Linux cluster, HANA and system replication are checked and in sane state +before set back into production. .PP .\" .SH BUGS @@ -491,7 +581,7 @@ In case of any problem, please use your favourite SAP support process to open a \fBSAPHanaSR-angi\fP(7) , \fBSAPHanaSR\fP(7) , \fBSAPHanaSR-ScaleOut\fP(7) , \fBocf_suse_SAPHana\fP(7) , \fBocf_suse_SAPHanaController\fP(7) , \fBSAPHanaSR.py\fP(7) , \fBSAPHanaSrMultiTarget.py\fP(7) , -\fBsusHanaSR.py\fP(7) , +\fBsusHanaSR.py\fP(7) , \fBSAPHanaSR-upgrade-to-angi-demo\fP(8) , \fBSAPHanaSR_maintenance_examples\fP(7) , \fBSAPHanaSR-showAttr\fP(8) , \fBcrm\fP(8) , \fBcrm_mon\fP(8) , \fBcrm_attribute\fP(8) , \fBcibadmin\fP(8) , .br diff --git a/man/ocf_suse_SAPHanaController.7 b/man/ocf_suse_SAPHanaController.7 index f8605b6f..4d4d6e87 100644 --- a/man/ocf_suse_SAPHanaController.7 +++ b/man/ocf_suse_SAPHanaController.7 @@ -1,6 +1,6 @@ .\" Version: 1.001 .\" -.TH ocf_suse_SAPHanaController 7 "13 Dec 2023" "" "OCF resource agents" +.TH ocf_suse_SAPHanaController 7 "14 Mar 2024" "" "OCF resource agents" .\" .SH NAME SAPHanaController \- Manages takeover between two SAP HANA databases with system replication. @@ -135,6 +135,21 @@ Normally you do not need to set this parameter. Optional, well known directories will be searched by default. .RE .PP +\fBON_FAIL_ACTION\fR +.RS 4 +Defines how the RA escalates monitor failures on an HANA primary node. +If srHook=SOK, in case of monitor failure an node fencing could be triggered. 
+For srHook=SFAIL, the restart will be proceeded as usual. This option may speed +up takeover on scale-up systems, depending on how long HANA needs for stopping. +Values: [ proceed | fence ]. +.br +- proceed: proceed the failure as usual, i.e. initiate demote-stop sequence. +.br +- fence: trigger stop failure and node fencing, if conditions are matched. +.br +Experimental (Optional). Default value: proceed. +.RE +.PP \fBPREFER_SITE_TAKEOVER\fR .RS 4 Defines whether RA should prefer to takeover to the secondary database instead of restarting the primary site locally. However a takeover will only be triggered, if the SAP HANA landscape status is on "ERROR". For "FATAL" a local restart is initiated. PREFER_SITE_TAKEOVER usually is choosen for HANA system replication performance-optimised setups. On the other hand local restart of the master instead of takeover could be combined with HANA's persistent memory features. Example: "PREFER_SITE_TAKEOVER=true". @@ -213,7 +228,10 @@ Starts the HANA instance or brings the "clone instance" to a WAITING status. The .RS 4 Stops the HANA instance. The correct timeout depends on factors like database size. -If HANA database internal timeouts have been tuned for fast shutdown, the RA timeout might be reduced. +Starting with SAP HANA 2.0 SPS06, shutdown can be accelerated by optimizing memory +de-allocation. See also manual page SAPHanaSR_basic_cluster(7). +If HANA database memory de-allocation and internal timeouts have been tuned for +fast shutdown, the RA timeout might be reduced. .\" TODO point to HANA parameters Suggested minimum timeout: 600\&. .RE @@ -522,7 +540,7 @@ F.Herschel, L.Pinne. .br (c) 2015-2017 SUSE Linux GmbH, Germany. .br -(c) 2018-2023 SUSE LLC +(c) 2018-2024 SUSE LLC .br The resource agent SAPHanaController comes with ABSOLUTELY NO WARRANTY. .br diff --git a/man/susChkSrv.py.7 b/man/susChkSrv.py.7 index e0946a63..beb8bd3e 100644 --- a/man/susChkSrv.py.7 +++ b/man/susChkSrv.py.7 @@ -1,6 +1,6 @@ .\" Version: 1.001 .\" -.TH susChkSrv.py 7 "13 Apr 2023" "" "SAPHanaSR" +.TH susChkSrv.py 7 "18 Mar 2024" "" "SAPHanaSR" .\" .SH NAME susChkSrv.py \- Provider for SAP HANA srHook method srServiceStateChanged(). @@ -56,7 +56,7 @@ Mandatory. Must not be changed. \fBpath = /usr/share/SAPHanaSR-angi\fP Mandatory. Delivered within RPM package. Please change only if requested. .TP -\fBexecution_order = [ INTEGER ]\fP +\fBexecution_order = [ \fIINTEGER\fB ]\fP Mandatory. Order might depend on other hook scripts. .TP \fBaction_on_lost = [ ignore | stop | kill | fence ]\fP @@ -70,21 +70,20 @@ If this is combined with SAPHana or SAPHanaController RA parameter 'AUTOMATED_RE HANA needs to release all OS resources prior to the automated registering. See also manual page ocf_suse_SAPHanaController(7). .br -- \fBkill\fP: do 'HDB kill-'. The signal can be defined by parameter 'kill_signal'. +- \fBkill\fP: do 'HDB kill-<\fIsignal\fR>'. The signal can be defined by parameter 'kill_signal'. If this is combined with SAPHanaController RA parameter 'AUTOMATED_REGISTER=true', HANA needs to release all OS resources prior to the automated registering. .br -- \fBfence\fP: do 'crm node fence '. This needs a Linux cluster STONITH - method and sudo permission. This action is primarily meant for scale-up. If -it happens on a scale-out worker node, the remaining master needs to time out -before the Linux cluster will react. +- \fBfence\fP: do 'crm node fence <\fIhost\fR>'. This needs a Linux cluster +STONITH method and sudo permission. 
This action is primarily meant for scale-up.If it happens on a scale-out worker node, the remaining master needs to time +out before the Linux cluster will react. .br .\" TODO - suicide: do 'systemctl reboot'. Do NOT use this! .\" .br Optional. Default is ignore. .TP -\fBkill_signal = [ INTEGER ]\fP -Signal to be used with 'HDB kill-'. +\fBkill_signal = [ \fIINTEGER\fB ]\fP +Signal to be used with 'HDB kill-<\fIsignal\fR>'. .br Optional. Default is 9. .\" TODO @@ -106,10 +105,10 @@ Optional. Default is 9. .\" .br .\" Optional. Default is tenant TODO. .TP -\fBstop_timeout = [ INTEGER ]\fP -How many seconds to wait for 'sapcontrol ... StopSystem' to return? -Should be greater than value of daemon.ini parameter 'forcedtimeout'. -.\" TODO what is "forcedtimeout" ? +\fBstop_timeout = [ \fIINTEGER\fB ]\fP +How many seconds to wait for 'sapcontrol ... StopSystem' to return. +Should be greater than value of HANA parameter 'forcedterminationtimeout'. +See also SAPHanaSR_basic_cluster(7). .br Optional. Default is 20 seconds. .TP @@ -122,25 +121,25 @@ Optional. Default is info. Will be added automatically if not set. .PP * The HANA daemon TODO for the daemon section of daemon.ini: .\" TODO check the below values with SAP -.TP +.PP \fB[daemon]\fP .TP -\fBterminationtimeout = [ INTEGER ]\fP -TODO Should be 45000. +\fBterminationtimeout = [ \fIINTEGER\fB ]\fP .br -Optional. Timeout in milliseconds. Default is TODO. +See also SAPHanaSR_basic_cluster(7). +Optional. Timeout in milliseconds. Default is 30000. .TP -\fBforcedterminationtimeout = [ INTEGER ]\fP -TODO Should be 15000. +\fBforcedterminationtimeout = [ \fIINTEGER\fB ]\fP .br -Optional. Timeout in milliseconds. Default is TODO. +See also SAPHanaSR_basic_cluster(7). +Optional. Timeout in milliseconds. Default is 270000. .PP * The HANA daemon TODO for the indexserver. section of daemon.ini: .\" TODO check the below values with cloud partner .TP -\fB[indexserver.]\fP +\fB[indexserver.<\fItenant\fR>]\fP .TP -\fBgracetime = [ INTEGER ]\fP +\fBgracetime = [ \fIINTEGER\fB ]\fP TODO Should be 6000. .br Optional. Timeout in milliseconds. Default is 2000. @@ -241,6 +240,7 @@ Example SID is HA1, tenant is HA1. .br The sections daemon and indexserver.HA1 are needed on all HANA nodes. The HANA has to be stopped before the file can be changed. +Please refer to SAP documentation before setting this parameters. .PP .RS 2 [daemon] @@ -460,7 +460,7 @@ A.Briel, F.Herschel, L.Pinne. .PP .\" .SH COPYRIGHT -(c) 2022-2023 SUSE LLC +(c) 2022-2024 SUSE LLC .br susChkSrv.py comes with ABSOLUTELY NO WARRANTY. .br diff --git a/man/susHanaSR.py.7 b/man/susHanaSR.py.7 index 1b877128..05afc369 100644 --- a/man/susHanaSR.py.7 +++ b/man/susHanaSR.py.7 @@ -1,6 +1,6 @@ .\" Version: 1.001 .\" -.TH susHanaSR.py 7 "23 Jan 2024" "" "SAPHanaSR" +.TH susHanaSR.py 7 "01 Mar 2024" "" "SAPHanaSR" .\" .SH NAME susHanaSR.py \- Provider for SAP HANA srHook method srConnectionChanged(). @@ -66,7 +66,8 @@ Usage, syntax or execution errors. .\" .SH EXAMPLES .PP -\fB*\fR Example for entry in sudo permissions /etc/sudoers +\fB*\fR Example for entry in sudo permissions /etc/sudoers.d/SAPHanaSR +.PP .RS 2 # SAPHanaSR (Scale-Up) needs for srHook .br @@ -82,6 +83,14 @@ Example SID is HA1. # sudo -U ha1adm -l | grep "NOPASSWD.*crm_attribute.*hana_ha1" .RE .PP +\fB*\fR Example for checking HANA´s python version. +.br +This might be done before installing HADR provider hook scripts. SID is HA1. 
+.PP +.RS 2 +# su - ha1adm -c "python --version" +.RE +.PP \fB*\fR Example for entry in SAP HANA global configuration /hana/shared/$SID/global/hdb/custom/config/global.ini .br @@ -267,7 +276,7 @@ the internal cache for srHook status changes while Linux cluster is down, file i .PP .\" .SH REQUIREMENTS -1. SAP HANA 2.0 SPS05 rev.059 or later provides Python 3 as well as the HA/DR +1. SAP HANA 2.0 SPS05 rev.059.04 or later provides Python 3 as well as the HA/DR provider hook method srConnectionChanged() with multi-target aware parameters. The Python 3 and multi-target aware parameters are needed for the SAPHanaSR-angi package. diff --git a/ra/saphana-controller-lib b/ra/saphana-controller-lib index 6f4308be..bb53216f 100755 --- a/ra/saphana-controller-lib +++ b/ra/saphana-controller-lib @@ -181,6 +181,11 @@ function saphana_print_parameters() { HANA instance profile name + + Technical preview: ON_FAIL_ACTION selects the level RA escalates monitor failures on primary. Useful values are "fence" and "proceed". + Technical preview: ON_FAIL_ACTION defines the RA escalation level after failures + + ' } # end function saphana_print_parameters @@ -385,10 +390,23 @@ function saphana_init_get_ocf_parameters() { AUTOMATED_REGISTER="${OCF_RESKEY_AUTOMATED_REGISTER:-false}" DUPLICATE_PRIMARY_TIMEOUT="${OCF_RESKEY_DUPLICATE_PRIMARY_TIMEOUT:-7200}" ocf_env=$(env | grep 'OCF_RESKEY_CRM') + ON_FAIL_ACTION="${OCF_RESKEY_ON_FAIL_ACTION:-proceed}" super_ocf_log debug "DBG: OCF: $ocf_env" return 0 } # end function saphana_init_get_ocf_parameters +function saphana_reset_poison_pill() { + if [ -e "$pp_sap_hana_controller" ]; then + super_ocf_log info "RA reset old RA poison pill" + rm "$pp_sap_hana_controller" + fi +} # end function saphana_reset_poison_pill + +function saphana_set_poison_pill() { + super_ocf_log info "RA set RA poison pill" + touch "$pp_sap_hana_controller" +} # end function saphana_set_poison_pill + function saphana_init() { # function: saphana_init - initialize variables for the resource agent # params: - @@ -407,6 +425,7 @@ function saphana_init() { # create directory for HANA_CALL command sdtout and stderr tracking # runDir="/run/SAPHanaSR_${SID}" + pp_sap_hana_controller="/run/SAPHanaController_poison_pill_${SID}" mkdir -p "$runDir" chown "${SID,,}adm" "$runDir" super_ocf_log info "DEC: preparing runDir ($runDir) for access of user ${SID,,}adm" @@ -779,23 +798,33 @@ function saphana_stop() { local rc=0 local output="" super_ocf_log info "ACT: saphana_stop" - if is_the_master_nameserver; then - super_ocf_log info "ACT: saphana_stop: is_the_master_nameserver" - # Stop the entire SAP HANA site (StopSystem) - saphana_stopSystem; rc=$? - elif is_active_nameserver_slave && [ -z "$gTheMaster" ]; then - super_ocf_log info "ACT: saphana_stop: is_active_nameserver_slave and no master nameserver is available" - # Stop the entire SAP HANA site (StopSystem) - saphana_stopSystem; rc=$? - elif is_lost_nameserver_slave && [ -z "$gTheMaster" ]; then - super_ocf_log info "ACT: saphana_stop: is_lost_nameserver_slave and no master nameserver is available" - # Stop ONLY the local SAP HANA instance to avoid an isolated SAP HANA nameserver slave does shutdown the entire site - saphana_stopSystem Stop rc=$? 
+ # + # FAST-STOP: Check id poison pill has been created by a failed monitor + # + if [[ -e "$pp_sap_hana_controller" ]]; then + super_ocf_log info "RA poison pill detected - reporting stop error - sleep 5s" + sleep 5 + saphana_reset_poison_pill + rc="$OCF_ERR_GENERIC" else - is_active_nameserver_slave; is_slave_rc=$? - super_ocf_log info "ACT: saphana_stop: NEITHER is_active_nameserver_slave (rc=$is_slave_rc) NOR is_the_master_nameserver debug: ($gTheMaster) NOR is_lost_nameserver_slave" - # TODO PRIO1: NG - Do we need to set a clone state here? - rc="$OCF_SUCCESS" + if is_the_master_nameserver; then + super_ocf_log info "ACT: saphana_stop: is_the_master_nameserver" + # Stop the entire SAP HANA site (StopSystem) + saphana_stopSystem; rc=$? + elif is_active_nameserver_slave && [ -z "$gTheMaster" ]; then + super_ocf_log info "ACT: saphana_stop: is_active_nameserver_slave and no master nameserver is available" + # Stop the entire SAP HANA site (StopSystem) + saphana_stopSystem; rc=$? + elif is_lost_nameserver_slave && [ -z "$gTheMaster" ]; then + super_ocf_log info "ACT: saphana_stop: is_lost_nameserver_slave and no master nameserver is available" + # Stop ONLY the local SAP HANA instance to avoid an isolated SAP HANA nameserver slave does shutdown the entire site + saphana_stopSystem Stop rc=$? + else + is_active_nameserver_slave; is_slave_rc=$? + super_ocf_log info "ACT: saphana_stop: NEITHER is_active_nameserver_slave (rc=$is_slave_rc) NOR is_the_master_nameserver debug: ($gTheMaster) NOR is_lost_nameserver_slave" + # TODO PRIO1: NG - Do we need to set a clone state here? + rc="$OCF_SUCCESS" + fi fi super_ocf_log info "FLOW ${FUNCNAME[0]} rc=$rc" return "$rc" @@ -1831,6 +1860,7 @@ function saphana_monitor_clone() { # # reset possible left SRACTION_HISTORY # + saphana_reset_poison_pill set_hana_attribute "$NODENAME" "-" "${ATTR_NAME_HANA_SRACTION_HISTORY[@]}" if ocf_is_probe; then super_ocf_log info "DEC: PROBE ONLY" @@ -1851,6 +1881,25 @@ function saphana_monitor_clone() { fi fi else + # + # optional FAST-STOP + # + if [[ "$gFullRole" =~ ^1:P: ]]; then + if [[ "$ON_FAIL_ACTION" == "fence" ]]; then + test_rem_sr=$(get_SRHOOK "$gRemSite") + if [[ "$test_rem_sr" == "SOK" ]]; then + super_ocf_log info "RA monitor() ON_FAIL_ACTION=$ON_FAIL_ACTION, remote site is SOK => BYPASS SCORING HERE" + super_ocf_log info "RA monitor() ON_FAIL_ACTION=$ON_FAIL_ACTION, remote site is SOK and monitor failes => create poison pill file" + saphana_set_poison_pill + else + super_ocf_log info "RA test_rem_sr=$test_rem_sr != 'SOK'" + fi + else + super_ocf_log info "RA ON_FAIL_ACTION != 'fence'" + fi + else + super_ocf_log info "RA gFullRole=$gFullRole does not match '^1:P:'" + fi super_ocf_log info "DEC: scoring_crm_promote **11**" scoring_crm_promote "$gFullRole" "$gSrHook" # shellcheck disable=SC2154 @@ -1911,6 +1960,21 @@ function saphana_monitor_clone() { # saphana_monitor_clone_not_msn fi + # + # optional FAST-STOP - remove possible poison pill, if monitor is ok right now + # + if ! 
ocf_is_probe; then + if [[ "$ON_FAIL_ACTION" == "fence" ]]; then + if [[ "$test_rem_sr" == "SOK" ]]; then + if [[ "$rc" != "$OCF_SUCCESS" && "$rc" != "$OCF_RUNNING_MASTER" ]]; then + rc="$OCF_ERR_GENERIC" + fi + fi + fi + if [[ "$rc" == "$OCF_SUCCESS" || "$rc" == "$OCF_RUNNING_MASTER" ]]; then + saphana_reset_poison_pill + fi + fi super_ocf_log info "FLOW ${FUNCNAME[0]} rc=$rc" return "$rc" } # end function saphana_monitor_clone @@ -2033,6 +2097,24 @@ function saphana_demote_clone() { else rc="$OCF_SUCCESS" fi + # + # optional FAST-STOP (let demote fail to force stop of the resource; stop then triggers an error -> fence) + # + if [[ "$gFullRole" =~ ^1:P: ]]; then + if [[ "$ON_FAIL_ACTION" == "fence" ]]; then + test_rem_sr=$(get_SRHOOK "$gRemSite") + if [[ "$test_rem_sr" == "SOK" ]]; then + super_ocf_log info "RA monitor() ON_FAIL_ACTION=$ON_FAIL_ACTION, remote site is SOK => demote failed to trigger stop" + rc="$OCF_ERR_GENERIC" + else + super_ocf_log info "RA test_rem_sr=$test_rem_sr != 'SOK'" + fi + else + super_ocf_log info "RA ON_FAIL_ACTION != 'fence'" + fi + else + super_ocf_log info "RA gFullRole=$gFullRole does not match '^1:P:'" + fi super_ocf_log info "ACT: Demoted $SID-$InstanceName." super_ocf_log info "FLOW ${FUNCNAME[0]} rc=$rc" return "$rc" diff --git a/ra/saphana-filesystem-lib b/ra/saphana-filesystem-lib index 535e2121..1dee3e1b 100755 --- a/ra/saphana-filesystem-lib +++ b/ra/saphana-filesystem-lib @@ -237,7 +237,7 @@ function shfs_stop() { if [ -e "$pp_hana_shared" ]; then rm "$pp_hana_shared" super_ocf_log info "RA poison pill detected - reporting stop error - sleep 10s" - sleep 10 + sleep 10 rc="$OCF_ERR_GENERIC" else rc="$ha_ps_rc" diff --git a/srHook/susHanaSR.py b/srHook/susHanaSR.py index a1ba7776..42282e96 100755 --- a/srHook/susHanaSR.py +++ b/srHook/susHanaSR.py @@ -92,11 +92,11 @@ def srConnectionChanged(self, ParamDict, **kwargs): fallback_file_name = f"../.crm_attribute.{my_site}" fallback_stage_file_name = f"../.crm_attribute.stage.{my_site}" if ret_code == 0: - # + # # cluster attribute set was successfull - delete pending fallback file, if existing try: os.remove(fallback_file_name) - self.tracer.info("new event - pending fallback file {0} deleted".format(fallback_file_name)) + self.tracer.info(f"new event - pending fallback file {fallback_file_name} deleted") except FileNotFoundError: pass else: diff --git a/test/SAPHanaSR-checkJson b/test/SAPHanaSR-checkJson index 4315bd57..7fde792e 100755 --- a/test/SAPHanaSR-checkJson +++ b/test/SAPHanaSR-checkJson @@ -48,5 +48,7 @@ except PermissionError as e_ferr: if not(quiet): print(json_data) + +exit(0) diff --git a/test/SaphanasrTestClass.txt b/test/SaphanasrTestClass.txt new file mode 100644 index 00000000..84ac64d5 --- /dev/null +++ b/test/SaphanasrTestClass.txt @@ -0,0 +1,112 @@ +class SaphanasrTest: + def message(self, msg, **kwargs): + CALLED-IN: debug(), __init__(), read_saphana_sr(), read_test_file(), run_checks(), process_step(), process_steps(), process_test(), action_call(), __do_ssh__() + CATEGORY: ALL + + def debug(self, msg, **kwargs): + CALLED-IN: __init__(), get_area_object_by_key_val(), get_value(), read_test_file(), __add_failed__(), run_checks(), process_topology_object(), __do_ssh__() + CATEGORY: ALL + + def __init__(self, *args, **kwargs): + CALLED-IN: importing program (constructor call) + CATEGORY: BASE (CLASS) + + def __insert_to_area__(self, area, the_object): + CALLED-IN: read_saphana_sr() + CATEGORY: READER-SR + + def __get_object__(self, area, object_name): + CALLED-IN: read_saphana_sr() 
+ CATEGORY: READER-SR + + def create_object(self, object_name, key, val): + CALLED-IN: read_saphana_sr() + CATEGORY: READER-SR + + def __insert_to_object__(self, the_object, key, value): + CALLED-IN: read_saphana_sr() + CATEGORY: READER-SR + + def read_saphana_sr(self): + CALLED-IN: process_step() + CATEGORY: READER-SR + + def get_area_object_by_key_val(self, area_name, search_criteria, **kwargs): + CALLED-IN: importing program (using the object) + CATEGORY: BASE + + def get_value(self, area_name, object_name, key): + CALLED-IN: importing program (using the object) + CATEGORY: BASE + + def pretty_print(self, dictionary,level): + CALLED-IN: pretty_print() [recursive]; optional importing program (using the object) + CATERGORY: BASE + + def read_test_file(self): + CALLED-IN: importing program (using the object) + CATEGORY: READER-TEST + + def write_test_properties(self, topology): + CALLED-IN: importing program (using the object) + CATEGORY: READER-TEST ? + + def __add_failed__(self, area_object, key_val_reg): + CALLED-IN: run_checks() + CATEGORY: TESTER, CHECKER + + def __reset_failed__(self): + CALLED-IN: run_checks() + CATEGORY: TESTER, CHECKER + + def __get_failed__(self): + CALLED-IN: run_checks() + CATEGORY: TESTER, CHECKER + + def run_checks(self, checks, area_name, object_name, step_step ): + CALLED-IN: process_topology_object() + CATEGORY: TESTER, CHECKER + + def process_topology_object(self, step, topology_object_name, area_name): + CALLED-IN: process_step() + CATEGORY: TESTER, CHECKER ? + + def process_step(self, step): + CALLED-IN: process_steps() + CATEGORY: TESTER + + def process_steps(self): + CALLED-IN: process_test() + CATEGORY: TESTER + + def process_test(self): + CALLED-IN: importing program (using the object) + CATEGORY: TESTER + + def get_step(self, step_name): + CALLED-IN: process_steps() + CATEGORY: TESTER + + def action_call(self, action_name, cmd, remote): + CALLED-IN: action_on_hana(), action_on_cluster(), action_on_os() + CATEGORY: TESTER + + def action_on_hana(self, action_name): + CALLED-IN: action() + CATEGORY: TESTER + + def action_on_cluster(self, action_name): + CALLED-IN: action() + CATEGORY: TESTER + + def action_on_os(self, action_name): + CALLED-IN: action() + CATEGORY: TESTER + + def action(self, action_name): + CALLED-IN: process_step() + CATEGORY: TESTER + + def __do_ssh__(self, remote_host, user, cmd): + CALLED-IN: read_saphana_sr(), action_call() + CATEGORY: BASE, ALL diff --git a/test/json/angi-ScaleOut/kill_prim_indexserver.json b/test/json/angi-ScaleOut/kill_prim_indexserver.json index f9cd45ac..81f4e021 100644 --- a/test/json/angi-ScaleOut/kill_prim_indexserver.json +++ b/test/json/angi-ScaleOut/kill_prim_indexserver.json @@ -21,7 +21,7 @@ "step": "step20", "name": "failure detected", "next": "step30", - "loop": 120, + "loop": 180, "wait": 2, "comment": "sSite: srPoll could get SFAIL on scale-out", "pSite": [ diff --git a/test/json/classic-ScaleUp/restart_cluster_hana_running.json b/test/json/angi-ScaleOut/maintenance_cluster_hana_running.json similarity index 79% rename from test/json/classic-ScaleUp/restart_cluster_hana_running.json rename to test/json/angi-ScaleOut/maintenance_cluster_hana_running.json index 25ebf149..28341f5f 100644 --- a/test/json/classic-ScaleUp/restart_cluster_hana_running.json +++ b/test/json/angi-ScaleOut/maintenance_cluster_hana_running.json @@ -1,6 +1,6 @@ { - "test": "restart_cluster_hana_running", - "name": "restart_cluster_hana_running", + "test": "maintenance_cluster_hana_running", + "name": "stop and restart 
cluster, keep hana_running", "start": "prereq10", "steps": [ { @@ -9,7 +9,7 @@ "next": "final40", "loop": 1, "wait": 1, - "post": "shell test_restart_cluster_hana_running", + "post": "shell sct_test_maintenance_cluster_hana_running", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", diff --git a/test/json/angi-ScaleUp/defaults.json b/test/json/angi-ScaleUp/defaults.json index 041dd4a7..1c1d4bee 100644 --- a/test/json/angi-ScaleUp/defaults.json +++ b/test/json/angi-ScaleUp/defaults.json @@ -43,16 +43,16 @@ "score == 100" ], "pHostDown": [ - "clone_state == UNDEFINED" , - "roles == master1::worker:" , - "score == 150" , + "clone_state == UNDEFINED", + "roles == master1::worker:", + "score == 150", "standby == on" ], "pSiteDown": [ "lpt > 1000000000", - "lss == 1" , - "srr == P" , - "srHook == PRIM" , + "lss == 1", + "srr == P", + "srHook == PRIM", "srPoll == PRIM" ], "sSiteDown": [ @@ -63,9 +63,9 @@ "srPoll == SFAIL" ], "sHostDown": [ - "clone_state == UNDEFINED" , - "roles == master1::worker:" , - "score == 100" , + "clone_state == UNDEFINED", + "roles == master1::worker:", + "score == 100", "standby == on" ] } diff --git a/test/json/angi-ScaleUp/demo_kill_prim_inst.json b/test/json/angi-ScaleUp/demo_kill_prim_inst.json index 74b565fc..d0f2c00b 100644 --- a/test/json/angi-ScaleUp/demo_kill_prim_inst.json +++ b/test/json/angi-ScaleUp/demo_kill_prim_inst.json @@ -24,10 +24,10 @@ "loop": 120, "wait": 2, "pSite": [ - "lss ~ (1|2)" , - "srr == P" , - "lpt >~ 1000000000:20" , - "srHook ~ (PRIM|SWAIT|SREG)" , + "lss ~ (1|2)", + "srr == P", + "lpt >~ 1000000000:20", + "srHook ~ (PRIM|SWAIT|SREG)", "srPoll == PRIM" ], "sSite": [ @@ -38,13 +38,13 @@ "srPoll == SOK" ], "pHost": [ - "clone_state ~ (PROMOTED|DEMOTED|UNDEFINED)" , - "roles == master1::worker:" , + "clone_state ~ (PROMOTED|DEMOTED|UNDEFINED)", + "roles == master1::worker:", "score ~ (90|5|0)" ], "sHost": [ - "clone_state ~ (PROMOTED|DEMOTED)" , - "roles == master1:master:worker:master" , + "clone_state ~ (PROMOTED|DEMOTED)", + "roles == master1:master:worker:master", "score ~ (100|145)" ] }, @@ -56,10 +56,10 @@ "wait": 2, "todo": "pHost+sHost to check site-name", "pSite": [ - "lss == 1" , - "srr == P" , - "lpt >~ 1000000000:(30|20|10)" , - "srHook ~ (PRIM|SWAIT|SREG)" , + "lss == 1", + "srr == P", + "lpt >~ 1000000000:(30|20|10)", + "srHook ~ (PRIM|SWAIT|SREG)", "srPoll == PRIM" ], "sSite": [ @@ -70,14 +70,14 @@ "srPoll == SOK" ], "pHost": [ - "clone_state ~ (UNDEFINED|DEMOTED)" , - "roles == master1::worker:" , + "clone_state ~ (UNDEFINED|DEMOTED)", + "roles == master1::worker:", "score ~ (90|5)" ], "sHost": [ - "clone_state ~ (DEMOTED|PROMOTED)" , - "roles == master1:master:worker:master" , - "score ~ (100|145)" , + "clone_state ~ (DEMOTED|PROMOTED)", + "roles == master1:master:worker:master", + "score ~ (100|145)", "srah == T" ] }, diff --git a/test/json/angi-ScaleUp/freeze_prim_fs.json b/test/json/angi-ScaleUp/freeze_prim_fs.json index c50565d6..507d1b3e 100644 --- a/test/json/angi-ScaleUp/freeze_prim_fs.json +++ b/test/json/angi-ScaleUp/freeze_prim_fs.json @@ -22,9 +22,9 @@ "loop": 120, "wait": 2, "pSite": [ - "srr == P" , - "lpt >~ 1000000000:(20|10)" , - "srHook ~ (PRIM|SWAIT|SREG)" , + "srr == P", + "lpt >~ 1000000000:(20|10)", + "srHook ~ (PRIM|SWAIT|SREG)", "srPoll == PRIM" ], "sSite": [ @@ -38,7 +38,7 @@ ], "sHost": [ "clone_state ~ (PROMOTED|DEMOTED)", - "roles == master1:master:worker:master" , + "roles == master1:master:worker:master", "score ~ (100|145)" ] }, @@ -51,9 +51,9 @@ "todo": "pHost+sHost to check 
site-name", "pSite": [ "lss ~ (1|2)", - "srr ~ (P|S)" , - "lpt >~ 1000000000:(30|20|10)" , - "srHook ~ (PRIM|SWAIT|SREG)" , + "srr ~ (P|S)", + "lpt >~ 1000000000:(30|20|10)", + "srHook ~ (PRIM|SWAIT|SREG)", "srPoll ~ (PRIM|SFAIL)" ], "sSite": [ @@ -64,12 +64,12 @@ "srPoll ~ (SOK|PRIM)" ], "pHost": [ - "clone_state ~ (UNDEFINED|DEMOTED|WAITING4NODES)" , + "clone_state ~ (UNDEFINED|DEMOTED|WAITING4NODES)", "roles == master1::worker:" ], "sHost": [ - "clone_state ~ (DEMOTED|PROMOTED)" , - "roles == master1:master:worker:master" , + "clone_state ~ (DEMOTED|PROMOTED)", + "roles == master1:master:worker:master", "score ~ (100|145|150)" ] }, diff --git a/test/json/angi-ScaleUp/kill_prim_indexserver.json b/test/json/angi-ScaleUp/kill_prim_indexserver.json index 382f5180..b2339179 100644 --- a/test/json/angi-ScaleUp/kill_prim_indexserver.json +++ b/test/json/angi-ScaleUp/kill_prim_indexserver.json @@ -19,13 +19,13 @@ "step": "step20", "name": "failure detected", "next": "step30", - "loop": 120, + "loop": 180, "wait": 2, "pSite": [ - "lss ~ (1|2)" , - "srr == P" , - "lpt >~ 1000000000:20" , - "srHook ~ (PRIM|SWAIT|SREG)" , + "lss ~ (1|2)", + "srr == P", + "lpt >~ 1000000000:20", + "srHook ~ (PRIM|SWAIT|SREG)", "srPoll == PRIM" ], "sSite": [ @@ -35,13 +35,13 @@ "srHook ~ (PRIM|SOK)" ], "pHost": [ - "clone_state ~ (PROMOTED|DEMOTED|UNDEFINED)" , - "roles == master1::worker:" , + "clone_state ~ (PROMOTED|DEMOTED|UNDEFINED)", + "roles == master1::worker:", "score ~ (90|5|0)" ], "sHost": [ - "clone_state ~ (PROMOTED|DEMOTED)" , - "roles == master1:master:worker:master" , + "clone_state ~ (PROMOTED|DEMOTED)", + "roles == master1:master:worker:master", "score ~ (100|145)" ] }, @@ -53,10 +53,10 @@ "wait": 2, "todo": "pHost+sHost to check site-name", "pSite": [ - "lss == 1" , - "srr == P" , - "lpt >~ 1000000000:(30|20|10)" , - "srHook ~ (PRIM|SWAIT|SREG)" , + "lss == 1", + "srr == P", + "lpt >~ 1000000000:(30|20|10)", + "srHook ~ (PRIM|SWAIT|SREG)", "srPoll == PRIM" ], "sSite": [ @@ -67,14 +67,14 @@ "srPoll == SOK" ], "pHost": [ - "clone_state ~ (UNDEFINED|DEMOTED)" , - "roles == master1::worker:" , + "clone_state ~ (UNDEFINED|DEMOTED)", + "roles == master1::worker:", "score ~ (90|5)" ], "sHost": [ - "clone_state ~ (DEMOTED|PROMOTED)" , - "roles == master1:master:worker:master" , - "score ~ (100|145)" , + "clone_state ~ (DEMOTED|PROMOTED)", + "roles == master1:master:worker:master", + "score ~ (100|145)", "srah == T" ] }, diff --git a/test/json/angi-ScaleUp/kill_prim_inst.json b/test/json/angi-ScaleUp/kill_prim_inst.json index ee40663b..bb503552 100644 --- a/test/json/angi-ScaleUp/kill_prim_inst.json +++ b/test/json/angi-ScaleUp/kill_prim_inst.json @@ -24,10 +24,10 @@ "loop": 120, "wait": 2, "pSite": [ - "lss ~ (1|2)" , - "srr == P" , - "lpt >~ 1000000000:20" , - "srHook ~ (PRIM|SWAIT|SREG)" , + "lss ~ (1|2)", + "srr == P", + "lpt >~ 1000000000:20", + "srHook ~ (PRIM|SWAIT|SREG)", "srPoll == PRIM" ], "sSite": [ @@ -38,13 +38,13 @@ "srPoll ~ (PRIM|SOK)" ], "pHost": [ - "clone_state ~ (PROMOTED|DEMOTED|UNDEFINED)" , - "roles == master1::worker:" , + "clone_state ~ (PROMOTED|DEMOTED|UNDEFINED)", + "roles == master1::worker:", "score ~ (90|5|0)" ], "sHost": [ - "clone_state ~ (PROMOTED|DEMOTED)" , - "roles == master1:master:worker:master" , + "clone_state ~ (PROMOTED|DEMOTED)", + "roles == master1:master:worker:master", "score ~ (100|145)" ] }, @@ -56,10 +56,10 @@ "wait": 2, "todo": "pHost+sHost to check site-name", "pSite": [ - "lss == 1" , - "srr == P" , - "lpt >~ 1000000000:(30|20|10)" , - "srHook ~ 
(PRIM|SWAIT|SREG)" , + "lss == 1", + "srr == P", + "lpt >~ 1000000000:(30|20|10)", + "srHook ~ (PRIM|SWAIT|SREG)", "srPoll == PRIM" ], "sSite": [ @@ -70,14 +70,14 @@ "srPoll == SOK" ], "pHost": [ - "clone_state ~ (UNDEFINED|DEMOTED)" , - "roles == master1::worker:" , + "clone_state ~ (UNDEFINED|DEMOTED)", + "roles == master1::worker:", "score ~ (90|5)" ], "sHost": [ - "clone_state ~ (DEMOTED|PROMOTED)" , - "roles == master1:master:worker:master" , - "score ~ (100|145)" , + "clone_state ~ (DEMOTED|PROMOTED)", + "roles == master1:master:worker:master", + "score ~ (100|145)", "srah == T" ] }, diff --git a/test/json/angi-ScaleUp/kill_prim_node.json b/test/json/angi-ScaleUp/kill_prim_node.json index f0802493..5fc9f8e1 100644 --- a/test/json/angi-ScaleUp/kill_prim_node.json +++ b/test/json/angi-ScaleUp/kill_prim_node.json @@ -22,9 +22,9 @@ "loop": 120, "wait": 2, "pSite": [ - "srr == P" , - "lpt ~ (1[6-9]........|20|10)" , - "srHook ~ (PRIM|SWAIT|SREG)" , + "srr == P", + "lpt ~ (1[6-9]........|20|10)", + "srHook ~ (PRIM|SWAIT|SREG)", "srPoll == PRIM" ], "sSite": [ @@ -41,7 +41,7 @@ ], "sHost": [ "clone_state ~ (PROMOTED|DEMOTED)", - "roles == master1:master:worker:master" , + "roles == master1:master:worker:master", "score ~ (100|145)" ] }, @@ -49,14 +49,14 @@ "step": "step30", "name": "begin recover", "next": "final40", - "loop": 300, + "loop": 180, "wait": 2, "todo": "pHost+sHost to check site-name", "pSite": [ "lss ~ (1|2)", - "srr ~ (P|S)" , - "lpt ~ (1[6-9]........|30|20|10)" , - "srHook ~ (PRIM|SWAIT|SREG)" , + "srr ~ (P|S)", + "lpt ~ (1[6-9]........|30|20|10)", + "srHook ~ (PRIM|SWAIT|SREG)", "srPoll ~ (PRIM|SFAIL)" ], "sSite": [ @@ -67,12 +67,12 @@ "srPoll ~ (SOK|PRIM)" ], "pHost": [ - "clone_state ~ (UNDEFINED|DEMOTED|WAITING4NODES)" , + "clone_state ~ (UNDEFINED|DEMOTED|WAITING4NODES)", "roles == master1::worker:" ], "sHost": [ - "clone_state ~ (DEMOTED|PROMOTED)" , - "roles == master1:master:worker:master" , + "clone_state ~ (DEMOTED|PROMOTED)", + "roles == master1:master:worker:master", "score ~ (100|145|150)" ] }, @@ -80,7 +80,7 @@ "step": "final40", "name": "end recover", "next": "END", - "loop": 300, + "loop": 150, "wait": 2, "post": "cleanup", "remark": "pXXX and sXXX are now exchanged", diff --git a/test/json/angi-ScaleUp/kill_secn_indexserver.json b/test/json/angi-ScaleUp/kill_secn_indexserver.json index 94f21fa9..893ce0f4 100644 --- a/test/json/angi-ScaleUp/kill_secn_indexserver.json +++ b/test/json/angi-ScaleUp/kill_secn_indexserver.json @@ -22,10 +22,10 @@ "loop": 120, "wait": 2, "pSite": [ - "lss == 4" , - "srr == P" , - "lpt > 1000000000" , - "srHook == PRIM" , + "lss == 4", + "srr == P", + "lpt > 1000000000", + "srHook == PRIM", "srPoll == PRIM" ], "sSite": [ @@ -36,13 +36,13 @@ "srPoll ~ (SFAIL|SOK)" ], "pHost": [ - "clone_state == PROMOTED" , - "roles == master1:master:worker:master" , + "clone_state == PROMOTED", + "roles == master1:master:worker:master", "score == 150" ], "sHost": [ - "clone_state == DEMOTED" , - "roles == master1::worker:" , + "clone_state == DEMOTED", + "roles == master1::worker:", "score ~ (-INFINITY|0)" ] }, @@ -54,10 +54,10 @@ "wait": 2, "todo": "pHost+sHost to check site-name", "pSite": [ - "lss == 4" , - "srr == P" , - "lpt > 1000000000" , - "srHook == PRIM" , + "lss == 4", + "srr == P", + "lpt > 1000000000", + "srHook == PRIM", "srPoll == PRIM" ], "sSite": [ @@ -68,13 +68,13 @@ "srPoll ~ (SFAIL|SOK)" ], "pHost": [ - "clone_state == PROMOTED" , - "roles == master1:master:worker:master" , + "clone_state == PROMOTED", + "roles == 
master1:master:worker:master", "score == 150" ], "sHost": [ - "clone_state == UNDEFINED" , - "roles == master1::worker:" , + "clone_state == UNDEFINED", + "roles == master1::worker:", "score ~ (-INFINITY|0|-1)" ] }, diff --git a/test/json/angi-ScaleUp/kill_secn_inst.json b/test/json/angi-ScaleUp/kill_secn_inst.json index 66b68f6d..0435374b 100644 --- a/test/json/angi-ScaleUp/kill_secn_inst.json +++ b/test/json/angi-ScaleUp/kill_secn_inst.json @@ -22,10 +22,10 @@ "loop": 120, "wait": 2, "pSite": [ - "lss == 4" , - "srr == P" , - "lpt > 1000000000" , - "srHook == PRIM" , + "lss == 4", + "srr == P", + "lpt > 1000000000", + "srHook == PRIM", "srPoll == PRIM" ], "sSite": [ @@ -36,13 +36,13 @@ "srPoll ~ (SFAIL|SOK)" ], "pHost": [ - "clone_state == PROMOTED" , - "roles == master1:master:worker:master" , + "clone_state == PROMOTED", + "roles == master1:master:worker:master", "score == 150" ], "sHost": [ - "clone_state == DEMOTED" , - "roles == master1::worker:" , + "clone_state == DEMOTED", + "roles == master1::worker:", "score ~ (-INFINITY|0)" ] }, @@ -54,10 +54,10 @@ "wait": 2, "todo": "pHost+sHost to check site-name", "pSite": [ - "lss == 4" , - "srr == P" , - "lpt > 1000000000" , - "srHook == PRIM" , + "lss == 4", + "srr == P", + "lpt > 1000000000", + "srHook == PRIM", "srPoll == PRIM" ], "sSite": [ @@ -68,13 +68,13 @@ "srPoll ~ (SFAIL|SOK)" ], "pHost": [ - "clone_state == PROMOTED" , - "roles == master1:master:worker:master" , + "clone_state == PROMOTED", + "roles == master1:master:worker:master", "score == 150" ], "sHost": [ - "clone_state ~ (UNDEFINED|DEMOTED)" , - "roles == master1::worker:" , + "clone_state ~ (UNDEFINED|DEMOTED)", + "roles == master1::worker:", "score ~ (-INFINITY|0|-1)" ] }, diff --git a/test/json/angi-ScaleUp/kill_secn_node.json b/test/json/angi-ScaleUp/kill_secn_node.json index 3941f1bd..c39f8857 100644 --- a/test/json/angi-ScaleUp/kill_secn_node.json +++ b/test/json/angi-ScaleUp/kill_secn_node.json @@ -22,10 +22,10 @@ "loop": 120, "wait": 2, "pSite": [ - "lss == 4" , - "srr == P" , - "lpt > 1000000000" , - "srHook == PRIM" , + "lss == 4", + "srr == P", + "lpt > 1000000000", + "srHook == PRIM", "srPoll == PRIM" ], "sSite": [ @@ -35,13 +35,13 @@ "srPoll == SFAIL" ], "pHost": [ - "clone_state == PROMOTED" , - "roles == master1:master:worker:master" , + "clone_state == PROMOTED", + "roles == master1:master:worker:master", "score == 150" ], "sHost": [ "clone_state is None", - "roles is None" , + "roles is None", "score is None" ] }, @@ -49,14 +49,14 @@ "step": "step30", "name": "begin recover", "next": "final40", - "loop": 150, + "loop": 180, "wait": 2, "todo": "pHost+sHost to check site-name", "pSite": [ - "lss == 4" , - "srr == P" , - "lpt > 1000000000" , - "srHook == PRIM" , + "lss == 4", + "srr == P", + "lpt > 1000000000", + "srHook == PRIM", "srPoll == PRIM" ], "sSite": [ @@ -67,13 +67,13 @@ "srPoll ~ (SFAIL|SOK)" ], "pHost": [ - "clone_state == PROMOTED" , - "roles == master1:master:worker:master" , + "clone_state == PROMOTED", + "roles == master1:master:worker:master", "score == 150" ], "sHost": [ - "clone_state ~ (UNDEFINED|DEMOTED)" , - "roles == master1::worker:" , + "clone_state ~ (UNDEFINED|DEMOTED)", + "roles == master1::worker:", "score ~ (-INFINITY|0|-1)" ] }, @@ -81,7 +81,7 @@ "step": "final40", "name": "end recover", "next": "END", - "loop": 120, + "loop": 150, "wait": 2, "post": "cleanup", "pSite": "pSiteUp", diff --git a/test/json/angi-ScaleUp/maintenance_cluster_bootstrap.json b/test/json/angi-ScaleUp/maintenance_cluster_bootstrap.json new file mode 
100644 index 00000000..c65f7e69 --- /dev/null +++ b/test/json/angi-ScaleUp/maintenance_cluster_bootstrap.json @@ -0,0 +1,114 @@ +{ + "test": "maintenance_cluster_bootstrap", + "name": "delete cluster config and begin from scratch", + "start": "prereq10", + "steps": [ + { + "step": "prereq10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "shell sct_test_delete_cluster_config", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "cluster without resources and attributes", + "next": "step30", + "loop": 30, + "wait": 2, + "post": "shell sct_test_create_cluster_config", + "pSite": [ + "srr is None", + "lss is None" + ], + "sSite": [ + "srr is None", + "lss is None" + ], + "pHost": [ + "clone_state is None", + "roles is None", + "score is None" + ], + "sHost":[ + "clone_state is None", + "roles is None", + "score is None" + ] + }, + { + "step": "step30", + "name": "cluster back - need to trigger srHook (block)", + "next": "step35", + "loop": 100, + "wait": 2, + "post": "shell sct_test_block_sap_hana_sr", + "pSite": [ + "srr == P", + "lss == 4", + "srHook == PRIM", + "srPoll == PRIM" + ], + "sSite": [ + "srr == S", + "lss == 4", + "srHook is None", + "srPoll == SOK" + ], + "pHost": [ + "clone_state == PROMOTED", + "roles == master1:master:worker:master", + "score == 150" + ], + "sHost":[ + "clone_state == DEMOTED", + "roles == master1:master:worker:master" + ] + }, + { + "step": "step35", + "name": "cluster back - need to trigger srHook (unblock)", + "next": "final40", + "loop": 60, + "wait": 2, + "post": "shell sct_test_unblock_sap_hana_sr", + "pSite": [ + "srr == P", + "lss == 4", + "srHook == PRIM", + "srPoll == PRIM" + ], + "sSite": [ + "srr == S", + "lss == 4", + "srHook == SFAIL", + "srPoll == SFAIL" + ], + "pHost": [ + "clone_state == PROMOTED", + "roles == master1:master:worker:master", + "score == 150" + ], + "sHost":[ + "clone_state == DEMOTED", + "roles == master1:master:worker:master" + ] + }, + { + "step": "final40", + "name": "running again", + "next": "END", + "loop": 120, + "wait": 2, + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/angi-ScaleUp/maintenance_cluster_hana_running.json b/test/json/angi-ScaleUp/maintenance_cluster_hana_running.json new file mode 100644 index 00000000..28341f5f --- /dev/null +++ b/test/json/angi-ScaleUp/maintenance_cluster_hana_running.json @@ -0,0 +1,31 @@ +{ + "test": "maintenance_cluster_hana_running", + "name": "stop and restart cluster, keep hana_running", + "start": "prereq10", + "steps": [ + { + "step": "prereq10", + "name": "test prerequitsites", + "next": "final40", + "loop": 1, + "wait": 1, + "post": "shell sct_test_maintenance_cluster_hana_running", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "final40", + "name": "end recover", + "next": "END", + "loop": 120, + "wait": 2, + "post": "cleanup", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/angi-ScaleUp/maintenance_with_standby_nodes.json b/test/json/angi-ScaleUp/maintenance_with_standby_nodes.json index 6b064b31..8d4175ec 100644 --- a/test/json/angi-ScaleUp/maintenance_with_standby_nodes.json +++ b/test/json/angi-ScaleUp/maintenance_with_standby_nodes.json @@ -44,8 +44,8 @@ ], "pHost": "pHostUp", "sHost": [ - "clone_state == DEMOTED" , - "roles == master1::worker:" , + 
"clone_state == DEMOTED", + "roles == master1::worker:", "score ~ (-INFINITY|0)" ] }, @@ -88,8 +88,8 @@ ], "pHost": "pHostDown", "sHost": [ - "clone_state == PROMOTED" , - "roles == master1:master:worker:master" , + "clone_state == PROMOTED", + "roles == master1:master:worker:master", "score ~ (100|145)" ] }, @@ -101,17 +101,17 @@ "post": "opn", "wait": 2, "pSite": [ - "lss == 1" , - "srr == P" , - "lpt == 10" , - "srHook ~ (SWAIT|SFAIL)" , + "lss == 1", + "srr == P", + "lpt == 10", + "srHook ~ (SWAIT|SFAIL)", "srPoll == SFAIL" ], "sSite": "pSiteUp", "pHost": [ - "clone_state == UNDEFINED" , - "roles == master1::worker:" , - "score == 150" , + "clone_state == UNDEFINED", + "roles == master1::worker:", + "score == 150", "standby == on" ], "sHost": "pHostUp" diff --git a/test/json/angi-ScaleUp/one_stable_hour.json b/test/json/angi-ScaleUp/one_stable_hour.json new file mode 100644 index 00000000..5e1940f3 --- /dev/null +++ b/test/json/angi-ScaleUp/one_stable_hour.json @@ -0,0 +1,90 @@ +{ + "test": "one_stable_hour", + "name": "one_stable_hour - like nop check regulary for one hour that there is no fault", + "start": "prereq10", + "steps": [ + { + "step": "prereq10", + "name": "test prerequitsites", + "next": "stable10", + "loop": 1, + "wait": 1, + "post": "sleep 600", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "stable10", + "name": "check stable (10/60)", + "next": "stable30", + "loop": 1, + "wait": 1, + "post": "sleep 600", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "stable20", + "name": "check stable (20/60)", + "next": "stable30", + "loop": 1, + "wait": 1, + "post": "sleep 600", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "stable30", + "name": "check stable (30/60)", + "next": "stable40", + "loop": 1, + "wait": 1, + "post": "sleep 600", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "stable40", + "name": "check stable (40/60)", + "next": "stable50", + "loop": 1, + "wait": 1, + "post": "sleep 600", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "stable50", + "name": "check stable (50/60)", + "next": "final60", + "loop": 1, + "wait": 1, + "post": "sleep 600", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "final60", + "name": "check stable (60/60)", + "next": "END", + "loop": 1, + "wait": 1, + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/angi-ScaleUp/split_brain_prio.json b/test/json/angi-ScaleUp/split_brain_prio.json index 9bbc1cbe..0f3beff7 100644 --- a/test/json/angi-ScaleUp/split_brain_prio.json +++ b/test/json/angi-ScaleUp/split_brain_prio.json @@ -22,10 +22,10 @@ "loop": 120, "wait": 2, "pSite": [ - "lss == 4" , - "srr == P" , - "lpt > 1000000000" , - "srHook == PRIM" , + "lss == 4", + "srr == P", + "lpt > 1000000000", + "srHook == PRIM", "srPoll == PRIM" ], "sSite": [ @@ -35,8 +35,8 @@ "srPoll == SFAIL" ], "pHost": [ - "clone_state == PROMOTED" , - "roles == master1:master:worker:master" , + "clone_state == PROMOTED", + "roles == master1:master:worker:master", "score == 150" ] }, @@ -48,10 +48,10 @@ "wait": 2, "todo": "pHost+sHost to check site-name", "pSite": [ - "lss == 4" , - "srr == P" , - "lpt > 1000000000" , - "srHook == PRIM" , + "lss == 4", + 
"srr == P", + "lpt > 1000000000", + "srHook == PRIM", "srPoll == PRIM" ], "sSite": [ @@ -62,13 +62,13 @@ "srPoll ~ (SFAIL|SOK)" ], "pHost": [ - "clone_state == PROMOTED" , - "roles == master1:master:worker:master" , + "clone_state == PROMOTED", + "roles == master1:master:worker:master", "score == 150" ], "sHost": [ - "clone_state ~ (UNDEFINED|DEMOTED)" , - "roles == master1::worker:" , + "clone_state ~ (UNDEFINED|DEMOTED)", + "roles == master1::worker:", "score ~ (-INFINITY|0|-1)" ] }, diff --git a/test/json/angi-ScaleUp/standby_prim_node.json b/test/json/angi-ScaleUp/standby_prim_node.json index a21f8ad6..268d82c8 100644 --- a/test/json/angi-ScaleUp/standby_prim_node.json +++ b/test/json/angi-ScaleUp/standby_prim_node.json @@ -22,10 +22,10 @@ "loop": 120, "wait": 2, "pSite": [ - "lss == 1" , - "srr == P" , - "lpt > 1000000000" , - "srHook == PRIM" , + "lss == 1", + "srr == P", + "lpt > 1000000000", + "srHook == PRIM", "srPoll == PRIM" ], "sSite": [ @@ -36,14 +36,14 @@ "srPoll == SOK" ], "pHost": [ - "clone_state == UNDEFINED" , - "roles == master1::worker:" , - "score == 150" , + "clone_state == UNDEFINED", + "roles == master1::worker:", + "score == 150", "standby == on" ], "sHost": [ - "clone_state == PROMOTED" , - "roles == master1:master:worker:master" , + "clone_state == PROMOTED", + "roles == master1:master:worker:master", "score ~ (100|145)" ] }, @@ -55,10 +55,10 @@ "post": "opn", "wait": 2, "pSite": [ - "lss == 1" , - "srr == P" , - "lpt == 10" , - "srHook == SWAIT" , + "lss == 1", + "srr == P", + "lpt == 10", + "srHook == SWAIT", "srPoll == SFAIL" ], "sSite": [ @@ -69,14 +69,14 @@ "srPoll == PRIM" ], "pHost": [ - "clone_state == UNDEFINED" , - "roles == master1::worker:" , - "score == 150" , + "clone_state == UNDEFINED", + "roles == master1::worker:", + "score == 150", "standby == on" ], "sHost": [ - "clone_state == PROMOTED" , - "roles == master1:master:worker:master" , + "clone_state == PROMOTED", + "roles == master1:master:worker:master", "score == 150" ] }, diff --git a/test/json/angi-ScaleUp/standby_secn_node.json b/test/json/angi-ScaleUp/standby_secn_node.json index 514de4ad..dfad5c47 100644 --- a/test/json/angi-ScaleUp/standby_secn_node.json +++ b/test/json/angi-ScaleUp/standby_secn_node.json @@ -16,50 +16,50 @@ "sHost": "sHostUp" }, { - "step": "step20", - "name": "node is standby", - "next": "step30", - "loop": 120, - "wait": 2, - "post": "osn", - "pSite": [ - "lss == 4" , - "srr == P" , - "lpt > 1000000000" , - "srHook == PRIM" , - "srPoll == PRIM" - ], - "sSite": [ - "lpt == 10", - "lss == 1", - "srr == S", - "srHook == SFAIL", - "srPoll == SFAIL" - ], - "pHost": [ - "clone_state == PROMOTED" , - "roles == master1:master:worker:master" , + "step": "step20", + "name": "node is standby", + "next": "step30", + "loop": 120, + "wait": 2, + "post": "osn", + "pSite": [ + "lss == 4", + "srr == P", + "lpt > 1000000000", + "srHook == PRIM", + "srPoll == PRIM" + ], + "sSite": [ + "lpt == 10", + "lss == 1", + "srr == S", + "srHook == SFAIL", + "srPoll == SFAIL" + ], + "pHost": [ + "clone_state == PROMOTED", + "roles == master1:master:worker:master", "score == 150" ], "sHost": [ - "clone_state == UNDEFINED" , - "roles == master1::worker:" , - "score == 100" , + "clone_state == UNDEFINED", + "roles == master1::worker:", + "score == 100", "standby == on" ] }, { - "step": "step30", - "name": "node back online", - "next": "final40", - "loop": 120, - "wait": 2, - "todo": "pHost+sHost to check site-name", - "pSite": [ - "lss == 4" , - "srr == P" , - "lpt > 1000000000" , - "srHook == PRIM" 
, + "step": "step30", + "name": "node back online", + "next": "final40", + "loop": 120, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "pSite": [ + "lss == 4", + "srr == P", + "lpt > 1000000000", + "srHook == PRIM", "srPoll == PRIM" ], "sSite": [ @@ -70,13 +70,13 @@ "srPoll == SFAIL" ], "pHost": [ - "clone_state == PROMOTED" , - "roles == master1:master:worker:master" , + "clone_state == PROMOTED", + "roles == master1:master:worker:master", "score == 150" ], "sHost": [ - "clone_state == DEMOTED" , - "roles == master1::worker:" , + "clone_state == DEMOTED", + "roles == master1::worker:", "score ~ (-INFINITY|0)" ] }, diff --git a/test/json/classic-ScaleOut/block_manual_takeover.json b/test/json/classic-ScaleOut/block_manual_takeover.json index 41863d25..7f47b07a 100644 --- a/test/json/classic-ScaleOut/block_manual_takeover.json +++ b/test/json/classic-ScaleOut/block_manual_takeover.json @@ -10,6 +10,7 @@ "loop": 1, "wait": 1, "post": "bmt", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", @@ -22,6 +23,7 @@ "loop": 1, "wait": 1, "post": "sleep 120", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", @@ -33,6 +35,7 @@ "next": "END", "loop": 1, "wait": 1, + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", diff --git a/test/json/classic-ScaleOut/defaults.json b/test/json/classic-ScaleOut/defaults.json index fb1840a6..0a248668 100644 --- a/test/json/classic-ScaleOut/defaults.json +++ b/test/json/classic-ScaleOut/defaults.json @@ -2,58 +2,73 @@ "opMode": "logreplay", "srMode": "sync", "checkPtr": { - "globalUp": [ - "topology=ScaleOut" + "sync_state_sok": [ + "sync_state == SOK" + ], + "sync_state_sfail": [ + "sync_state == SFAIL" + ], + "sync_state_sna": [ + "sync_state == SNA" + ], + "sync_state_sok_or_sfail": [ + "sync_state ~ (SOK|SFAIL)" ], "pHostUp": [ - "clone_state=PROMOTED", - "roles=master1:master:worker:master", - "score=150" + "clone_state == PROMOTED", + "roles == master1:master:worker:master", + "score == 150" ], "pSiteUp": [ - "lpt=1[6-9]........", - "lss=4", - "srr=P", - "srHook=PRIM", - "srPoll=PRIM" + "lpt > 1000000000", + "lss == 4", + "srr == P", + "srHook == PRIM" ], "sSiteUp": [ - "lpt=30", - "lss=4", - "srr=S", - "srHook=SOK", - "srPoll=SOK" + "lpt == 30", + "lss == 4", + "srr == S", + "srHook == SOK" ], "sHostUp": [ - "clone_state=DEMOTED", - "roles=master1:master:worker:master", - "score=100" + "clone_state == DEMOTED", + "roles == master1:master:worker:master", + "score == 100" ], "pHostDown": [ - "clone_state=UNDEFINED" , - "roles=master1::worker:" , - "score=150" , - "standby=on" + "clone_state == UNDEFINED", + "roles == master1::worker:", + "score == 150", + "standby == on" ], "pSiteDown": [ - "lpt=1[6-9]........" 
, - "lss=1" , - "srr=P" , - "srHook=PRIM" , - "srPoll=PRIM" + "lpt > 1000000000", + "lss == 1", + "srr == P", + "srHook == PRIM" ], "sSiteDown": [ - "lpt=10", - "lss=1", - "srr=S", - "srHook=SFAIL", - "srPoll=SFAIL" + "lpt == 10", + "lss == 1", + "srr == S", + "srHook == SFAIL" ], "sHostDown": [ - "clone_state=UNDEFINED" , - "roles=master1::worker:" , - "score=100" , - "standby=on" + "clone_state == UNDEFINED", + "roles == master1::worker:", + "score == 100", + "standby == on" + ], + "pWorkerUp": [ + "clone_state == DEMOTED", + "roles == slave:slave:worker:slave", + "score == -12200" + ], + "sWorkerUp": [ + "clone_state == DEMOTED", + "roles == slave:slave:worker:slave", + "score == -12200" ] } } diff --git a/test/json/classic-ScaleOut/flup.json b/test/json/classic-ScaleOut/flup.json new file mode 100644 index 00000000..e075be42 --- /dev/null +++ b/test/json/classic-ScaleOut/flup.json @@ -0,0 +1,32 @@ +{ + "test": "flup", + "name": "flup - like nop but very short sleep only - only for checking the test engine", + "start": "prereq10", + "steps": [ + { + "step": "prereq10", + "name": "test prerequitsites", + "next": "final40", + "loop": 1, + "wait": 1, + "post": "sleep 4", + "global": "sync_state_sok", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "final40", + "name": "still running", + "next": "END", + "loop": 1, + "wait": 1, + "global": "sync_state_sok", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut/free_log_area.json b/test/json/classic-ScaleOut/free_log_area.json index f1708d42..78e3b48c 100644 --- a/test/json/classic-ScaleOut/free_log_area.json +++ b/test/json/classic-ScaleOut/free_log_area.json @@ -10,6 +10,7 @@ "loop": 1, "wait": 1, "post": "shell test_free_log_area", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", @@ -22,6 +23,7 @@ "loop": 1, "wait": 1, "post": "sleep 60", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", @@ -33,6 +35,7 @@ "next": "END", "loop": 1, "wait": 1, + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", diff --git a/test/json/classic-ScaleOut/kill_prim_indexserver.json b/test/json/classic-ScaleOut/kill_prim_indexserver.json index 47e0ed1a..14c71bda 100644 --- a/test/json/classic-ScaleOut/kill_prim_indexserver.json +++ b/test/json/classic-ScaleOut/kill_prim_indexserver.json @@ -10,6 +10,7 @@ "loop": 1, "wait": 1, "post": "kill_prim_indexserver", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", @@ -19,32 +20,30 @@ "step": "step20", "name": "failure detected", "next": "step30", - "loop": 120, + "loop": 180, "wait": 2, - "comment": "sSite: srPoll could get SFAIL on scale-out", + "global": "sync_state_sok_or_sfail", "pSite": [ - "lss=(1|2)" , - "srr=P" , - "lpt=(1[6-9]........|20)" , - "srHook=(PRIM|SWAIT|SREG)" , - "srPoll=PRIM" + "lss ~ (1|2)", + "srr == P", + "lpt >~ 1000000000:(20)", + "srHook ~ (PRIM|SWAIT|SREG)" ], "sSite": [ - "lpt=(1[6-9]........|30)", - "lss=4", - "srr=S", - "srHook=(PRIM|SOK)", - "srPoll=(SOK|SFAIL)" + "lpt >~ 1000000000:(30)", + "lss == 4", + "srr == S", + "srHook ~ (PRIM|SOK)" ], "pHost": [ - "clone_state=(PROMOTED|DEMOTED|UNDEFINED)" , - "roles=master1::worker:" , - "score=(90|70|5|0)" + "clone_state ~ (PROMOTED|DEMOTED|UNDEFINED)", + "roles == master1::worker:", + "score ~ (90|70|5|0)" ], "sHost": [ - 
"clone_state=(PROMOTED|DEMOTED)" , - "roles=master1:master:worker:master" , - "score=(100|145)" + "clone_state ~ (PROMOTED|DEMOTED)", + "roles == master1:master:worker:master", + "score ~ (100|145)" ] }, { @@ -54,30 +53,29 @@ "loop": 120, "wait": 2, "todo": "pHost+sHost to check site-name", + "global": "sync_state_sok_or_sfail", "pSite": [ - "lss=1" , - "srr=P" , - "lpt=(1[6-9]........|30|20|10)" , - "srHook=(PRIM|SWAIT|SREG)" , - "srPoll=PRIM" + "lss == 1", + "srr == P", + "lpt >~ 1000000000:(30|20|10)", + "srHook ~ (PRIM|SWAIT|SREG)" ], "sSite": [ - "lpt=(1[6-9]........|30)", - "lss=4", - "srr=(S|P)", - "srHook=PRIM", - "srPoll=(SOK|SFAIL)" + "lpt >~ 1000000000:(30)", + "lss == 4", + "srr ~ (S|P)", + "srHook == PRIM" ], "pHost": [ - "clone_state=(UNDEFINED|DEMOTED)" , - "roles=master1::worker:" , - "score=(90|70|5)" + "clone_state ~ (UNDEFINED|DEMOTED)", + "roles == master1::worker:", + "score ~ (90|70|5)" ], "sHost": [ - "clone_state=(DEMOTED|PROMOTED)" , - "roles=master1:master:worker:master" , - "score=(100|145)" , - "srah=T" + "clone_state ~ (DEMOTED|PROMOTED)", + "roles == master1:master:worker:master", + "score ~ (100|145)", + "srah == T" ] }, { @@ -88,6 +86,7 @@ "wait": 2, "post": "cleanup", "remark": "pXXX and sXXX are now exchanged", + "global": "sync_state_sok", "pSite": "sSiteUp", "sSite": "pSiteUp", "pHost": "sHostUp", diff --git a/test/json/classic-ScaleOut/kill_prim_inst.json b/test/json/classic-ScaleOut/kill_prim_inst.json index 78260e10..ab5fdc5a 100644 --- a/test/json/classic-ScaleOut/kill_prim_inst.json +++ b/test/json/classic-ScaleOut/kill_prim_inst.json @@ -10,8 +10,7 @@ "loop": 1, "wait": 1, "post": "kill_prim_inst", - "todo": "allow something like pSite=@@pSite@@ or pSite=%pSite", - "todo1": "allow something like lss>2, lpt>10000, score!=123", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", @@ -23,30 +22,28 @@ "next": "step30", "loop": 120, "wait": 2, - "comment": "sSite: srPoll could get SFAIL on scale-out", + "global": "sync_state_sok_or_sfail", "pSite": [ - "lss=(1|2)" , - "srr=P" , - "lpt=(1[6-9]........|20)" , - "srHook=(PRIM|SWAIT|SREG)" , - "srPoll=PRIM" + "lss ~ (1|2)", + "srr == P", + "lpt >~ 1000000000:(20)", + "srHook ~ (PRIM|SWAIT|SREG)" ], "sSite": [ - "lpt=(1[6-9]........|30)", - "lss=4", - "srr=S", - "srHook=(PRIM|SOK)", - "srPoll=(SOK|SFAIL)" + "lpt >~ 1000000000:(30)", + "lss == 4", + "srr == S", + "srHook ~ (PRIM|SOK)" ], "pHost": [ - "clone_state=(PROMOTED|DEMOTED|UNDEFINED)" , - "roles=master1::worker:" , - "score=(90|70|5|0)" + "clone_state ~ (PROMOTED|DEMOTED|UNDEFINED)", + "roles == master1::worker:", + "score ~ (90|70|5|0)" ], "sHost": [ - "clone_state=(PROMOTED|DEMOTED)" , - "roles=master1:master:worker:master" , - "score=(100|145)" + "clone_state ~ (PROMOTED|DEMOTED)", + "roles == master1:master:worker:master", + "score ~ (100|145)" ] }, { @@ -55,31 +52,29 @@ "next": "final40", "loop": 120, "wait": 2, - "todo": "pHost+sHost to check site-name", + "global": "sync_state_sok_or_sfail", "pSite": [ - "lss=1" , - "srr=P" , - "lpt=(1[6-9]........|30|20|10)" , - "srHook=(PRIM|SWAIT|SREG)" , - "srPoll=PRIM" + "lss == 1", + "srr == P", + "lpt >~ 1000000000:(30|20|10)", + "srHook ~ (PRIM|SWAIT|SREG)" ], "sSite": [ - "lpt=(1[6-9]........|30)", - "lss=4", - "srr=(S|P)", - "srHook=PRIM", - "srPoll=(SOK|SFAIL)" + "lpt >~ 1000000000:(30)", + "lss == 4", + "srr ~ (S|P)", + "srHook == PRIM" ], "pHost": [ - "clone_state=(UNDEFINED|DEMOTED)" , - "roles=master1::worker:" , - "score=(90|70|5)" + "clone_state ~ 
(UNDEFINED|DEMOTED)", + "roles == master1::worker:", + "score ~ (90|70|5)" ], "sHost": [ - "clone_state=(DEMOTED|PROMOTED)" , - "roles=master1:master:worker:master" , - "score=(100|145)" , - "srah=T" + "clone_state ~ (DEMOTED|PROMOTED)", + "roles == master1:master:worker:master", + "score ~ (100|145)", + "srah == T" ] }, { @@ -90,6 +85,7 @@ "wait": 2, "post": "cleanup", "remark": "pXXX and sXXX are now exchanged", + "global": "sync_state_sok", "pSite": "sSiteUp", "sSite": "pSiteUp", "pHost": "sHostUp", diff --git a/test/json/classic-ScaleOut/kill_prim_node.json b/test/json/classic-ScaleOut/kill_prim_node.json index cad372b6..f44bfa60 100644 --- a/test/json/classic-ScaleOut/kill_prim_node.json +++ b/test/json/classic-ScaleOut/kill_prim_node.json @@ -10,6 +10,7 @@ "loop": 1, "wait": 1, "post": "kill_prim_node", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", @@ -21,26 +22,25 @@ "next": "step30", "loop": 120, "wait": 2, + "global": "sync_state_sok_or_sfail", "pSite": [ - "lss=1" , - "srr=P" , - "lpt=(1[6-9]........|20|10)" , - "srHook=(PRIM|SWAIT|SREG)" , - "srPoll=PRIM" + "lss == 1", + "srr == P", + "lpt >~ 1000000000:(20|10)", + "srHook ~ (PRIM|SWAIT|SREG)" ], "sSite": [ - "lpt=(1[6-9]........|30)", - "lss=4", - "srr=(S|P)", - "srHook=(PRIM|SOK)", - "srPoll=(SOK|SFAIL)" + "lpt >~ 1000000000:(30)", + "lss == 4", + "srr ~ (S|P)", + "srHook ~ (PRIM|SOK)" ], "pHost": [ ], "sHost": [ - "clone_state=(PROMOTED|DEMOTED)", - "roles=master1:master:worker:master" , - "score=(100|145)" + "clone_state ~ (PROMOTED|DEMOTED)", + "roles == master1:master:worker:master", + "score ~ (100|145)" ] }, { @@ -50,28 +50,27 @@ "loop": 300, "wait": 2, "todo": "pHost+sHost to check site-name", + "global": "sync_state_sok", "pSite": [ - "lss=(1|2)", - "srr=(P|S)" , - "lpt=(1[6-9]........|30|20|10)" , - "srHook=(PRIM|SWAIT|SREG)" , - "srPoll=(PRIM|SFAIL)" + "lss ~ (1|2)", + "srr ~ (P|S)", + "lpt >~ 1000000000:(30|20|10)", + "srHook ~ (PRIM|SWAIT|SREG)" ], "sSite": [ - "lpt=(1[6-9]........|30)", - "lss=4", - "srr=(S|P)", - "srHook=PRIM", - "srPoll=(SOK|PRIM)" + "lpt >~ 1000000000:(30)", + "lss == 4", + "srr ~ (S|P)", + "srHook == PRIM" ], "pHost": [ - "clone_state=(UNDEFINED|DEMOTED|WAITING4NODES)" , - "roles=master1::worker:" + "clone_state ~ (UNDEFINED|DEMOTED|WAITING4NODES)", + "roles == master1::worker:" ], "sHost": [ - "clone_state=(DEMOTED|PROMOTED)" , - "roles=master1:master:worker:master" , - "score=(100|145|150)" + "clone_state ~ (DEMOTED|PROMOTED)", + "roles == master1:master:worker:master", + "score ~ (100|145|150)" ] }, { @@ -82,6 +81,7 @@ "wait": 2, "post": "cleanup", "remark": "pXXX and sXXX are now exchanged", + "global": "sync_state_sok", "pSite": "sSiteUp", "sSite": "pSiteUp", "pHost": "sHostUp", diff --git a/test/json/classic-ScaleOut/kill_prim_worker_indexserver.json b/test/json/classic-ScaleOut/kill_prim_worker_indexserver.json index dc6caba4..7c5b24ae 100644 --- a/test/json/classic-ScaleOut/kill_prim_worker_indexserver.json +++ b/test/json/classic-ScaleOut/kill_prim_worker_indexserver.json @@ -10,6 +10,7 @@ "loop": 1, "wait": 1, "post": "kill_prim_worker_indexserver", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", @@ -21,28 +22,27 @@ "next": "step30", "loop": 120, "wait": 2, + "global": "sync_state_sok_or_sfail", "pSite": [ - "lss=(1|2)" , - "srr=P" , - "lpt=(1[6-9]........|20)" , - "srHook=(PRIM|SWAIT|SREG)" , - "srPoll=PRIM" + "lss ~ (1|2)", + "srr == P", + "lpt >~ 1000000000:(20)", + "srHook ~ (PRIM|SWAIT|SREG)" 
], "sSite": [ - "lpt=(1[6-9]........|30)", - "lss=4", - "srr=S", - "srHook=(PRIM|SOK)", - "srPoll=(SOK|SFAIL)" + "lpt >~ 1000000000:(30)", + "lss == 4", + "srr == S", + "srHook ~ (PRIM|SOK)" ], "pHost": [ - "clone_state=(PROMOTED|DEMOTED|UNDEFINED)" , - "score=(90|70|5|0)" + "clone_state ~ (PROMOTED|DEMOTED|UNDEFINED)", + "score ~ (90|70|5|0)" ], "sHost": [ - "clone_state=(PROMOTED|DEMOTED)", - "roles=master1:master:worker:master" , - "score=(100|145)" + "clone_state ~ (PROMOTED|DEMOTED)", + "roles == master1:master:worker:master", + "score ~ (100|145)" ] }, { @@ -53,29 +53,28 @@ "wait": 2, "todo": "pHost+sHost to check site-name", "todo2": "why do we need SFAIL for srHook?", + "global": "sync_state_sok_or_sfail", "pSite": [ - "lss=1" , - "srr=P" , - "lpt=(1[6-9]........|30|20|10)" , - "srHook=(PRIM|SWAIT|SREG)" , - "srPoll=PRIM" + "lss == 1", + "srr == P", + "lpt >~ 1000000000:(30|20|10)", + "srHook ~ (PRIM|SWAIT|SREG)" ], "sSite": [ - "lpt=(1[6-9]........|30)", - "lss=4", - "srr=(S|P)", - "srHook=(PRIM|SFAIL)", - "srPoll=(SOK|SFAIL)" + "lpt >~ 1000000000:(30)", + "lss == 4", + "srr ~ (S|P)", + "srHook ~ (PRIM|SFAIL)" ], "pHost": [ - "clone_state=(UNDEFINED|DEMOTED)" , - "score=(90|70|5)" + "clone_state ~ (UNDEFINED|DEMOTED)", + "score ~ (90|70|5)" ], "sHost": [ - "clone_state=(DEMOTED|PROMOTED)" , - "roles=master1:master:worker:master" , - "score=(100|145)" , - "srah=T" + "clone_state ~ (DEMOTED|PROMOTED)", + "roles == master1:master:worker:master", + "score ~ (100|145)", + "srah == T" ] }, { @@ -86,6 +85,7 @@ "wait": 2, "post": "cleanup", "remark": "pXXX and sXXX are now exchanged", + "global": "sync_state_sok", "pSite": "sSiteUp", "sSite": "pSiteUp", "pHost": "sHostUp", diff --git a/test/json/classic-ScaleOut/kill_prim_worker_inst.json b/test/json/classic-ScaleOut/kill_prim_worker_inst.json index 36c1232d..3b87f7f6 100644 --- a/test/json/classic-ScaleOut/kill_prim_worker_inst.json +++ b/test/json/classic-ScaleOut/kill_prim_worker_inst.json @@ -10,8 +10,7 @@ "loop": 1, "wait": 1, "post": "kill_prim_worker_inst", - "todo": "allow something like pSite=@@pSite@@ or pSite=%pSite", - "todo1": "allow something like lss>2, lpt>10000, score!=123", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", @@ -23,28 +22,27 @@ "next": "step30", "loop": 120, "wait": 2, + "global": "sync_state_sok_or_sfail", "pSite": [ - "lss=(1|2)" , - "srr=P" , - "lpt=(1[6-9]........|20)" , - "srHook=(PRIM|SWAIT|SREG)" , - "srPoll=PRIM" + "lss ~ (1|2)", + "srr == P", + "lpt >~ 1000000000:(20)", + "srHook ~ (PRIM|SWAIT|SREG)" ], "sSite": [ - "lpt=(1[6-9]........|30)", - "lss=4", - "srr=S", - "srHook=(PRIM|SOK)", - "srPoll=(SOK|SFAIL)" + "lpt >~ 1000000000:(30)", + "lss == 4", + "srr == S", + "srHook ~ (PRIM|SOK)" ], "pHost": [ - "clone_state=(PROMOTED|DEMOTED|UNDEFINED)" , - "score=(90|70|5|0)" + "clone_state ~ (PROMOTED|DEMOTED|UNDEFINED)", + "score ~ (90|70|5|0)" ], "sHost": [ - "clone_state=(PROMOTED|DEMOTED)", - "roles=master1:master:worker:master" , - "score=(100|145)" + "clone_state ~ (PROMOTED|DEMOTED)", + "roles == master1:master:worker:master", + "score ~ (100|145)" ] }, { @@ -54,30 +52,29 @@ "loop": 120, "wait": 2, "todo": "pHost+sHost to check site-name", + "global": "sync_state_sok_or_sfail", "pSite": [ - "lss=1" , - "srr=P" , - "lpt=(1[6-9]........|30|20|10)" , - "srHook=(PRIM|SWAIT|SREG)" , - "srPoll=PRIM" + "lss == 1", + "srr == P", + "lpt >~ 1000000000:(30|20|10)", + "srHook ~ (PRIM|SWAIT|SREG)" ], "sSite": [ - "lpt=(1[6-9]........|30)", - "lss=4", - "srr=(S|P)", - 
"srHook=PRIM", - "srPoll=(SOK|SFAIL)" + "lpt >~ 1000000000:(30)", + "lss == 4", + "srr ~ (S|P)", + "srHook == PRIM" ], "pHost": [ - "clone_state=(UNDEFINED|DEMOTED)" , - "roles=master1::worker:", - "score=(90|70|5)" + "clone_state ~ (UNDEFINED|DEMOTED)", + "roles == master1::worker:", + "score ~ (90|70|5)" ], "sHost": [ - "clone_state=(DEMOTED|PROMOTED)" , - "roles=master1:master:worker:master" , - "score=(100|145)" , - "srah=T" + "clone_state ~ (DEMOTED|PROMOTED)", + "roles == master1:master:worker:master", + "score ~ (100|145)", + "srah == T" ] }, { @@ -88,6 +85,7 @@ "wait": 2, "post": "cleanup", "remark": "pXXX and sXXX are now exchanged", + "global": "sync_state_sok", "pSite": "sSiteUp", "sSite": "pSiteUp", "pHost": "sHostUp", diff --git a/test/json/classic-ScaleOut/kill_prim_worker_node.json b/test/json/classic-ScaleOut/kill_prim_worker_node.json index 663372a7..3d084620 100644 --- a/test/json/classic-ScaleOut/kill_prim_worker_node.json +++ b/test/json/classic-ScaleOut/kill_prim_worker_node.json @@ -10,6 +10,7 @@ "loop": 1, "wait": 1, "post": "kill_prim_worker_node", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", @@ -21,28 +22,27 @@ "next": "step30", "loop": 120, "wait": 2, + "global": "sync_state_sok_or_sfail", "pSite": [ - "lss=1" , - "srr=P" , - "lpt=(1[6-9]........|20|10)" , - "srHook=(PRIM|SWAIT|SREG)" , - "srPoll=PRIM" + "lss == 1", + "srr == P", + "lpt >~ 1000000000:(20|10)", + "srHook ~ (PRIM|SWAIT|SREG)" ], "sSite": [ - "lpt=(1[6-9]........|30)", - "lss=4", - "srr=(S|P)", - "srHook=(PRIM|SOK)", - "srPoll=(SOK|SFAIL)" + "lpt >~ 1000000000:(30)", + "lss == 4", + "srr ~ (S|P)", + "srHook ~ (PRIM|SOK)" ], "pHost": [ - "clone_state=(DEMOTED|UNDEFINED|WAITING4NODES)" , - "score=(90|70|5)" + "clone_state ~ (DEMOTED|UNDEFINED|WAITING4NODES)", + "score ~ (90|70|5)" ], "sHost": [ - "clone_state=(PROMOTED|DEMOTED)", - "roles=master1:master:worker:master" , - "score=(100|145)" + "clone_state ~ (PROMOTED|DEMOTED)", + "roles == master1:master:worker:master", + "score ~ (100|145)" ] }, { @@ -52,28 +52,27 @@ "loop": 240, "wait": 2, "todo": "pHost+sHost to check site-name", + "global": "sync_state_sok", "pSite": [ - "lss=(1|2)", - "srr=(P|S)" , - "lpt=(1[6-9]........|30|20|10)" , - "srHook=(PRIM|SWAIT|SREG)" , - "srPoll=(PRIM|SFAIL)" + "lss ~ (1|2)", + "srr ~ (P|S)", + "lpt >~ 1000000000:(30|20|10)", + "srHook ~ (PRIM|SWAIT|SREG)" ], "sSite": [ - "lpt=(1[6-9]........|30)", - "lss=4", - "srr=(S|P)", - "srHook=PRIM", - "srPoll=(SOK|PRIM)" + "lpt >~ 1000000000:(30)", + "lss == 4", + "srr ~ (S|P)", + "srHook == PRIM" ], "pHost": [ - "clone_state=(UNDEFINED|DEMOTED|WAITING4NODES)" , - "roles=master1::worker:" + "clone_state ~ (UNDEFINED|DEMOTED|WAITING4NODES)", + "roles == master1::worker:" ], "sHost": [ - "clone_state=(DEMOTED|PROMOTED)" , - "roles=master1:master:worker:master" , - "score=(100|145|150)" + "clone_state ~ (DEMOTED|PROMOTED)", + "roles == master1:master:worker:master", + "score ~ (100|145|150)" ] }, { @@ -84,6 +83,7 @@ "wait": 2, "post": "cleanup", "remark": "pXXX and sXXX are now exchanged", + "global": "sync_state_sok", "pSite": "sSiteUp", "sSite": "pSiteUp", "pHost": "sHostUp", diff --git a/test/json/classic-ScaleOut/kill_secn_indexserver.json b/test/json/classic-ScaleOut/kill_secn_indexserver.json index 4f500059..81dcbae5 100644 --- a/test/json/classic-ScaleOut/kill_secn_indexserver.json +++ b/test/json/classic-ScaleOut/kill_secn_indexserver.json @@ -10,6 +10,7 @@ "loop": 1, "wait": 1, "post": "kill_secn_indexserver", + "global": 
"sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", @@ -21,29 +22,28 @@ "next": "step30", "loop": 120, "wait": 2, + "global": "sync_state_sok_or_sfail", "pSite": [ - "lss=4" , - "srr=P" , - "lpt=1[6-9]........" , - "srHook=PRIM" , - "srPoll=PRIM" + "lss == 4", + "srr == P", + "lpt > 1000000000", + "srHook == PRIM" ], "sSite": [ - "lpt=(10|30)", - "lss=(1|2)", - "srr=S", - "srHook=SFAIL", - "srPoll=(SFAIL|SOK)" + "lpt ~ (10|30)", + "lss ~ (1|2)", + "srr == S", + "srHook == SFAIL" ], "pHost": [ - "clone_state=PROMOTED" , - "roles=master1:master:worker:master" , - "score=150" + "clone_state == PROMOTED", + "roles == master1:master:worker:master", + "score == 150" ], "sHost": [ - "clone_state=DEMOTED" , - "roles=master1::worker:" , - "score=(-INFINITY|0)" + "clone_state == DEMOTED", + "roles == master1::worker:", + "score ~ (-INFINITY|0)" ] }, { @@ -53,29 +53,28 @@ "loop": 120, "wait": 2, "todo": "pHost+sHost to check site-name", + "global": "sync_state_sok_or_sfail", "pSite": [ - "lss=4" , - "srr=P" , - "lpt=1[6-9]........" , - "srHook=PRIM" , - "srPoll=PRIM" + "lss == 4", + "srr == P", + "lpt > 1000000000", + "srHook == PRIM" ], "sSite": [ - "lpt=10", - "lss=1", - "srr=S", - "srHook=SFAIL", - "srPoll=(SFAIL|SOK)" + "lpt == 10", + "lss == 1", + "srr == S", + "srHook == SFAIL" ], "pHost": [ - "clone_state=PROMOTED" , - "roles=master1:master:worker:master" , - "score=150" + "clone_state == PROMOTED", + "roles == master1:master:worker:master", + "score == 150" ], "sHost": [ - "clone_state=UNDEFINED" , - "roles=master1::worker:" , - "score=(-INFINITY|0|-1)" + "clone_state == UNDEFINED", + "roles == master1::worker:", + "score ~ (-INFINITY|0|-1)" ] }, { @@ -86,6 +85,7 @@ "wait": 2, "post": "cleanup", "remark": "pXXX and sCCC to be the same as at test begin", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", diff --git a/test/json/classic-ScaleOut/kill_secn_inst.json b/test/json/classic-ScaleOut/kill_secn_inst.json index 2db5e9b2..41f93c30 100644 --- a/test/json/classic-ScaleOut/kill_secn_inst.json +++ b/test/json/classic-ScaleOut/kill_secn_inst.json @@ -10,6 +10,7 @@ "loop": 1, "wait": 1, "post": "kill_secn_inst", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", @@ -21,29 +22,28 @@ "next": "step30", "loop": 120, "wait": 2, + "global": "sync_state_sok_or_sfail", "pSite": [ - "lss=4" , - "srr=P" , - "lpt=1[6-9]........" , - "srHook=PRIM" , - "srPoll=PRIM" + "lss == 4", + "srr == P", + "lpt > 1000000000", + "srHook == PRIM" ], "sSite": [ - "lpt=(10|30)", - "lss=(1|2)", - "srr=S", - "srHook=SFAIL", - "srPoll=(SFAIL|SOK)" + "lpt ~ (10|30)", + "lss ~ (1|2)", + "srr == S", + "srHook == SFAIL" ], "pHost": [ - "clone_state=PROMOTED" , - "roles=master1:master:worker:master" , - "score=150" + "clone_state == PROMOTED", + "roles == master1:master:worker:master", + "score == 150" ], "sHost": [ - "clone_state=DEMOTED" , - "roles=master1::worker:" , - "score=(-INFINITY|0)" + "clone_state == DEMOTED", + "roles == master1::worker:", + "score ~ (-INFINITY|0)" ] }, { @@ -53,29 +53,28 @@ "loop": 120, "wait": 2, "todo": "pHost+sHost to check site-name", + "global": "sync_state_sok_or_sfail", "pSite": [ - "lss=4" , - "srr=P" , - "lpt=1[6-9]........" 
, - "srHook=PRIM" , - "srPoll=PRIM" + "lss == 4", + "srr == P", + "lpt > 1000000000", + "srHook == PRIM" ], "sSite": [ - "lpt=10", - "lss=(1|2)", - "srr=S", - "srHook=(SFAIL|SWAIT)", - "srPoll=(SFAIL|SOK)" + "lpt == 10", + "lss ~ (1|2)", + "srr == S", + "srHook ~ (SFAIL|SWAIT)" ], "pHost": [ - "clone_state=PROMOTED" , - "roles=master1:master:worker:master" , - "score=150" + "clone_state == PROMOTED", + "roles == master1:master:worker:master", + "score == 150" ], "sHost": [ - "clone_state=(UNDEFINED|DEMOTED)" , - "roles=master1::worker:" , - "score=(-INFINITY|0|-1)" + "clone_state ~ (UNDEFINED|DEMOTED)", + "roles == master1::worker:", + "score ~ (-INFINITY|0|-1)" ] }, { @@ -85,6 +84,7 @@ "loop": 240, "wait": 2, "post": "cleanup", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", diff --git a/test/json/classic-ScaleOut/kill_secn_node.json b/test/json/classic-ScaleOut/kill_secn_node.json index a5febca2..795f6eda 100644 --- a/test/json/classic-ScaleOut/kill_secn_node.json +++ b/test/json/classic-ScaleOut/kill_secn_node.json @@ -10,6 +10,7 @@ "loop": 1, "wait": 1, "post": "kill_secn_node", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", @@ -21,24 +22,23 @@ "next": "step30", "loop": 120, "wait": 2, + "global": "sync_state_sfail", "pSite": [ - "lss=4" , - "srr=P" , - "lpt=1[6-9]........" , - "srHook=PRIM" , - "srPoll=PRIM" + "lss == 4", + "srr == P", + "lpt > 1000000000", + "srHook == PRIM" ], "sSite": [ - "lpt=10", - "lss=1", - "srr=S", - "srHook=SFAIL", - "srPoll=SFAIL" + "lpt == 10", + "lss == 1", + "srr == S", + "srHook == SFAIL" ], "pHost": [ - "clone_state=PROMOTED" , - "roles=master1:master:worker:master" , - "score=150" + "clone_state == PROMOTED", + "roles == master1:master:worker:master", + "score == 150" ] }, { @@ -48,29 +48,28 @@ "loop": 120, "wait": 2, "todo": "pHost+sHost to check site-name", + "global": "sync_state_sok_or_sfail", "pSite": [ - "lss=4" , - "srr=P" , - "lpt=1[6-9]........" 
, - "srHook=PRIM" , - "srPoll=PRIM" + "lss == 4", + "srr == P", + "lpt > 1000000000", + "srHook == PRIM" ], "sSite": [ - "lpt=10", - "lss=(1|2)", - "srr=S", - "srHook=(SFAIL|SWAIT)", - "srPoll=(SFAIL|SOK)" + "lpt == 10", + "lss ~ (1|2)", + "srr == S", + "srHook ~ (SFAIL|SWAIT)" ], "pHost": [ - "clone_state=PROMOTED" , - "roles=master1:master:worker:master" , - "score=150" + "clone_state == PROMOTED", + "roles == master1:master:worker:master", + "score == 150" ], "sHost": [ - "clone_state=(UNDEFINED|DEMOTED)" , - "roles=master1::worker:" , - "score=(-INFINITY|0|-1)" + "clone_state ~ (UNDEFINED|DEMOTED)", + "roles == master1::worker:", + "score ~ (-INFINITY|0|-1)" ] }, { @@ -80,6 +79,7 @@ "loop": 120, "wait": 2, "post": "cleanup", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", diff --git a/test/json/classic-ScaleOut/kill_secn_worker_inst.json b/test/json/classic-ScaleOut/kill_secn_worker_inst.json index 6a00bf97..543f21ae 100644 --- a/test/json/classic-ScaleOut/kill_secn_worker_inst.json +++ b/test/json/classic-ScaleOut/kill_secn_worker_inst.json @@ -10,6 +10,7 @@ "loop": 1, "wait": 1, "post": "kill_secn_worker_inst", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", @@ -21,19 +22,19 @@ "next": "step30", "loop": 120, "wait": 2, + "global": "sync_state_sok_or_sfail", "pSite": "pSiteUp", "sSite": [ - "lpt=(10|30)", - "lss=(1|2)", - "srr=S", - "srHook=(SFAIL|SWAIT)", - "srPoll=(SFAIL|SOK)" + "lpt ~ (10|30)", + "lss ~ (1|2)", + "srr == S", + "srHook ~ (SFAIL|SWAIT)" ], "pHost": "pHostUp", "sHost": [ - "clone_state=(DEMOTED|UNDEFINED)" , - "roles=master1::worker:" , - "score=(-INFINITY|0)" + "clone_state ~ (DEMOTED|UNDEFINED)", + "roles == master1::worker:", + "score ~ (-INFINITY|0)" ] }, { @@ -43,19 +44,19 @@ "loop": 120, "wait": 2, "todo": "pHost+sHost to check site-name", + "global": "sync_state_sok_or_sfail", "pSite": "pSiteUp", "sSite": [ - "lpt=10", - "lss=(1|2)", - "srr=S", - "srHook=(SFAIL|SWAIT)", - "srPoll=(SFAIL|SOK)" + "lpt == 10", + "lss ~ (1|2)", + "srr == S", + "srHook ~ (SFAIL|SWAIT)" ], "pHost": "pHostUp", "sHost": [ - "clone_state=(UNDEFINED|DEMOTED)" , - "roles=master1::worker:" , - "score=(-INFINITY|0|-1)" + "clone_state ~ (UNDEFINED|DEMOTED)", + "roles == master1::worker:", + "score ~ (-INFINITY|0|-1)" ] }, { @@ -65,6 +66,7 @@ "loop": 120, "wait": 2, "post": "cleanup", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", diff --git a/test/json/classic-ScaleOut/kill_secn_worker_node.json b/test/json/classic-ScaleOut/kill_secn_worker_node.json index ddbaad16..3cf33305 100644 --- a/test/json/classic-ScaleOut/kill_secn_worker_node.json +++ b/test/json/classic-ScaleOut/kill_secn_worker_node.json @@ -10,6 +10,7 @@ "loop": 1, "wait": 1, "post": "kill_secn_worker_node", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", @@ -21,17 +22,17 @@ "next": "step30", "loop": 120, "wait": 2, + "global": "sync_state_sfail", "pSite": "pSiteUp", "sSite": [ - "lpt=10", - "lss=1", - "srr=S", - "srHook=SFAIL", - "srPoll=SFAIL" + "lpt == 10", + "lss == 1", + "srr == S", + "srHook == SFAIL" ], "pHost": "pHostUp", "sHost": [ - "clone_state=WAITING4NODES" + "clone_state == WAITING4NODES" ] }, { @@ -41,19 +42,19 @@ "loop": 120, "wait": 2, "todo": "pHost+sHost to check site-name", + "global": "sync_state_sok_or_sfail", "pSite": "pSiteUp", "sSite": [ - "lpt=10", - "lss=(1|2)", - "srr=S", - "srHook=(SFAIL|SWAIT)", - "srPoll=(SFAIL|SOK)" + "lpt == 10", 
+ "lss ~ (1|2)", + "srr == S", + "srHook ~ (SFAIL|SWAIT)" ], "pHost": "pHostUp", "sHost": [ - "clone_state=(UNDEFINED|DEMOTED)" , - "roles=master1::worker:" , - "score=(-INFINITY|0|-1)" + "clone_state ~ (UNDEFINED|DEMOTED)", + "roles == master1::worker:", + "score ~ (-INFINITY|0|-1)" ] }, { @@ -63,6 +64,7 @@ "loop": 120, "wait": 2, "post": "cleanup", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", diff --git a/test/json/classic-ScaleOut/maintenance_cluster_turn_hana.json b/test/json/classic-ScaleOut/maintenance_cluster_turn_hana.json index cdf90e80..696d9b4a 100644 --- a/test/json/classic-ScaleOut/maintenance_cluster_turn_hana.json +++ b/test/json/classic-ScaleOut/maintenance_cluster_turn_hana.json @@ -10,6 +10,7 @@ "loop": 1, "wait": 1, "post": "shell test_maintenance_cluster_turn_hana", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", @@ -23,6 +24,7 @@ "wait": 2, "post": "cleanup", "remark": "pXXX and sXXX are now exchanged", + "global": "sync_state_sok", "pSite": "sSiteUp", "sSite": "pSiteUp", "pHost": "sHostUp", diff --git a/test/json/classic-ScaleOut/maintenance_with_standby_nodes.json b/test/json/classic-ScaleOut/maintenance_with_standby_nodes.json index feba872b..529a3360 100644 --- a/test/json/classic-ScaleOut/maintenance_with_standby_nodes.json +++ b/test/json/classic-ScaleOut/maintenance_with_standby_nodes.json @@ -13,6 +13,7 @@ "loop": 1, "wait": 1, "post": "ssn", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", @@ -37,19 +38,19 @@ "loop": 120, "wait": 2, "todo": "pHost+sHost to check site-name", + "global": "sync_state_sfail", "pSite": "pSiteUp", "sSite": [ - "lpt=10", - "lss=1", - "srr=S", - "srHook=SWAIT", - "srPoll=SFAIL" + "lpt == 10", + "lss == 1", + "srr == S", + "srHook == SWAIT" ], "pHost": "pHostUp", "sHost": [ - "clone_state=DEMOTED" , - "roles=master1::worker:" , - "score=(-INFINITY|0)" + "clone_state == DEMOTED", + "roles == master1::worker:", + "score ~ (-INFINITY|0)" ] }, { @@ -58,6 +59,7 @@ "next": "step110", "loop": 120, "wait": 2, + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", @@ -70,6 +72,7 @@ "loop": 1, "wait": 1, "post": "spn", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", @@ -81,19 +84,19 @@ "next": "step130", "loop": 120, "wait": 2, + "global": "sync_state_sok", "pSite": "pSiteDown", "sSite": [ - "lpt=(30|1[6-9]........)", - "lss=4", - "srr=S", - "srHook=(PRIM|SOK)", - "srPoll=SOK" + "lpt >~ 1000000000:(30)", + "lss == 4", + "srr == S", + "srHook ~ (PRIM|SOK)" ], "pHost": "pHostDown", "sHost": [ - "clone_state=PROMOTED" , - "roles=master1:master:worker:master" , - "score=(100|145)" + "clone_state == PROMOTED", + "roles == master1:master:worker:master", + "score ~ (100|145)" ] }, { @@ -103,19 +106,19 @@ "loop": 120, "post": "opn", "wait": 2, + "global": "sync_state_sfail", "pSite": [ - "lss=1" , - "srr=P" , - "lpt=10" , - "srHook=SWAIT" , - "srPoll=SFAIL" + "lss == 1", + "srr == P", + "lpt == 10", + "srHook == SWAIT" ], "sSite": "pSiteUp", "pHost": [ - "clone_state=UNDEFINED" , - "roles=master1::worker:" , - "score=150" , - "standby=on" + "clone_state == UNDEFINED", + "roles == master1::worker:", + "score == 150", + "standby == on" ], "sHost": "pHostUp" }, @@ -127,6 +130,7 @@ "wait": 2, "post": "cleanup", "remark": "pXXX and sXXX are now exchanged", + "global": "sync_state_sok", "pSite": "sSiteUp", "sSite": "pSiteUp", "pHost": "sHostUp", diff --git 
a/test/json/classic-ScaleOut/nop-false.json b/test/json/classic-ScaleOut/nop-false.json index 46924104..b2dfd1b3 100644 --- a/test/json/classic-ScaleOut/nop-false.json +++ b/test/json/classic-ScaleOut/nop-false.json @@ -11,7 +11,7 @@ "wait": 1, "post": "sleep 240", "global": [ - "topology=Nix" + "topology == Nix" ], "pSite": "pSiteUp", "sSite": "sSiteUp", @@ -24,6 +24,7 @@ "next": "END", "loop": 1, "wait": 1, + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", diff --git a/test/json/classic-ScaleOut/nop.json b/test/json/classic-ScaleOut/nop.json index 31d4111e..d98cc188 100644 --- a/test/json/classic-ScaleOut/nop.json +++ b/test/json/classic-ScaleOut/nop.json @@ -10,7 +10,7 @@ "loop": 1, "wait": 1, "post": "sleep 240", - "global": "globalUp", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", @@ -22,6 +22,7 @@ "next": "END", "loop": 1, "wait": 1, + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", diff --git a/test/json/classic-ScaleOut/restart_cluster.json b/test/json/classic-ScaleOut/restart_cluster.json index c59f8e20..26e372a0 100644 --- a/test/json/classic-ScaleOut/restart_cluster.json +++ b/test/json/classic-ScaleOut/restart_cluster.json @@ -10,6 +10,7 @@ "loop": 1, "wait": 1, "post": "shell test_restart_cluster", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", @@ -22,6 +23,7 @@ "loop": 120, "wait": 2, "post": "cleanup", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", diff --git a/test/json/classic-ScaleOut/restart_cluster_hana_running.json b/test/json/classic-ScaleOut/restart_cluster_hana_running.json index 25ebf149..891b2122 100644 --- a/test/json/classic-ScaleOut/restart_cluster_hana_running.json +++ b/test/json/classic-ScaleOut/restart_cluster_hana_running.json @@ -10,6 +10,7 @@ "loop": 1, "wait": 1, "post": "shell test_restart_cluster_hana_running", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", @@ -22,6 +23,7 @@ "loop": 120, "wait": 2, "post": "cleanup", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", diff --git a/test/json/classic-ScaleOut/restart_cluster_turn_hana.json b/test/json/classic-ScaleOut/restart_cluster_turn_hana.json index fc1a482a..a1d66dce 100644 --- a/test/json/classic-ScaleOut/restart_cluster_turn_hana.json +++ b/test/json/classic-ScaleOut/restart_cluster_turn_hana.json @@ -10,6 +10,7 @@ "loop": 1, "wait": 1, "post": "shell test_restart_cluster_turn_hana", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", @@ -23,6 +24,7 @@ "wait": 2, "post": "cleanup", "remark": "pXXX and sXXX are now exchanged", + "global": "sync_state_sok", "pSite": "sSiteUp", "sSite": "pSiteUp", "pHost": "sHostUp", diff --git a/test/json/classic-ScaleOut/standby_prim_node.json b/test/json/classic-ScaleOut/standby_prim_node.json index 1e047088..f5b2f302 100644 --- a/test/json/classic-ScaleOut/standby_prim_node.json +++ b/test/json/classic-ScaleOut/standby_prim_node.json @@ -10,6 +10,7 @@ "loop": 1, "wait": 1, "post": "spn", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", @@ -21,30 +22,29 @@ "next": "step30", "loop": 120, "wait": 2, + "global": "sync_state_sok", "pSite": [ - "lss=1" , - "srr=P" , - "lpt=1[6-9]........" 
, - "srHook=PRIM" , - "srPoll=PRIM" + "lss == 1", + "srr == P", + "lpt > 1000000000", + "srHook == PRIM" ], "sSite": [ - "lpt=(30|1[6-9]........)", - "lss=4", - "srr=S", - "srHook=(PRIM|SOK)", - "srPoll=SOK" + "lpt >~ 1000000000:(30)", + "lss == 4", + "srr == S", + "srHook ~ (PRIM|SOK)" ], "pHost": [ - "clone_state=UNDEFINED" , - "roles=master1::worker:" , - "score=150" , - "standby=on" + "clone_state == UNDEFINED", + "roles == master1::worker:", + "score == 150", + "standby == on" ], "sHost": [ - "clone_state=PROMOTED" , - "roles=master1:master:worker:master" , - "score=(100|145)" + "clone_state == PROMOTED", + "roles == master1:master:worker:master", + "score ~ (100|145)" ] }, { @@ -54,30 +54,29 @@ "loop": 120, "post": "opn", "wait": 2, + "global": "sync_state_sfail", "pSite": [ - "lss=1" , - "srr=P" , - "lpt=10" , - "srHook=SWAIT" , - "srPoll=SFAIL" + "lss == 1", + "srr == P", + "lpt == 10", + "srHook == SWAIT" ], "sSite": [ - "lpt=1[6-9]........", - "lss=4", - "srr=P", - "srHook=PRIM", - "srPoll=PRIM" + "lpt > 1000000000", + "lss == 4", + "srr == P", + "srHook == PRIM" ], "pHost": [ - "clone_state=UNDEFINED" , - "roles=master1::worker:" , - "score=150" , - "standby=on" + "clone_state == UNDEFINED", + "roles == master1::worker:", + "score == 150", + "standby == on" ], "sHost": [ - "clone_state=PROMOTED" , - "roles=master1:master:worker:master" , - "score=150" + "clone_state == PROMOTED", + "roles == master1:master:worker:master", + "score == 150" ] }, { @@ -88,6 +87,7 @@ "wait": 2, "post": "cleanup", "todo": "allow pointer to prereq10", + "global": "sync_state_sok", "pSite": "sSiteUp", "sSite": "pSiteUp", "pHost": "sHostUp", diff --git a/test/json/classic-ScaleOut/standby_secn_node.json b/test/json/classic-ScaleOut/standby_secn_node.json index ae59404c..c1059272 100644 --- a/test/json/classic-ScaleOut/standby_secn_node.json +++ b/test/json/classic-ScaleOut/standby_secn_node.json @@ -12,6 +12,7 @@ "loop": 1, "wait": 1, "post": "ssn", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", @@ -24,30 +25,29 @@ "loop": 120, "wait": 2, "post": "osn", + "global": "sync_state_sfail", "pSite": [ - "lss=4" , - "srr=P" , - "lpt=1[6-9]........" , - "srHook=PRIM" , - "srPoll=PRIM" + "lss == 4", + "srr == P", + "lpt > 1000000000", + "srHook == PRIM" ], "sSite": [ - "lpt=10", - "lss=1", - "srr=S", - "srHook=SFAIL", - "srPoll=SFAIL" + "lpt == 10", + "lss == 1", + "srr == S", + "srHook == SFAIL" ], "pHost": [ - "clone_state=PROMOTED" , - "roles=master1:master:worker:master" , - "score=150" + "clone_state == PROMOTED", + "roles == master1:master:worker:master", + "score == 150" ], "sHost": [ - "clone_state=UNDEFINED" , - "roles=master1::worker:" , - "score=100" , - "standby=on" + "clone_state == UNDEFINED", + "roles == master1::worker:", + "score == 100", + "standby == on" ] }, { @@ -57,29 +57,28 @@ "loop": 120, "wait": 2, "todo": "pHost+sHost to check site-name", + "global": "sync_state_sfail", "pSite": [ - "lss=4" , - "srr=P" , - "lpt=1[6-9]........" 
, - "srHook=PRIM" , - "srPoll=PRIM" + "lss == 4", + "srr == P", + "lpt > 1000000000", + "srHook == PRIM" ], "sSite": [ - "lpt=10", - "lss=1", - "srr=S", - "srHook=SWAIT", - "srPoll=SFAIL" + "lpt == 10", + "lss == 1", + "srr == S", + "srHook == SWAIT" ], "pHost": [ - "clone_state=PROMOTED" , - "roles=master1:master:worker:master" , - "score=150" + "clone_state == PROMOTED", + "roles == master1:master:worker:master", + "score == 150" ], "sHost": [ - "clone_state=DEMOTED" , - "roles=master1::worker:" , - "score=(-INFINITY|0)" + "clone_state == DEMOTED", + "roles == master1::worker:", + "score ~ (-INFINITY|0)" ] }, { @@ -89,6 +88,7 @@ "loop": 120, "wait": 2, "post": "cleanup", + "global": "sync_state_sok", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", diff --git a/test/json/classic-ScaleUp/block_sr.json b/test/json/classic-ScaleUp/block_sr.json new file mode 100644 index 00000000..2ff49a24 --- /dev/null +++ b/test/json/classic-ScaleUp/block_sr.json @@ -0,0 +1,50 @@ +{ + "test": "block_sr", + "name": "block sr and check SFAIL attribute; unblock to recover", + "start": "prereq10", + "steps": [ + { + "step": "prereq10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "shell sct_test_block_sap_hana_sr", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "check SFAIL", + "next": "final40", + "loop": 60, + "wait": 2, + "post": "shell sct_test_unblock_sap_hana_sr", + "pSite": "pSiteUp", + "sSite": [ + "srHook == SFAIL" + ], + "pHost": "pHostUp", + "sHost": [ + "lpa_@@sid@@_lpt == 10", + "clone_state == DEMOTED", + "roles == 4:S:master1:master:worker:master", + "sync_state == SFAIL", + "score == -INFINITY" + ] + }, + { + "step": "final40", + "name": "still running", + "next": "END", + "loop": 120, + "wait": 2, + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/classic-ScaleUp/defaults.json b/test/json/classic-ScaleUp/defaults.json index 108007d8..36b778af 100644 --- a/test/json/classic-ScaleUp/defaults.json +++ b/test/json/classic-ScaleUp/defaults.json @@ -2,48 +2,47 @@ "opMode": "logreplay", "srMode": "sync", "checkPtr": { - "todo": "lpa_ha1_lpt must be flexible (like lpa_@@sid@@_lpt);", "pHostUp": [ - "clone_state=PROMOTED", - "lpa_ha1_lpt=1[6-9]........", - "roles=4:P:master1:master:worker:master", - "score=150", - "sync_state=PRIM" + "clone_state == PROMOTED", + "lpa_@@sid@@_lpt > 1000000000", + "roles == 4:P:master1:master:worker:master", + "score == 150", + "sync_state == PRIM" ], "pSiteUp": [ - "srHook=PRIM" + "srHook == PRIM" ], "sSiteUp": [ - "srHook=SOK" + "srHook == SOK" ], "sHostUp": [ - "clone_state=DEMOTED", - "roles=4:S:master1:master:worker:master", - "score=100", - "lpa_ha1_lpt=30", - "sync_state=SOK" + "clone_state == DEMOTED", + "roles == 4:S:master1:master:worker:master", + "score == 100", + "lpa_@@sid@@_lpt == 30", + "sync_state == SOK" ], "pHostDown": [ - "clone_state=UNDEFINED" , - "roles=1:P:master1::worker:" , - "score=150", - "standby=on", - "sync_state=PRIM" + "lpa_@@sid@@_lpt > 1000000000", + "clone_state == UNDEFINED", + "roles == 1:P:master1::worker:", + "score == 150", + "standby == on", + "sync_state == PRIM" ], "pSiteDown": [ - "lpt=1[6-9]........" 
, - "srHook=PRIM" + "srHook == PRIM" ], "sSiteDown": [ - "lpt=10", - "srHook=SFAIL" + "srHook == SFAIL" ], "sHostDown": [ - "clone_state=UNDEFINED" , - "roles=1:S:master1::worker:" , - "score=100" , - "standby=on", - "srPoll=SFAIL" + "lpa_@@sid@@_lpt == 10", + "clone_state == UNDEFINED", + "roles == 1:S:master1::worker:", + "score == 100", + "sync_state == SFAIL", + "standby == on" ] } } diff --git a/test/json/classic-ScaleUp/flup.json b/test/json/classic-ScaleUp/flup.json new file mode 100644 index 00000000..30fb9374 --- /dev/null +++ b/test/json/classic-ScaleUp/flup.json @@ -0,0 +1,30 @@ +{ + "test": "flup", + "name": "flup - like nop but very short sleep only - only for checking the test engine", + "start": "prereq10", + "steps": [ + { + "step": "prereq10", + "name": "test prerequitsites", + "next": "final40", + "loop": 1, + "wait": 1, + "post": "sleep 4", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "final40", + "name": "still running", + "next": "END", + "loop": 1, + "wait": 1, + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/classic-ScaleUp/free_log_area.json b/test/json/classic-ScaleUp/free_log_area.json index f1708d42..ab2ceb68 100644 --- a/test/json/classic-ScaleUp/free_log_area.json +++ b/test/json/classic-ScaleUp/free_log_area.json @@ -1,6 +1,6 @@ { "test": "free_log_area", - "name": "free log area on primary", + "name": "free hana log area on primary site", "start": "prereq10", "steps": [ { @@ -9,7 +9,7 @@ "next": "step20", "loop": 1, "wait": 1, - "post": "shell test_free_log_area", + "post": "shell sct_test_free_log_area", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", diff --git a/test/json/classic-ScaleUp/kill_prim_indexserver.json b/test/json/classic-ScaleUp/kill_prim_indexserver.json index e3f7bb18..c499d5b7 100644 --- a/test/json/classic-ScaleUp/kill_prim_indexserver.json +++ b/test/json/classic-ScaleUp/kill_prim_indexserver.json @@ -19,31 +19,27 @@ "step": "step20", "name": "failure detected", "next": "step30", - "loop": 120, + "loop": 180, "wait": 2, "pSite": [ - "lss=(1|2)" , - "srr=P" , - "lpt=(1[6-9]........|20)" , - "srHook=(PRIM|SWAIT|SREG)" , - "srPoll=PRIM" + "srHook ~ (PRIM|SWAIT)" ], "sSite": [ - "lpt=(1[6-9]........|30)", - "lss=4", - "srr=S", - "srHook=(PRIM|SOK)", - "srPoll=SOK" + "srHook ~ (PRIM|SOK)" ], "pHost": [ - "clone_state=(PROMOTED|DEMOTED|UNDEFINED)" , - "roles=master1::worker:" , - "score=(90|5|0)" + "lpa_@@sid@@_lpt > 1000000000", + "clone_state ~ (PROMOTED|WAITING4LPA|UNDEFINED)", + "roles == 1:P:master1::worker:", + "sync_state == PRIM", + "score ~ (150|5|-9000)" ], "sHost": [ - "clone_state=(PROMOTED|DEMOTED)" , - "roles=master1:master:worker:master" , - "score=(100|145)" + "lpa_@@sid@@_lpt >~ 1000000000:(30)", + "clone_state ~ (PROMOTED|DEMOTED)", + "roles == 4:S:master1:master:worker:master", + "sync_state == SOK", + "score ~ (100|145)" ] }, { @@ -54,29 +50,25 @@ "wait": 2, "todo": "pHost+sHost to check site-name", "pSite": [ - "lss=1" , - "srr=P" , - "lpt=(1[6-9]........|30|20|10)" , - "srHook=(PRIM|SWAIT|SREG)" , - "srPoll=PRIM" + "srHook ~ (PRIM|SWAIT|SREG)" ], "sSite": [ - "lpt=(1[6-9]........|30)", - "lss=4", - "srr=(S|P)", - "srHook=PRIM", - "srPoll=SOK" + "srHook == PRIM" ], "pHost": [ - "clone_state=(UNDEFINED|DEMOTED)" , - "roles=master1::worker:" , - "score=(90|5)" + "lpa_@@sid@@_lpt >~ 1000000000:(30|10)", + "clone_state ~ (WAITING4LPA|DEMOTED)", + "roles == 1:P:master1::worker:", + 
"sync_state ~ (PRIM|SFAIL)", + "score ~ (-9000|0)" ], "sHost": [ - "clone_state=(DEMOTED|PROMOTED)" , - "roles=master1:master:worker:master" , - "score=(100|145)" , - "srah=T" + "lpa_@@sid@@_lpt >~ 1000000000:(30)", + "clone_state ~ (DEMOTED|PROMOTED)", + "roles ~ 4:(S|P):master1:master:worker:master", + "score ~ (100|150)", + "sync_state == SOK", + "srah == T" ] }, { diff --git a/test/json/classic-ScaleUp/kill_prim_inst.json b/test/json/classic-ScaleUp/kill_prim_inst.json index 8330d1f2..a8c60b3c 100644 --- a/test/json/classic-ScaleUp/kill_prim_inst.json +++ b/test/json/classic-ScaleUp/kill_prim_inst.json @@ -10,11 +10,9 @@ "loop": 1, "wait": 1, "post": "kill_prim_inst", - "todo": "allow something like pSite=@@pSite@@ or pSite=%pSite", - "todo1": "allow something like lss>2, lpt>10000, score!=123", "pSite": "pSiteUp", "sSite": "sSiteUp", - "pHost": "pHostUp", + "pHost": "pHostUp", "sHost": "sHostUp" }, { @@ -24,28 +22,24 @@ "loop": 120, "wait": 2, "pSite": [ - "lss=(1|2)" , - "srr=P" , - "lpt=(1[6-9]........|20)" , - "srHook=(PRIM|SWAIT|SREG)" , - "srPoll=PRIM" + "srHook ~ (PRIM|SWAIT|SREG)" ], "sSite": [ - "lpt=(1[6-9]........|30)", - "lss=4", - "srr=S", - "srHook=(PRIM|SOK)", - "srPoll=SOK" + "srHook ~ (PRIM|SOK)" ], "pHost": [ - "clone_state=(PROMOTED|DEMOTED|UNDEFINED)" , - "roles=master1::worker:" , - "score=(90|5|0)" + "lpa_@@sid@@_lpt >~ 1000000000:(20)", + "clone_state ~ (PROMOTED|DEMOTED|UNDEFINED)", + "roles ~ 1:P:master1::worker:", + "sync_state == PRIM", + "score ~ (-9000|0)" ], "sHost": [ - "clone_state=(PROMOTED|DEMOTED)" , - "roles=master1:master:worker:master" , - "score=(100|145)" + "lpa_@@sid@@_lpt >~ 1000000000:(30)", + "clone_state ~ (PROMOTED|DEMOTED)", + "roles == 4:S:master1:master:worker:master", + "sync_state == SOK", + "score ~ (100|145)" ] }, { @@ -56,29 +50,25 @@ "wait": 2, "todo": "pHost+sHost to check site-name", "pSite": [ - "lss=1" , - "srr=P" , - "lpt=(1[6-9]........|30|20|10)" , - "srHook=(PRIM|SWAIT|SREG)" , - "srPoll=PRIM" + "srHook ~ (PRIM|SWAIT|SREG)" ], "sSite": [ - "lpt=(1[6-9]........|30)", - "lss=4", - "srr=(S|P)", - "srHook=PRIM", - "srPoll=SOK" + "srHook == PRIM" ], "pHost": [ - "clone_state=(UNDEFINED|DEMOTED)" , - "roles=master1::worker:" , - "score=(90|5)" + "lpa_@@sid@@_lpt >~ 1000000000:(10)", + "clone_state ~ (UNDEFINED|DEMOTED)", + "roles == 1:P:master1::worker:", + "sync_state ~ (PRIM|SOK)", + "score ~ (-9000|5)" ], "sHost": [ - "clone_state=(DEMOTED|PROMOTED)" , - "roles=master1:master:worker:master" , - "score=(100|145)" , - "srah=T" + "lpa_@@sid@@_lpt >~ 1000000000:(30)", + "clone_state ~ (DEMOTED|PROMOTED)", + "roles ~ 4:(S|P):master1:master:worker:master", + "sync_state == SOK", + "score ~ (100|145)", + "srah == T" ] }, { diff --git a/test/json/classic-ScaleUp/kill_prim_node.json b/test/json/classic-ScaleUp/kill_prim_node.json new file mode 100644 index 00000000..92f8a0f9 --- /dev/null +++ b/test/json/classic-ScaleUp/kill_prim_node.json @@ -0,0 +1,86 @@ +{ + "test": "kill_prim_node", + "name": "Kill primary master node", + "start": "prereq10", + "steps": [ + { + "step": "prereq10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "kill_prim_node", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "failure detected", + "next": "step30", + "loop": 120, + "wait": 2, + "pSite": [ + "srHook ~ (PRIM|SWAIT|SREG)" + ], + "sSite": [ + "srHook ~ (PRIM|SOK)" + ], + "pHost": [ + "lpa_@@sid@@_lpt ~ (1[6-9]........|20|10)", + 
"clone_state is None", + "roles is None", + "score is None", + "sync_state is None" + ], + "sHost": [ + "lpa_@@sid@@_lpt ~ (1[6-9]........|30)", + "clone_state ~ (PROMOTED|DEMOTED)", + "roles ~ 4:(S|P):master1:master:worker:master", + "score ~ (100|145)", + "sync_state ~ (SOK|SFAIL)" + ] + }, + { + "step": "step30", + "name": "begin recover", + "next": "final40", + "loop": 180, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "pSite": [ + "srHook ~ (PRIM|SWAIT|SREG)" + ], + "sSite": [ + "srHook == PRIM" + ], + "pHost": [ + "lpa_@@sid@@_lpt ~ (1[6-9]........|30|20|10)", + "clone_state ~ (UNDEFINED|DEMOTED|WAITING4NODES)", + "roles == 1:P:master1::worker:", + "sync_state ~ (PRIM|SFAIL)" + ], + "sHost": [ + "lpa_@@sid@@_lpt ~ (1[6-9]........|30)", + "clone_state ~ (DEMOTED|PROMOTED)", + "roles ~ 4:P:master1:master:worker:master", + "score ~ (100|145|150)", + "sync_state == PRIM" + ] + }, + { + "step": "final40", + "name": "end recover", + "next": "END", + "loop": 150, + "wait": 2, + "post": "cleanup", + "remark": "pXXX and sXXX are now exchanged", + "pSite": "sSiteUp", + "sSite": "pSiteUp", + "pHost": "sHostUp", + "sHost": "pHostUp" + } + ] +} diff --git a/test/json/classic-ScaleUp/kill_secn_indexserver.json b/test/json/classic-ScaleUp/kill_secn_indexserver.json index 4f500059..af5f7aa2 100644 --- a/test/json/classic-ScaleUp/kill_secn_indexserver.json +++ b/test/json/classic-ScaleUp/kill_secn_indexserver.json @@ -22,28 +22,24 @@ "loop": 120, "wait": 2, "pSite": [ - "lss=4" , - "srr=P" , - "lpt=1[6-9]........" , - "srHook=PRIM" , - "srPoll=PRIM" + "srHook == PRIM" ], "sSite": [ - "lpt=(10|30)", - "lss=(1|2)", - "srr=S", - "srHook=SFAIL", - "srPoll=(SFAIL|SOK)" + "srHook == SFAIL" ], "pHost": [ - "clone_state=PROMOTED" , - "roles=master1:master:worker:master" , - "score=150" + "lpa_@@sid@@_lpt > 1000000000", + "clone_state == PROMOTED", + "roles == 4:P:master1:master:worker:master", + "sync_state == PRIM", + "score == 150" ], "sHost": [ - "clone_state=DEMOTED" , - "roles=master1::worker:" , - "score=(-INFINITY|0)" + "lpa_@@sid@@_lpt ~ (10|30)", + "clone_state == DEMOTED", + "roles ~ (1|2):S:master1::worker:", + "sync_state ~ (SFAIL|SOK)", + "score ~ (-INFINITY|0)" ] }, { @@ -54,28 +50,24 @@ "wait": 2, "todo": "pHost+sHost to check site-name", "pSite": [ - "lss=4" , - "srr=P" , - "lpt=1[6-9]........" , - "srHook=PRIM" , - "srPoll=PRIM" + "srHook == PRIM" ], "sSite": [ - "lpt=10", - "lss=1", - "srr=S", - "srHook=SFAIL", - "srPoll=(SFAIL|SOK)" + "srHook ~ (SFAIL|SWAIT)" ], "pHost": [ - "clone_state=PROMOTED" , - "roles=master1:master:worker:master" , - "score=150" + "lpa_@@sid@@_lpt > 1000000000", + "clone_state == PROMOTED", + "roles == 4:P:master1:master:worker:master", + "sync_state == PRIM", + "score == 150" ], "sHost": [ - "clone_state=UNDEFINED" , - "roles=master1::worker:" , - "score=(-INFINITY|0|-1)" + "lpa_@@sid@@_lpt == 10", + "clone_state == UNDEFINED", + "roles == 1:S:master1::worker:", + "sync_state ~ (SFAIL|SOK)", + "score ~ (-INFINITY|0|-1)" ] }, { diff --git a/test/json/classic-ScaleUp/kill_secn_inst.json b/test/json/classic-ScaleUp/kill_secn_inst.json index ac57eb18..0464f498 100644 --- a/test/json/classic-ScaleUp/kill_secn_inst.json +++ b/test/json/classic-ScaleUp/kill_secn_inst.json @@ -22,28 +22,24 @@ "loop": 120, "wait": 2, "pSite": [ - "lss=4" , - "srr=P" , - "lpt=1[6-9]........" 
, - "srHook=PRIM" , - "srPoll=PRIM" + "srHook == PRIM" ], "sSite": [ - "lpt=(10|30)", - "lss=(1|2)", - "srr=S", - "srHook=SFAIL", - "srPoll=(SFAIL|SOK)" + "srHook == SFAIL" ], "pHost": [ - "clone_state=PROMOTED" , - "roles=master1:master:worker:master" , - "score=150" + "lpa_@@sid@@_lpt > 1000000000", + "clone_state == PROMOTED", + "roles == 4:P:master1:master:worker:master", + "sync_state == PRIM", + "score == 150" ], "sHost": [ - "clone_state=DEMOTED" , - "roles=master1::worker:" , - "score=(-INFINITY|0)" + "lpa_@@sid@@_lpt ~ (10|30)", + "clone_state == DEMOTED", + "roles ~ (1|2):S:master1::worker:", + "sync_state ~ (SFAIL|SOK)", + "score ~ (-INFINITY|0)" ] }, { @@ -54,28 +50,24 @@ "wait": 2, "todo": "pHost+sHost to check site-name", "pSite": [ - "lss=4" , - "srr=P" , - "lpt=1[6-9]........" , - "srHook=PRIM" , - "srPoll=PRIM" + "srHook == PRIM" ], "sSite": [ - "lpt=10", - "lss=(1|2)", - "srr=S", - "srHook=(SFAIL|SWAIT)", - "srPoll=(SFAIL|SOK)" + "srHook ~ (SFAIL|SWAIT)" ], "pHost": [ - "clone_state=PROMOTED" , - "roles=master1:master:worker:master" , - "score=150" + "lpa_@@sid@@_lpt > 1000000000", + "clone_state == PROMOTED", + "roles == 4:P:master1:master:worker:master", + "sync_state == PRIM", + "score == 150" ], "sHost": [ - "clone_state=(UNDEFINED|DEMOTED)" , - "roles=master1::worker:" , - "score=(-INFINITY|0|-1)" + "lpa_@@sid@@_lpt == 10", + "clone_state ~ (UNDEFINED|DEMOTED)", + "roles ~ (1|2):S:master1::worker:", + "sync_state ~ (SFAIL|SOK)", + "score ~ (-INFINITY|0|-1)" ] }, { diff --git a/test/json/classic-ScaleUp/kill_secn_node.json b/test/json/classic-ScaleUp/kill_secn_node.json new file mode 100644 index 00000000..223cdeb1 --- /dev/null +++ b/test/json/classic-ScaleUp/kill_secn_node.json @@ -0,0 +1,85 @@ +{ + "test": "kill_secn_node", + "name": "Kill secondary master node", + "start": "prereq10", + "steps": [ + { + "step": "prereq10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "kill_secn_node", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "failure detected", + "next": "step30", + "loop": 180, + "wait": 2, + "pSite": [ + "srHook == PRIM" + ], + "sSite": [ + "srHook == SFAIL" + ], + "pHost": [ + "lpa_@@sid@@_lpt > 1000000000", + "clone_state == PROMOTED", + "roles == 4:P:master1:master:worker:master", + "sync_state == PRIM", + "score == 150" + + ], + "sHost": [ + "lpa_@@sid@@_lpt == 10", + "clone_state is None", + "roles is None", + "score is None" + ] + }, + { + "step": "step30", + "name": "begin recover", + "next": "final40", + "loop": 180, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "pSite": [ + "srHook == PRIM" + ], + "sSite": [ + "srHook ~ (SFAIL|SWAIT|SOK)" + ], + "pHost": [ + "lpa_@@sid@@_lpt > 1000000000", + "clone_state == PROMOTED", + "roles == 4:P:master1:master:worker:master", + "score == 150" + ], + "sHost": [ + "lpa_@@sid@@_lpt == 10", + "clone_state ~ (UNDEFINED|DEMOTED)", + "roles == 1:S:master1::worker:", + "sync_state ~ (SFAIL|SWAIT|SOK)", + "score ~ (-INFINITY|0)" + ] + }, + { + "step": "final40", + "name": "end recover", + "next": "END", + "loop": 150, + "wait": 2, + "post": "cleanup", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/classic-ScaleUp/maintenance_cluster_hana_running.json b/test/json/classic-ScaleUp/maintenance_cluster_hana_running.json new file mode 100644 index 00000000..00c44e38 --- /dev/null +++ 
b/test/json/classic-ScaleUp/maintenance_cluster_hana_running.json @@ -0,0 +1,31 @@ +{ + "test": "maintenance_cluster_hana_running", + "name": "stop and restart cluster, keep hana running", + "start": "prereq10", + "steps": [ + { + "step": "prereq10", + "name": "test prerequisites", + "next": "final40", + "loop": 1, + "wait": 1, + "post": "shell sct_test_maintenance_cluster_hana_running", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "final40", + "name": "end recover", + "next": "END", + "loop": 120, + "wait": 2, + "post": "cleanup", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/classic-ScaleUp/maintenance_cluster_turn_hana.json b/test/json/classic-ScaleUp/maintenance_cluster_turn_hana.json index cdf90e80..2c821542 100644 --- a/test/json/classic-ScaleUp/maintenance_cluster_turn_hana.json +++ b/test/json/classic-ScaleUp/maintenance_cluster_turn_hana.json @@ -9,7 +9,7 @@ "next": "final40", "loop": 1, "wait": 1, - "post": "shell test_maintenance_cluster_turn_hana", + "post": "shell sct_test_maintenance_cluster_turn_hana", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", diff --git a/test/json/classic-ScaleUp/maintenance_with_standby_nodes.json b/test/json/classic-ScaleUp/maintenance_with_standby_nodes.json index f9755425..9ae7bdba 100644 --- a/test/json/classic-ScaleUp/maintenance_with_standby_nodes.json +++ b/test/json/classic-ScaleUp/maintenance_with_standby_nodes.json @@ -2,8 +2,6 @@ "test": "maintenance_with_standby_nodes", "name": "standby+online secondary then standby+online primary", "start": "prereq10", - "sid": "HA1", - "mstResource": "ms_SAPHanaCon_HA1_HDB00", "steps": [ { "step": "prereq10", @@ -38,17 +36,15 @@ "todo": "pHost+sHost to check site-name", "pSite": "pSiteUp", "sSite": [ - "lpt=10", - "lss=1", - "srr=S", - "srHook=SWAIT", - "srPoll=SFAIL" + "srHook == SWAIT" ], "pHost": "pHostUp", "sHost": [ - "clone_state=DEMOTED" , - "roles=master1::worker:" , - "score=(-INFINITY|0)" + "lpa_@@sid@@_lpt == 10", + "clone_state == DEMOTED", + "roles == 1:S:master1::worker:", + "sync_state == SFAIL", + "score ~ (-INFINITY|0)" ] }, { @@ -82,17 +78,15 @@ "wait": 2, "pSite": "pSiteDown", "sSite": [ - "lpt=(30|1[6-9]........)", - "lss=4", - "srr=S", - "srHook=(PRIM|SOK)", - "srPoll=SOK" + "srHook ~ (PRIM|SOK)" ], "pHost": "pHostDown", "sHost": [ - "clone_state=PROMOTED" , - "roles=master1:master:worker:master" , - "score=(100|145)" + "lpa_@@sid@@_lpt >~ 1000000000:(30)", + "clone_state == PROMOTED", + "roles == 4:S:master1:master:worker:master", + "score ~ (100|145)", + "sync_state == SOK" ] }, { @@ -103,18 +97,16 @@ "post": "opn", "wait": 2, "pSite": [ - "lss=1" , - "srr=P" , - "lpt=10" , - "srHook=SWAIT" , - "srPoll=SFAIL" + "srHook == PRIM" ], "sSite": "pSiteUp", "pHost": [ - "clone_state=UNDEFINED" , - "roles=master1::worker:" , - "score=150" , - "standby=on" + "lpa_@@sid@@_lpt == 10", + "clone_state == UNDEFINED", + "roles == 1:P:master1::worker:", + "sync_state == SFAIL", + "score == 150", + "standby == on" ], "sHost": "pHostUp" }, diff --git a/test/json/classic-ScaleUp/one_stable_hour.json b/test/json/classic-ScaleUp/one_stable_hour.json new file mode 100644 index 00000000..b1dff7cd --- /dev/null +++ b/test/json/classic-ScaleUp/one_stable_hour.json @@ -0,0 +1,90 @@ +{ + "test": "one_stable_hour", + "name": "one stable hour - check regularly for one hour that there is no failure", + "start": "prereq10", + "steps": [ + { + "step": "prereq10", + 
"name": "test prerequitsites", + "next": "stable10", + "loop": 1, + "wait": 1, + "post": "sleep 600", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "stable10", + "name": "check stable (10/60)", + "next": "stable20", + "loop": 1, + "wait": 1, + "post": "sleep 600", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "stable20", + "name": "check stable (20/60)", + "next": "stable30", + "loop": 1, + "wait": 1, + "post": "sleep 600", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "stable30", + "name": "check stable (30/60)", + "next": "stable40", + "loop": 1, + "wait": 1, + "post": "sleep 600", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "stable40", + "name": "check stable (40/60)", + "next": "stable50", + "loop": 1, + "wait": 1, + "post": "sleep 600", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "stable50", + "name": "check stable (50/60)", + "next": "final60", + "loop": 1, + "wait": 1, + "post": "sleep 600", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "final60", + "name": "check stable (60/60)", + "next": "END", + "loop": 1, + "wait": 1, + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/classic-ScaleUp/properties.json b/test/json/classic-ScaleUp/properties.json index cc5691e2..d80c4a8e 100644 --- a/test/json/classic-ScaleUp/properties.json +++ b/test/json/classic-ScaleUp/properties.json @@ -1,5 +1,5 @@ { "sid": "HA1", - "mstResource": "ms_SAPHanaCon_HA1_HDB00", - "clnResource": "cln_SAPHanaTop_HA1_HDB00" + "mstResource": "ms_SAPHana_HA1_HDB00", + "clnResource": "cln_SAPHanaTopology_HA1_HDB00" } diff --git a/test/json/classic-ScaleUp/restart_cluster.json b/test/json/classic-ScaleUp/restart_cluster.json index c59f8e20..a783f159 100644 --- a/test/json/classic-ScaleUp/restart_cluster.json +++ b/test/json/classic-ScaleUp/restart_cluster.json @@ -9,7 +9,7 @@ "next": "final40", "loop": 1, "wait": 1, - "post": "shell test_restart_cluster", + "post": "shell sct_test_restart_cluster", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", diff --git a/test/json/classic-ScaleUp/restart_cluster_turn_hana.json b/test/json/classic-ScaleUp/restart_cluster_turn_hana.json index fc1a482a..0bcc8a63 100644 --- a/test/json/classic-ScaleUp/restart_cluster_turn_hana.json +++ b/test/json/classic-ScaleUp/restart_cluster_turn_hana.json @@ -9,7 +9,7 @@ "next": "final40", "loop": 1, "wait": 1, - "post": "shell test_restart_cluster_turn_hana", + "post": "shell sct_test_restart_cluster_turn_hana", "pSite": "pSiteUp", "sSite": "sSiteUp", "pHost": "pHostUp", diff --git a/test/json/classic-ScaleUp/split_brain_prio.json b/test/json/classic-ScaleUp/split_brain_prio.json new file mode 100644 index 00000000..b8c6c788 --- /dev/null +++ b/test/json/classic-ScaleUp/split_brain_prio.json @@ -0,0 +1,74 @@ +{ + "test": "split_brain_prio", + "name": "split brain with prio fencing to simulate fence of secondary", + "start": "prereq10", + "steps": [ + { + "step": "prereq10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "simulate_split_brain", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": 
"failure detected", + "next": "step30", + "loop": 120, + "wait": 2, + "pSite": [ + "srHook == PRIM" + ], + "sSite": [ + "srHook == SFAIL" + ], + "pHost": "pHostUp", + "sHost": [ + "lpa_@@sid@@_lpt >~ 1000000000:(10)", + "clone_state is None", + "roles is None", + "score is None", + "sync_state is None" + ] + }, + { + "step": "step30", + "name": "begin recover", + "next": "final40", + "loop": 180, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "pSite": [ + "srHook == PRIM" + ], + "sSite": [ + "srHook ~ (SFAIL|SWAIT)" + ], + "pHost": "pHostUp", + "sHost": [ + "lpa_@@sid@@_lpt == 10", + "clone_state ~ (UNDEFINED|DEMOTED)", + "roles == 1:S:master1::worker:", + "sync_state ~ (SFAIL|SOK)", + "score ~ (-INFINITY|0|-1)" + ] + }, + { + "step": "final40", + "name": "end recover", + "next": "END", + "loop": 120, + "wait": 2, + "post": "cleanup", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/classic-ScaleUp/standby_prim_node.json b/test/json/classic-ScaleUp/standby_prim_node.json index 1e047088..0754584c 100644 --- a/test/json/classic-ScaleUp/standby_prim_node.json +++ b/test/json/classic-ScaleUp/standby_prim_node.json @@ -22,29 +22,25 @@ "loop": 120, "wait": 2, "pSite": [ - "lss=1" , - "srr=P" , - "lpt=1[6-9]........" , - "srHook=PRIM" , - "srPoll=PRIM" + "srHook == PRIM" ], "sSite": [ - "lpt=(30|1[6-9]........)", - "lss=4", - "srr=S", - "srHook=(PRIM|SOK)", - "srPoll=SOK" + "srHook ~ (PRIM|SOK)" ], "pHost": [ - "clone_state=UNDEFINED" , - "roles=master1::worker:" , - "score=150" , - "standby=on" + "lpa_@@sid@@_lpt > 1000000000", + "clone_state == UNDEFINED", + "roles == 1:P:master1::worker:", + "score == 150", + "sync_state == PRIM", + "standby == on" ], "sHost": [ - "clone_state=PROMOTED" , - "roles=master1:master:worker:master" , - "score=(100|145)" + "lpa_@@sid@@_lpt >~ 1000000000:(30)", + "clone_state == PROMOTED", + "roles == 4:S:master1:master:worker:master", + "sync_state == SOK", + "score ~ (100|145)" ] }, { @@ -55,29 +51,25 @@ "post": "opn", "wait": 2, "pSite": [ - "lss=1" , - "srr=P" , - "lpt=10" , - "srHook=SWAIT" , - "srPoll=SFAIL" + "srHook == PRIM" ], "sSite": [ - "lpt=1[6-9]........", - "lss=4", - "srr=P", - "srHook=PRIM", - "srPoll=PRIM" + "srHook == PRIM" ], "pHost": [ - "clone_state=UNDEFINED" , - "roles=master1::worker:" , - "score=150" , - "standby=on" + "lpa_@@sid@@_lpt == 10", + "clone_state == UNDEFINED", + "roles == 1:P:master1::worker:", + "sync_state == SFAIL", + "score == 150", + "standby == on" ], "sHost": [ - "clone_state=PROMOTED" , - "roles=master1:master:worker:master" , - "score=150" + "lpa_@@sid@@_lpt > 1000000000", + "clone_state == PROMOTED", + "roles == 4:P:master1:master:worker:master", + "sync_state == PRIM", + "score == 150" ] }, { diff --git a/test/json/classic-ScaleUp/standby_secn_node.json b/test/json/classic-ScaleUp/standby_secn_node.json index ae59404c..7f949bd3 100644 --- a/test/json/classic-ScaleUp/standby_secn_node.json +++ b/test/json/classic-ScaleUp/standby_secn_node.json @@ -2,8 +2,6 @@ "test": "standby_secn_node", "name": "standby secondary node (and online again)", "start": "prereq10", - "sid": "HA1", - "mstResource": "ms_SAPHanaCon_HA1_HDB00", "steps": [ { "step": "prereq10", @@ -25,29 +23,25 @@ "wait": 2, "post": "osn", "pSite": [ - "lss=4" , - "srr=P" , - "lpt=1[6-9]........" 
, - "srHook=PRIM" , - "srPoll=PRIM" + "srHook == PRIM" ], "sSite": [ - "lpt=10", - "lss=1", - "srr=S", - "srHook=SFAIL", - "srPoll=SFAIL" + "srHook == SFAIL" ], "pHost": [ - "clone_state=PROMOTED" , - "roles=master1:master:worker:master" , - "score=150" + "lpa_@@sid@@_lpt > 1000000000", + "clone_state == PROMOTED", + "roles == 4:P:master1:master:worker:master", + "sync_state == PRIM", + "score == 150" ], "sHost": [ - "clone_state=UNDEFINED" , - "roles=master1::worker:" , - "score=100" , - "standby=on" + "lpa_@@sid@@_lpt == 10", + "clone_state == UNDEFINED", + "roles == 1:S:master1::worker:", + "score == 100", + "sync_state == SFAIL", + "standby == on" ] }, { @@ -58,28 +52,24 @@ "wait": 2, "todo": "pHost+sHost to check site-name", "pSite": [ - "lss=4" , - "srr=P" , - "lpt=1[6-9]........" , - "srHook=PRIM" , - "srPoll=PRIM" + "srHook == PRIM" ], "sSite": [ - "lpt=10", - "lss=1", - "srr=S", - "srHook=SWAIT", - "srPoll=SFAIL" + "srHook == SWAIT" ], "pHost": [ - "clone_state=PROMOTED" , - "roles=master1:master:worker:master" , - "score=150" + "lpa_@@sid@@_lpt > 1000000000", + "clone_state == PROMOTED", + "roles == 4:P:master1:master:worker:master", + "sync_state == PRIM", + "score == 150" ], "sHost": [ - "clone_state=DEMOTED" , - "roles=master1::worker:" , - "score=(-INFINITY|0)" + "lpa_@@sid@@_lpt == 10", + "clone_state == DEMOTED", + "roles == 1:S:master1::worker:", + "sync_state == SFAIL", + "score ~ (-INFINITY|0)" ] }, { diff --git a/test/saphana_sr_test.py b/test/saphana_sr_test.py old mode 100755 new mode 100644 index bc7ff203..971e3f3a --- a/test/saphana_sr_test.py +++ b/test/saphana_sr_test.py @@ -25,7 +25,7 @@ class SaphanasrTest: """ class to check SAP HANA cluster during tests """ - version = "0.3.3" + version = "1.2.13" def message(self, msg, **kwargs): """ @@ -159,7 +159,7 @@ def read_saphana_sr(self): structure representing the data """ #cmd = [ './helpSAPHanaSR-showAttr', '--format=script' ] - cmd = "SAPHanaSR-showAttr --format=tester" + cmd = "/usr/bin/SAPHanaSR-showAttr --format=tester --select=all" self.dict_sr={} sr_out = "" #self.message("remote node broken !!") @@ -354,7 +354,13 @@ def __get_failed__(self): return None def run_checks(self, checks, area_name, object_name, step_step ): - """ run all checks for area and object """ + """ run all checks for area and object + params: + checks: list of checks to be run + area_name: attribute area to be checked (global, Site, Resource, Host) + object_name: aobject inside area to be checked (ROT, WDF, pizbuin01) + step_step: TBD + """ l_sr = self.dict_sr check_result = -1 self.__reset_failed__() @@ -367,6 +373,15 @@ def run_checks(self, checks, area_name, object_name, step_step ): check_result = 2 break c_key = match_obj.group(1) + # + # rewrite key, if it contains a string @@sid@@ this is needed e.g. 
to match lpa__lpt + # + #print(f"c_key={c_key}") + match_obj_key = re.search("(.*)@@sid@@(.*)", c_key) + if match_obj_key is not None: + #print(f"match c_key={c_key} group1={match_obj_key.group(1)} group2={match_obj_key.group(2)}") + c_key = match_obj_key.group(1) + self.test_data['sid'].lower() + match_obj_key.group(2) + #print(f"rewrite c_key={c_key}") c_comp = match_obj.group(2) c_reg_exp = match_obj.group(3) c_reg_exp_a = "" @@ -426,6 +441,12 @@ def run_checks(self, checks, area_name, object_name, step_step ): found = 1 c_err = 0 check_result = max(check_result, 0) + else: + # if object does not even exist, the 'None' clause is true + if c_comp == "is" and c_reg_exp == "None": + found = 1 + c_err = 0 + check_result = max(check_result, 0) if c_err == 1: if not found: l_val = None @@ -435,6 +456,11 @@ def run_checks(self, checks, area_name, object_name, step_step ): else: check_result = max(check_result, 0) self.debug(f"DEBUG: PASSED: ckey:{c_key} c_comp:{c_comp} c_reg_exp:{c_reg_exp} c_reg_exp_a:{c_reg_exp_a} c_reg_exp_b:{c_reg_exp_b}") + if c_comp == "is" and c_reg_exp == "None": + # if area does not even exist, the 'None' clause is true + found = 1 + c_err = 0 + check_result = max(check_result, 0) if (found == 0) and (check_result < 2): check_result = 2 if self.config['dump_failures'] and 'failed' in self.run: diff --git a/test/sct_test_block_sap_hana_sr b/test/sct_test_block_sap_hana_sr index b970d344..1e36c5c8 100755 --- a/test/sct_test_block_sap_hana_sr +++ b/test/sct_test_block_sap_hana_sr @@ -1,6 +1,6 @@ #!/bin/bash # -# test_block_sap_hana_sr - block ports for SAP HANA SR in multi-db installation (4xx01 - 4xx03) +# sct_test_block_sap_hana_sr - block ports for SAP HANA SR in multi-db installation (4xx01 - 4xx03) # iptables -D INPUT -p tcp -m multiport --ports 40001,40002,40003 -j DROP # iptables -I INPUT -p tcp -m multiport --ports 40001,40002,40003 -j DROP # about sap hana ports: https://www.stechies.com/hana-services-ports-interview-questions-answer/ @@ -8,7 +8,7 @@ src=${BASH_SOURCE[0]} full_path=$(readlink -f "$src") dir_path=$(dirname "$full_path") source .test_properties -currPrimary="$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="150"/ { print $2 }' )" +currPrimary="$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="150"/ { print $2 }' )" echo "==== Block SAP HANA SR ====" diff --git a/test/sct_test_create_cluster_config b/test/sct_test_create_cluster_config new file mode 100755 index 00000000..400207b9 --- /dev/null +++ b/test/sct_test_create_cluster_config @@ -0,0 +1,29 @@ +#!/bin/bash +# +# sct_test_create_cluster_config - complete cluster config (basic, resources, constraints) +# +source .test_properties + +# +# node all attributes +# +echo "== add cluster configuration ==" +cd /usr/share/SAPHanaSR-tester/samples/crm_cfg/angi-ScaleUp || exit 1 +for configStep in [0-9]*_*; do + echo "$configStep"; + rsync -v "$configStep" "${node01}":/root/crm.txt 2>/dev/null 1>/dev/null + ssh -T "${node01}" </dev/null 1>/dev/null +EOF +done + +ssh -T "${node01}" "cs_wait_for_idle -s 5 2>/dev/null 1>/dev/null" + +# +# take resource out of maintenance +# +echo "== get $mstResource out of maintenance ==" +ssh -T "${node01}" "crm resource maintenance $mstResource off" +ssh -T "${node01}" "cs_wait_for_idle -s 5" 2>/dev/null 1>/dev/null diff --git a/test/sct_test_delete_cluster_config b/test/sct_test_delete_cluster_config new file mode 100755 index 00000000..85b96655 --- /dev/null +++ b/test/sct_test_delete_cluster_config @@ -0,0 
+1,48 @@ +#!/bin/bash +# +# sct_test_delete_cluster_config - delete the complete cluster config (CIB) +# +# - set cluster to maintenance +# - erase the cluster config +# +# expected outcome: cluster config (CIB) empty, HANA still running +src=${BASH_SOURCE[0]} +full_path=$(readlink -f "$src") +dir_path=$(dirname "$full_path") +source .test_properties + + +# +# set cluster to maintenance, erase the cluster configuration (CIB) and again set cluster to maintenance +# +echo "== set maintenance mode and erase config ==" +ssh -T "${node01}" "crm maintenance on" 1>/dev/null 2>/dev/null +ssh -T "${node01}" "crm configure erase" 1>/dev/null 2>/dev/null +ssh -T "${node01}" "crm maintenance on" 1>/dev/null 2>/dev/null + +# +# restart the cluster - this deletes transient attributes from the cluster +# +ssh -T "${node01}" "crm cluster stop --all" 1>/dev/null 2>/dev/null +sleep 5 +ssh -T "${node01}" "crm cluster start --all" 1>/dev/null 2>/dev/null +ssh -T "${node01}" "cs_wait_for_idle -s 5" 1>/dev/null 2>/dev/null + +# +# delete persistent node attributes +# +ssh -T "${node01}" </root/crm.txt +crm configure load update /root/crm.txt +rm /root/crm.txt +EOF + +ssh -T "${node01}" "cs_wait_for_idle -s 5" 1>/dev/null 2>/dev/null + +# +# take cluster out of maintenance +# +echo "== end of the cluster wide maintenance ==" +ssh -T "${node01}" "crm maintenance off" 1>/dev/null 2>/dev/null +ssh -T "${node01}" "cs_wait_for_idle -s 5" 1>/dev/null 2>/dev/null +# diff --git a/test/sct_test_free_log_area b/test/sct_test_free_log_area index 60a390f3..7c808517 100755 --- a/test/sct_test_free_log_area +++ b/test/sct_test_free_log_area @@ -1,6 +1,6 @@ #!/bin/bash # -# test_maintenance_cluster_turn_hana - with resource in maintenace: exchange primary/secondary +# sct_test_free_log_area - with resource in maintenace: exchange primary/secondary # # - set mst-resource in maintenance mode # - takeover on secondary with suspend primary (gets new primary) @@ -19,12 +19,12 @@ source .test_properties # shellcheck disable=SC2029 ssh "${node01}" "crm resource cleanup $mstResource" -currPrimary="$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="150"/ { print $2 }' )" -currSecondary="$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="100"/ { print $2 }' )" +currPrimary="$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="150"/ { print $2 }' )" +currSecondary="$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="100"/ { print $2 }' )" echo "p=$currPrimary, s=$currSecondary" -sitePrimary=$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'=' '$0 ~ node".site" { print $2 }' node="$currPrimary") +sitePrimary=$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr --format=tester" | awk -F'=' '$0 ~ node".site" { print $2 }' node="$currPrimary") sitePrimary="${sitePrimary//\"/}" echo "p=$currPrimary ($sitePrimary), s=$currSecondary" diff --git a/test/sct_test_freeze_prim_fs b/test/sct_test_freeze_prim_fs index fb9866f4..e0c582cc 100755 --- a/test/sct_test_freeze_prim_fs +++ b/test/sct_test_freeze_prim_fs @@ -1,11 +1,11 @@ #!/bin/bash # -# test_freeze_prim_fs - freeze fs in primary side +# sct_test_freeze_prim_fs - freeze fs in primary side src=${BASH_SOURCE[0]} full_path=$(readlink -f "$src") dir_path=$(dirname "$full_path") source .test_properties -currPrimary="$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="150"/ { print $2 }' )" +currPrimary="$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr 
--format=tester" | awk -F'/' '/score="150"/ { print $2 }' )" echo "==== Freeze SAP HANA FS ====" diff --git a/test/sct_test_freeze_prim_master_nfs b/test/sct_test_freeze_prim_master_nfs index ed818d90..8dcc0e13 100755 --- a/test/sct_test_freeze_prim_master_nfs +++ b/test/sct_test_freeze_prim_master_nfs @@ -1,12 +1,12 @@ #!/bin/bash # -# test_freeze_prim_nfs - freeze nfs on primary master +# sct_test_freeze_prim_master_nfs - freeze nfs on primary master src=${BASH_SOURCE[0]} full_path=$(readlink -f "$src") dir_path=$(dirname "$full_path") source .test_properties -currPrimary="$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="150"/ { print $2 }' )" -currPrimWorker="$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="-10000"/ { print $2 }' )" +currPrimary="$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="150"/ { print $2 }' )" +currPrimWorker="$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="-10000"/ { print $2 }' )" echo "==== Freeze SAP HANA NFS ====" diff --git a/test/sct_test_freeze_prim_site_nfs b/test/sct_test_freeze_prim_site_nfs index 01bd35d3..52ab8b65 100755 --- a/test/sct_test_freeze_prim_site_nfs +++ b/test/sct_test_freeze_prim_site_nfs @@ -1,12 +1,12 @@ #!/bin/bash # -# test_freeze_prim_nfs - freeze nfs on primary site +# sct_test_freeze_prim_site_nfs - freeze nfs on primary site src=${BASH_SOURCE[0]} full_path=$(readlink -f "$src") dir_path=$(dirname "$full_path") source .test_properties -currPrimary="$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="150"/ { print $2 }' )" -currPrimWorker="$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="-10000"/ { print $2 }' )" +currPrimary="$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="150"/ { print $2 }' )" +currPrimWorker="$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="-10000"/ { print $2 }' )" echo "==== Freeze SAP HANA NFS ====" diff --git a/test/sct_test_freeze_secn_site_nfs b/test/sct_test_freeze_secn_site_nfs index f642f62d..9c8d7e8b 100755 --- a/test/sct_test_freeze_secn_site_nfs +++ b/test/sct_test_freeze_secn_site_nfs @@ -1,12 +1,12 @@ #!/bin/bash # -# test_freeze_secn_nfs - freeze nfs on secondary site +# sct_test_freeze_secn_site_nfs - freeze nfs on secondary site src=${BASH_SOURCE[0]} full_path=$(readlink -f "$src") dir_path=$(dirname "$full_path") source .test_properties -currSecondary="$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="100"/ { print $2 }' )" -currSecnWorker="$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="-12200"/ { print $2 }' )" +currSecondary="$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="100"/ { print $2 }' )" +currSecnWorker="$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="-12200"/ { print $2 }' )" echo "==== Freeze SAP HANA NFS ====" diff --git a/test/sct_test_maintenance_cluster_hana_running b/test/sct_test_maintenance_cluster_hana_running new file mode 100755 index 00000000..d6d987bd --- /dev/null +++ b/test/sct_test_maintenance_cluster_hana_running @@ -0,0 +1,48 @@ +#!/bin/bash +# +# sct_test_maintenance_cluster_hana_running - restart cluster with SAP HANA resource in maintenance +# +# - cleanup mst-resource +# - set mst-resource in maintenance +# - set cluster in maintenance +# - stop cluster +# - wait some time to simulate a 
maintenance +# - start cluster +# - wait till cluster is idle +# - end cluster maintenace +# - refresh mst-resource +# - wait till cluster is idle +# - end mst-resource in maintenance +# +# expected outcome: cluster running, SAP HANA instances running, SR roles NOT exchanged +src=${BASH_SOURCE[0]} +full_path=$(readlink -f "$src") +dir_path=$(dirname "$full_path") +source .test_properties + +# shellcheck disable=SC2029 +ssh "$node01" "crm resource cleanup $mstResource" +ssh "$node01" "crm resource maintenance $mstResource on" +ssh "$node01" "crm maintenance on" + +ssh "$node02" "crm cluster stop --all" + +echo "==== SUSE Cluster Stopped ====" + +sleep 60 +ssh "$node01" "crm cluster start --all" + +sleep 120 +ssh "$node01" "cs_wait_for_idle --sleep 60" + +ssh "$node01" "crm resource refresh $clnResource" +ssh "$node01" "crm maintenance off" + +ssh "$node01" "cs_wait_for_idle --sleep 10" +ssh "$node01" "crm resource refresh $mstResource" + +ssh "$node01" "cs_wait_for_idle --sleep 10" +ssh "$node01" "crm resource maintenance $mstResource off" + +echo "==== SUSE Cluster Started ====" + diff --git a/test/sct_test_maintenance_cluster_turn_hana b/test/sct_test_maintenance_cluster_turn_hana index 7b0ba2ef..02b49486 100755 --- a/test/sct_test_maintenance_cluster_turn_hana +++ b/test/sct_test_maintenance_cluster_turn_hana @@ -1,6 +1,6 @@ #!/bin/bash # -# test_maintenance_cluster_turn_hana - with resource in maintenace: exchange primary/secondary +# sct_test_maintenance_cluster_turn_hana - with resource in maintenace: exchange primary/secondary # # - set mst-resource in maintenance mode # - takeover on secondary with suspend primary (gets new primary) @@ -19,8 +19,8 @@ source .test_properties # shellcheck disable=SC2029 ssh "${node01}" "crm resource cleanup $mstResource" -currPrimary="$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="150"/ { print $2 }' )" -currSecondary="$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="100"/ { print $2 }' )" +currPrimary="$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="150"/ { print $2 }' )" +currSecondary="$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="100"/ { print $2 }' )" # ######################################### @@ -30,8 +30,8 @@ currSecondary="$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'/ # ######################################### -vhostPrim="$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'=' '$0 ~ node".*vhost=" { print $2 }' node=$currPrimary | tr -d '"' )" -vhostSecn="$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'=' '$0 ~ node".*vhost=" { print $2 }' node=$currSecondary | tr -d '"' )" +vhostPrim="$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr --format=tester" | awk -F'=' '$0 ~ node".*vhost=" { print $2 }' node=$currPrimary | tr -d '"' )" +vhostSecn="$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr --format=tester" | awk -F'=' '$0 ~ node".*vhost=" { print $2 }' node=$currSecondary | tr -d '"' )" if [[ -z "$vhostPrim ]]; then vhostPrim="$currPrimary" fi @@ -42,7 +42,7 @@ fi echo "p=$currPrimary, s=$currSecondary vhostPrim=$vhostPrim vhostSecn=$vhostSecn" -sitePrimary=$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'=' '$0 ~ node".site" { print $2 }' node="$currPrimary") +sitePrimary=$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr --format=tester" | awk -F'=' '$0 ~ node".site" { print $2 }' node="$currPrimary") sitePrimary="${sitePrimary//\"/}" echo "p=$currPrimary 
($sitePrimary), s=$currSecondary" diff --git a/test/sct_test_properties b/test/sct_test_properties index 2fb03aef..67daa822 100755 --- a/test/sct_test_properties +++ b/test/sct_test_properties @@ -1,6 +1,6 @@ #!/usr/bin/bash # -# test_properties - set your own values here +# sct_test_properties - set your own values here # node01="ifen01" node02="ifen02" diff --git a/test/sct_test_restart_cluster b/test/sct_test_restart_cluster index 52396e19..bef1a0dd 100755 --- a/test/sct_test_restart_cluster +++ b/test/sct_test_restart_cluster @@ -1,6 +1,6 @@ #!/bin/bash # -# test_restart_cluster - restart cluster, start SAP HANA instances while cluster is down +# sct_test_restart_cluster - restart cluster, start SAP HANA instances while cluster is down # # - cleanup mst-resource # - stop cluster (including SAP HANA resources) @@ -13,12 +13,12 @@ full_path=$(readlink -f "$src") dir_path=$(dirname "$full_path") source .test_properties -currPrimary="$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="150"/ { print $2 }' )" -currSecondary="$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="100"/ { print $2 }' )" +currPrimary="$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="150"/ { print $2 }' )" +currSecondary="$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="100"/ { print $2 }' )" echo "p=$currPrimary, s=$currSecondary" -sitePrimary=$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'=' '$0 ~ node".site" { print $2 }' node="$currPrimary") +sitePrimary=$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr --format=tester" | awk -F'=' '$0 ~ node".site" { print $2 }' node="$currPrimary") sitePrimary="${sitePrimary//\"/}" echo "p=$currPrimary ($sitePrimary), s=$currSecondary" diff --git a/test/sct_test_restart_cluster_hana_running b/test/sct_test_restart_cluster_hana_running index e2aa9ad1..2cca398f 100755 --- a/test/sct_test_restart_cluster_hana_running +++ b/test/sct_test_restart_cluster_hana_running @@ -1,6 +1,6 @@ #!/bin/bash # -# test_restart_cluster_hana_running - restart cluster with SAP HANA resource in maintenance +# sct_test_restart_cluster_hana_running - restart cluster with SAP HANA resource in maintenance # # - cleanup mst-resource # - set cluster in maintenance mode @@ -24,13 +24,13 @@ source .test_properties ssh "$node01" "crm resource cleanup $mstResource" ssh "$node01" "crm maintenance on" -ssh "$node02" 'crm cluster run "crm cluster stop"' +ssh "$node02" "crm cluster stop --all" echo "==== SUSE Cluster Stopped ====" sleep 60 -ssh "$node01" 'crm cluster run "crm cluster start"' +ssh "$node01" "crm cluster start --all" sleep 120 ssh "$node01" "cs_wait_for_idle --sleep 60" diff --git a/test/sct_test_restart_cluster_turn_hana b/test/sct_test_restart_cluster_turn_hana index a6422d56..4b4f056a 100755 --- a/test/sct_test_restart_cluster_turn_hana +++ b/test/sct_test_restart_cluster_turn_hana @@ -21,8 +21,8 @@ source .test_properties logger --id -t "sct_test_restart_cluster_turn_hana" -s "Cleanup promotable resource" ssh "${node01}" "crm resource cleanup $mstResource" -currPrimary="$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="150"/ { print $2 }' )" -currSecondary="$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="100"/ { print $2 }' )" +currPrimary="$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="150"/ { print $2 }' )" +currSecondary="$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr 
--format=tester" | awk -F'/' '/score="100"/ { print $2 }' )" ######################################### # VHOSTS @@ -31,8 +31,8 @@ currSecondary="$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'/ # ######################################### -vhostPrim="$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'=' '$0 ~ node".*vhost=" { print $2 }' node=$currPrimary | tr -d '"' )" -vhostSecn="$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'=' '$0 ~ node".*vhost=" { print $2 }' node=$currSecondary | tr -d '"' )" +vhostPrim="$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr --format=tester" | awk -F'=' '$0 ~ node".*vhost=" { print $2 }' node=$currPrimary | tr -d '"' )" +vhostSecn="$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr --format=tester" | awk -F'=' '$0 ~ node".*vhost=" { print $2 }' node=$currSecondary | tr -d '"' )" if [[ -z "$vhostPrim ]]; then vhostPrim="$currPrimary" fi @@ -40,9 +40,9 @@ if [[ -z "$vhostSecn ]]; then vhostPrim="$currSecondary" fi -sitePrimary=$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'=' '$0 ~ node".site" { print $2 }' node="$currPrimary") +sitePrimary=$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr --format=tester" | awk -F'=' '$0 ~ node".site" { print $2 }' node="$currPrimary") sitePrimary="${sitePrimary//\"/}" -siteSecondary=$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'=' '$0 ~ node".site" { print $2 }' node="$currSecondary") +siteSecondary=$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr --format=tester" | awk -F'=' '$0 ~ node".site" { print $2 }' node="$currSecondary") siteSecondary="${siteSecondary//\"/}" logger --id -t "sct_test_restart_cluster_turn_hana" -s "p=$currPrimary ($sitePrimary), s=$currSecondary, vhostPrim=$vhostPrim, vhostSecn=$vhostSecn" diff --git a/test/sct_test_unblock_sap_hana_sr b/test/sct_test_unblock_sap_hana_sr index 75470062..3f9d15d8 100755 --- a/test/sct_test_unblock_sap_hana_sr +++ b/test/sct_test_unblock_sap_hana_sr @@ -1,6 +1,6 @@ #!/bin/bash # -# test_block_sap_hana_sr - block ports for SAP HANA SR in multi-db installation (4xx01 - 4xx03) +# sct_test_unblock_sap_hana_sr - block ports for SAP HANA SR in multi-db installation (4xx01 - 4xx03) # iptables -D INPUT -p tcp -m multiport --ports 40001,40002,40003 -j DROP # iptables -I INPUT -p tcp -m multiport --ports 40001,40002,40003 -j DROP # about sap hana ports: https://www.stechies.com/hana-services-ports-interview-questions-answer/ @@ -8,7 +8,7 @@ src=${BASH_SOURCE[0]} full_path=$(readlink -f "$src") dir_path=$(dirname "$full_path") source .test_properties -currPrimary="$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="150"/ { print $2 }' )" +currPrimary="$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="150"/ { print $2 }' )" echo "==== Un-Block SAP HANA SR ====" diff --git a/test/sct_test_unfreeze_prim_fs b/test/sct_test_unfreeze_prim_fs index 707af20a..8c95e458 100755 --- a/test/sct_test_unfreeze_prim_fs +++ b/test/sct_test_unfreeze_prim_fs @@ -1,11 +1,11 @@ #!/bin/bash # -# test_freeze_prim_fs - freeze fs in primary side +# sct_test_unfreeze_prim_fs - freeze fs in primary side src=${BASH_SOURCE[0]} full_path=$(readlink -f "$src") dir_path=$(dirname "$full_path") source .test_properties -currPrimary="$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="150"/ { print $2 }' )" +currPrimary="$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="150"/ { print $2 }' )" echo "==== Un-Freeze SAP HANA FS 
====" diff --git a/test/sct_test_unfreeze_prim_master_nfs b/test/sct_test_unfreeze_prim_master_nfs index ce0d6f13..72902612 100755 --- a/test/sct_test_unfreeze_prim_master_nfs +++ b/test/sct_test_unfreeze_prim_master_nfs @@ -1,12 +1,12 @@ #!/bin/bash # -# test_unfreeze_prim_nfs - unfreeze nfs on primary master +# sct_test_unfreeze_prim_master_nfs - unfreeze nfs on primary master src=${BASH_SOURCE[0]} full_path=$(readlink -f "$src") dir_path=$(dirname "$full_path") source .test_properties -currPrimary="$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="150"/ { print $2 }' )" -currPrimWorker="$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="-10000"/ { print $2 }' )" +currPrimary="$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="150"/ { print $2 }' )" +currPrimWorker="$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="-10000"/ { print $2 }' )" echo "==== Un-Freeze SAP HANA NFS ====" diff --git a/test/sct_test_unfreeze_prim_site_nfs b/test/sct_test_unfreeze_prim_site_nfs index 572d23e6..874fd5ab 100755 --- a/test/sct_test_unfreeze_prim_site_nfs +++ b/test/sct_test_unfreeze_prim_site_nfs @@ -1,12 +1,12 @@ #!/bin/bash # -# test_unfreeze_prim_site_nfs - unfreeze nfs on primary site +# sct_test_unfreeze_prim_site_nfs - unfreeze nfs on primary site src=${BASH_SOURCE[0]} full_path=$(readlink -f "$src") dir_path=$(dirname "$full_path") source .test_properties -currPrimary="$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="150"/ { print $2 }' )" -currPrimWorker="$(ssh "${node01}" "SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="-10000"/ { print $2 }' )" +currPrimary="$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="150"/ { print $2 }' )" +currPrimWorker="$(ssh "${node01}" "/usr/bin/SAPHanaSR-showAttr --format=tester" | awk -F'/' '/score="-10000"/ { print $2 }' )" echo "==== Un-Freeze SAP HANA NFS ====" diff --git a/tools/SAPHanaSR-upgrade-to-angi-demo b/tools/SAPHanaSR-upgrade-to-angi-demo new file mode 100755 index 00000000..8242c0e5 --- /dev/null +++ b/tools/SAPHanaSR-upgrade-to-angi-demo @@ -0,0 +1,701 @@ +#!/bin/bash +# shellcheck disable=SC2086,SC2317,SC1090,SC2034,SC2046,SC2162 +# +# SAPHanaSR-upgrade-to-angi-demo +# +# (c) 2024 SUSE LLC +# Author: F.Herschel, L.Pinne. +# GNU General Public License v2. No warranty. +# http://www.gnu.org/licenses/gpl.html +# + +# +# define parameters and functions +# +VERSION="2024-03-20 0.3" +DRYRUN=yes +# TODO DRYRUN=no +EXE=$(basename $0) +TMP=/run/"$EXE"."$RANDOM" +TIMEST=$(date +%s) +ALL_RC=0 + +# TODO how to use templates from package SAPHanaSR-angi? 
+CIB_MSTTMP_ANG="# +primitive rsc_SAPHanaCon_@@sid@@_HDB@@ino@@ ocf:suse:SAPHanaController \ + op start interval=0 timeout3600 \ + op stop interval=0 timeout=3600 \ + op promote interval=0 timeout=900 \ + op demote interval=0 timeout=320 \ + op monitor interval=60 role=Promoted timeout=700 \ + op monitor interval=61 role=Unpromoted timeout=700 \ + params SID=@@sid@@ InstanceNumber=@@ino@@ PREFER_SITE_TAKEOVER=true \ + DUPLICATE_PRIMARY_TIMEOUT=7200 AUTOMATED_REGISTER=true \ + meta maintenance=true +# +clone @@mstnew@@ rsc_SAPHanaCon_@@sid@@_HDB@@ino@@ \ + meta clone-node-max=1 promotable=true interleave=true maintenance=true +# +order ord_SAPHanaTop_first Optional: @@clntop@@ @@mstnew@@ +# +colocation col_SAPHanaCon_ip_@@sid@@_HDB@@ino@@ 2000: @@rscipa@@:Started @@mstnew@@:Promoted +#" + +CIB_CLNTMP_ANG="# +primitive rsc_SAPHanaTop_@@sid@@_HDB@@ino@@ ocf:suse:SAPHanaTopology \ + op start interval=0 timeout=600 \ + op stop interval=0 timeout=600 \ + op monitor interval=50 timeout=600 \ + params SID=@@sid@@ InstanceNumber=@@ino@@ +# +clone @@clntop@@ rsc_SAPHanaTop_@@sid@@_HDB@@ino@@ \ + meta clone-node-max=1 interleave=true +# +order ord_SAPHanaTop_first Optional: @@clntop@@ @@mstnew@@ +#" + +CIB_CLNTMP_FIL="# +primitive rsc_SAPHanaFil_@@sid@@_HDB@@ino@@ ocf:suse:SAPHanaFilesystem \ + op start interval=0 timeout=10 \ + op stop interval=0 timeout=20 on-fail=fence \ + op monitor interval=120 timeout=120 \ + params SID=@@sid@@ InstanceNumber=@@ino@@ +# +clone cln_SAPHanaFil_@@sid@@_HDB@@ino@@ rsc_SAPHanaFil_@@sid@@_HDB@@ino@@ \ + meta clone-node-max=1 interleave=true +#" + +function echo-funa() { + echo + echo "######## $1 $2 #########" + echo +} + +function wait-idle() { + echo "cs_wait_for_idle -s 3 >/dev/null" + cs_wait_for_idle -s 3 >/dev/null +} + +function init-variables() { + BAKDIR=/root/"${EXE}.$TIMEST" + mkdir -p "$BAKDIR" + cibadmin -Ql > "$BAKDIR"/cib.xml || exit 1 + SCRIPT=/root/bin/"$EXE" + RPMOLD="SAPHanaSR" + RPMNEW="SAPHanaSR-angi" + SID=$(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances |\ + awk '{print $4}') + INO=$(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances |\ + awk '{print $6}') + sid="${SID,,}" + sidadm="${sid}adm" + MSTOLD=$(xmllint -xpath \ + "string(///resources//*[@type='SAPHana']/instance_attributes/nvpair[@name='SID'][@value='$SID']/../../../@id)" "$BAKDIR/cib.xml") + # MSTOLD=$(SAPHanaSR-showAttr --format script |\ + # awk -F"/" '$1~/Resource/ && $2~/ms.*'$SID'/ && $3~/maintenance=/ {print $2}') + RSCCON=$(xmllint -xpath "string(///resources//*[@type='SAPHana']/@id)" "$BAKDIR/cib.xml") + # RSCCON=$(crm configure show type:clone |\ + # awk '$1=="clone" && $2=="'$MSTOLD'" {print $3}') + MSTNEW="mst_SAPHanaCon_${SID}_HDB${INO}" + CLNTOP=$(xmllint -xpath \ + "string(///resources//*[@type='SAPHanaTopology']/instance_attributes/nvpair[@name='SID'][@value='$SID']/../../../@id)" "$BAKDIR/cib.xml") + # CLNTOP=$(crm configure show type:order |\ + # awk '$1=="order" && $5=="'$MSTOLD'" {print $4}') + RSCTOP=$(xmllint -xpath "\ + string(///resources//*[@type='SAPHanaTopology']/@id)" "$BAKDIR/cib.xml") + # RSCTOP=$(crm configure show type:clone |\ + # awk '$1=="clone" && $2=="'$CLNTOP'" {print $3}') + CLNNEW=cln_SAPHanaTop_${SID}_HDB${INO} + MSTORD=$(xmllint -xpath \ + "string(///constraints//*[@then='$MSTOLD']/@id)" $BAKDIR/cib.xml) + # MSTORD=$(crm configure show type:order |\ + # awk '$1=="order" && $4=="'$CLNTOP'" && $5=="'$MSTOLD'" {print $2}') + MSTCOL=$(xmllint -xpath \ + "string(///constraints//*[@with-rsc='$MSTOLD']/@id)" $BAKDIR/cib.xml) + # 
MSTCOL=$(crm configure show type:colocation |\ + # awk '$1=="colocation" && $5=="'$MSTOLD':Master" {print $2}') + CLNFIL=cln_SAPHanaFil_${SID}_HDB${INO} + # TODO RSCIPA=$(xmllint ...) + RSCIPA=$(crm configure show type:colocation |\ + awk '$1=="colocation" && $5=="'$MSTOLD':Master" {print $4}' |\ + awk -F: '{print $1}') + PRINOD=$(SAPHanaSR-showAttr --format script |\ + awk -F"/" '$1~/Host/&&$3=="score=\"150\"" {print $2}') + SECNOD=$(SAPHanaSR-showAttr --format script |\ + awk -F"/" '$1~/Host/&&$3=="score=\"100\"" {print $2}') + GLBINI="/hana/shared/$SID/global/hdb/custom/config/global.ini" + SUDOER=$(grep "${sidadm}.ALL.*NOPASSWD.*crm_attribute" \ + /etc/sudoers /etc/sudoers.d/* | awk -F":" '{print $1}' | sort -u) + [ -z $SUDOER ] && SUDOER="/etc/sudoers.d/SAPHanaSR" + scp $SCRIPT root@${SECNOD}:$SCRIPT >/dev/null + ( echo "SID=$SID" + echo "sid=$sid" + echo "sidadm=$sidadm" + echo "INO=$INO" + echo "RPMOLD=$RPMOLD" + echo "RPMNEW=$RPMNEW" + echo "MSTOLD=$MSTOLD" + echo "MSTCOL=$MSTCOL" + echo "MSTORD=$MSTORD" + echo "MSTNEW=$MSTNEW" + echo "RSCCON=$RSCCON" + echo "CLNTOP=$CLNTOP" + echo "RCSTOP=$RSCTOP" + echo "CLNFIL=$CLNFIL" + echo "RSCIPA=$RSCIPA" + echo "MSTORD=$MSTORD" + echo "MSTCOL=$MSTCOL" + echo "PRINOD=$PRINOD" + echo "SECNOD=$SECNOD" + echo "GLBINI=$GLBINI" + echo "SUDOER=$SUDOER" + echo "BAKDIR=$BAKDIR" + echo "SCRIPT=$SCRIPT" + echo "TIMEST=$TIMEST" + echo "DRYRUN=$DRYRUN" + echo "TMP=$TMP" ) > /run/$EXE.variables + scp /run/$EXE.variables root@${SECNOD}:/run/$EXE.variables >/dev/null +} + +function show-variables() { + echo + cat /run/$EXE.variables + echo +} + +function make-backup-local() { + EXE=$(basename $0) + source /run/$EXE.variables + echo "mkdir $BAKDIR" + mkdir -p $BAKDIR || exit 9 + echo "cp -a \"$GLBINI\" ${BAKDIR}/" + cp -a "$GLBINI" ${BAKDIR}/ + echo "cp -a \"$SUDOER\" ${BAKDIR}/$(basename $SUDOER).sudo" + cp -a "$SUDOER" ${BAKDIR}/$(basename "$SUDOER").sudo + echo "cp -a \"$SCRIPT\" ${BAKDIR}/" + cp -a "$SCRIPT" ${BAKDIR}/$(basename $SCRIPT) + echo "crm configure show > ${BAKDIR}/crm_configure.txt" + crm configure show > ${BAKDIR}/crm_configure.txt + echo + echo "ls -l ${BAKDIR}/*" + ls -l ${BAKDIR}/* +} + +function f_make-backup() { + echo-funa run "${FUNCNAME[0]}" + crm cluster run "'$SCRIPT' -x make-backup-local" + echo-funa end "${FUNCNAME[0]}" +} + +function f_show-state() { + echo-funa run "${FUNCNAME[0]}" + wait-idle + echo "crm_mon -1r --include=failcounts,fencing-pending;echo;SAPHanaSR-showAttr;cs_clusterstate -i|grep -v \"#\"" + crm_mon -1r --include=failcounts,fencing-pending + echo + SAPHanaSR-showAttr + cs_clusterstate -i | grep -v "#" + echo-funa end "${FUNCNAME[0]}" +} + +function f_maintenance-on-classic() { + echo-funa run "${FUNCNAME[0]}" + wait-idle + echo "crm resource maintenance $MSTOLD on" + [ $DRYRUN = no ] && crm resource maintenance $MSTOLD on + wait-idle + echo "crm resource maintenance $CLNTOP on" + [ $DRYRUN = no ] && crm resource maintenance $CLNTOP on + wait-idle + echo "echo \"property cib-bootstrap-options: stop-orphan-resources=false\" | crm configure load update -" + [ $DRYRUN = no ] && echo "property cib-bootstrap-options: stop-orphan-resources=false" |\ + crm configure load update - + echo-funa run "${FUNCNAME[0]}" +} + +function f_maintenance-off-angi() { + echo-funa run "${FUNCNAME[0]}" + wait-idle + echo "crm resource refresh $CLNTOP" + [ $DRYRUN = no ] && crm resource refresh $CLNTOP + wait-idle + echo "crm resource maintenance $CLNTOP off" + [ $DRYRUN = no ] && crm resource maintenance $CLNTOP off + wait-idle + echo 
"crm resource refresh $MSTNEW" + [ $DRYRUN = no ] && crm resource refresh $MSTNEW + wait-idle + echo "crm resource maintenance $MSTNEW off" + [ $DRYRUN = no ] && crm resource maintenance $MSTNEW off + wait-idle + echo "crm resource refresh $CLNFIL" + [ $DRYRUN = no ] && crm resource refresh $CLNFIL + wait-idle + echo "crm resource maintenance $CLNFIL off" + [ $DRYRUN = no ] && crm resource maintenance $CLNFIL off + wait-idle + echo "echo \"property cib-bootstrap-options: stop-orphan-resources=true\" | crm configure load update -" + [ $DRYRUN = no ] && echo "property cib-bootstrap-options: stop-orphan-resources=true" |\ + crm configure load update - + echo-funa run "${FUNCNAME[0]}" +} + +function del-srhook-local-classic() { + EXE=$(basename $0) + source /run/$EXE.variables + [ -z $sid ] && sid=${SID,,} + echo "grep \"^\[ha_dr_provider_\" $GLBINI" + [ $DRYRUN = no ] && grep "^\[ha_dr_provider_" $GLBINI + echo + grep "^\[ha_dr_provider_" $GLBINI | + grep -i -e susChkSrv -e susTkOver -e SAPHanaSR -e susCostOpt | tr -d "\[\]" |\ + while read; do + P=${REPLY:15} + echo "su - $sidadm -c \"/usr/sbin/SAPHanaSR-manageProvider --sid=$SID --show --provider=$P\" > $TMP.global.ini.$P" + [ $DRYRUN = no ] && su - $sidadm -c "/usr/sbin/SAPHanaSR-manageProvider --sid=$SID --show --provider=$P" > $TMP.global.ini.$P + echo "su - $sidadm -c \"/usr/sbin/SAPHanaSR-manageProvider --sid=$SID --reconfigure --remove $TMP.global.ini.$P\"" + [ $DRYRUN = no ] && su - $sidadm -c "/usr/sbin/SAPHanaSR-manageProvider --sid=$SID --reconfigure --remove $TMP.global.ini.$P" + echo "rm $TMP.global.ini.$P" + [ $DRYRUN = no ] && rm $TMP.global.ini.$P + done + echo "su - $sidadm -c \"hdbnsutil -reloadHADRProviders\"" + [ $DRYRUN = no ] && su - $sidadm -c "hdbnsutil -reloadHADRProviders" + echo "grep \"^\[ha_dr_provider_\" $GLBINI" + [ $DRYRUN = no ] && grep "^\[ha_dr_provider_" $GLBINI + echo + + echo "cp $SUDOER $TMP.sudoers.classic" + [ $DRYRUN = no ] && cp $SUDOER $TMP.sudoers.classic + echo "grep -v \"$sidadm.*ALL..NOPASSWD.*crm_attribute.*$sid\" $TMP.sudoers.classic > $SUDOER" + [ $DRYRUN = no ] && grep -v "$sidadm.*ALL..NOPASSWD.*crm_attribute.*$sid" $TMP.sudoers.classic > $SUDOER + echo "cp $SUDOER $TMP.sudoers.classic" + [ $DRYRUN = no ] && cp $SUDOER $TMP.sudoers.classic + echo "grep -v \"$sidadm.*ALL..NOPASSWD.*SAPHanaSR-hookHelper.*sid=$SID\" $TMP.sudoers.classic > $SUDOER" + [ $DRYRUN = no ] && grep -v "$sidadm.*ALL..NOPASSWD.*SAPHanaSR-hookHelper.*sid=$SID" $TMP.sudoers.classic > $SUDOER + echo "rm $TMP.sudoers.classic" + [ $DRYRUN = no ] && rm $TMP.sudoers.classic +} + +function f_remove-srhook-classic() { + echo-funa run "${FUNCNAME[0]}" + crm cluster run "'$SCRIPT' -x del-srhook-local-classic" + echo-funa end "${FUNCNAME[0]}" +} + +function add-srhook-local-angi() { + EXE=$(basename $0) + source /run/$EXE.variables + [ -z $sid ] && sid=${SID,,} + for P in susHanaSR susTkOver susChkSrv; do + echo "su - $sidadm -c \"/usr/bin/SAPHanaSR-manageProvider --sid=$SID --reconfigure --add /usr/share/SAPHanaSR-angi/samples/global.ini_${P}\"" + [ $DRYRUN = no ] && su - $sidadm -c "/usr/bin/SAPHanaSR-manageProvider --sid=$SID --reconfigure --add /usr/share/SAPHanaSR-angi/samples/global.ini_${P}" + done + echo "su - $sidadm -c \"hdbnsutil -reloadHADRProviders\"" + [ $DRYRUN = no ] && su - $sidadm -c "hdbnsutil -reloadHADRProviders" + echo "grep -A2 \"^\[ha_dr_provider_\" $GLBINI" + [ $DRYRUN = no ] && grep -A2 "^\[ha_dr_provider_" $GLBINI + echo + grep "^\[ha_dr_provider_" $GLBINI | + grep -i -e susChkSrv -e susTkOver -e 
SAPHanaSR -e susCostOpt | tr -d "\[\]" |\ + while read; do + P=${REPLY:15} + echo "su - $sidadm -c \"/usr/bin/SAPHanaSR-manageProvider --sid=$SID --show --provider=${P}\"" + [ $DRYRUN = no ] && su - $sidadm -c "/usr/bin/SAPHanaSR-manageProvider --sid=$SID --show --provider=${P}" + done + echo + + echo "echo \"$sidadm ALL=(ALL) NOPASSWD: /usr/bin/SAPHanaSR-hookHelper --sid=$SID *\" >> $SUDOER" + [ $DRYRUN = no ] && echo "$sidadm ALL=(ALL) NOPASSWD: /usr/bin/SAPHanaSR-hookHelper --sid=$SID *" >> $SUDOER + echo "echo \"$sidadm ALL=(ALL) NOPASSWD: /usr/sbin/crm_attribute -n hana_${sid}_*\" >> $SUDOER" + [ $DRYRUN = no ] && echo "$sidadm ALL=(ALL) NOPASSWD: /usr/sbin/crm_attribute -n hana_${sid}_*" >> $SUDOER + echo "sudo -l -U $sidadm | grep -e crm_attribute -e SAPHanaSR-hookHelper" + [ $DRYRUN = no ] && sudo -l -U $sidadm |\ + grep -e crm_attribute -e SAPHanaSR-hookHelper +} + +function f_add-srhook-angi() { + echo-funa run "${FUNCNAME[0]}" + crm cluster run "'$SCRIPT' -x add-srhook-local-angi" + echo-funa end "${FUNCNAME[0]}" +} + +function f_remove-property() { + echo-funa run "${FUNCNAME[0]}" + wait-idle + crm configure show SAPHanaSR | awk -F"=" '$1~/hana_/ {print $1}' |\ + while read; do + echo "crm_attribute --delete --type crm_config --name $REPLY" + [ $DRYRUN = no ] && crm_attribute --delete --type crm_config --name $REPLY + done + echo-funa end "${FUNCNAME[0]}" +} + +function f_remove-node-attribute() { + echo-funa run "${FUNCNAME[0]}" + wait-idle + for N in $PRINOD $SECNOD; do + crm configure show $N | tr " " "\n" | awk -F "=" 'NR>5 {print $1}' |\ + while read; do + echo "crm_attribute --node $N --name $REPLY --delete" + [ $DRYRUN = no ] && crm_attribute --node $N --name $REPLY --delete + done + echo "crm_attribute --node $N --name hana_${sid}_sync_state --lifetime reboot --delete" + [ $DRYRUN = no ] && crm_attribute --node $N --name hana_${sid}_sync_state --lifetime reboot --delete + echo "crm_attribute --node $N --name master-rsc_SAPHana_${SID}_HDB$INO --lifetime reboot --delete" + [ $DRYRUN = no ] && crm_attribute --node $N --name master-rsc_SAPHana_${SID}_HDB$INO --lifetime reboot --delete + done + echo-funa end "${FUNCNAME[0]}" +} + +function f_remove-saphanatop-classic() { + echo-funa run "${FUNCNAME[0]}" + wait-idle + for N in "//rsc_order[@id='$MSTORD']" "//clone[@id='$CLNTOP']"; do + echo "cibadmin --delete --xpath \"${N}\"" + [ $DRYRUN = no ] && cibadmin --delete --xpath "${N}" + done + wait-idle + echo "crm resource refresh $RSCTOP" + [ $DRYRUN = no ] && crm resource refresh $RSCTOP + echo-funa end "${FUNCNAME[0]}" +} + +function f_remove-saphanacon-classic() { + echo-funa run "${FUNCNAME[0]}" + wait-idle + for N in "//rsc_colocation[@id='$MSTCOL']" "//rsc_order[@id='$MSTORD']" "//master[@id='$MSTOLD']"; do + echo "cibadmin --delete --xpath \"${N}\"" + [ $DRYRUN = no ] && cibadmin --delete --xpath "${N}" + done + wait-idle + echo "crm resource refresh $RSCCON" + [ $DRYRUN = no ] && crm resource refresh $RSCCON + echo-funa end "${FUNCNAME[0]}" +} + +function f_add-saphanatop-angi() { + echo-funa run "${FUNCNAME[0]}" + EXE=$(basename $0) + source /run/$EXE.variables + [ -z $sid ] && sid=${SID,,} + wait-idle + echo -n "echo \"" + echo -n $CIB_CLNTMP_ANG |\ + sed -e s/@@sid@@/${SID}/g \ + -e s/@@ino@@/${INO}/g \ + -e s/@@clntop@@/${CLNTOP}/g \ + -e s/@@mstnew@@/${MSTNEW}/g \ + -e s/'#'/\\n'#'\\n/g + echo "\" | crm configure load update -" + echo "crm configure show $CLNTOP" + [ $DRYRUN = no ] && echo $CIB_CLNTMP_ANG |\ + sed -e s/@@sid@@/${SID}/g \ + -e s/@@ino@@/${INO}/g \ + 
-e s/@@clntop@@/${CLNTOP}/g \ + -e s/@@mstnew@@/${MSTNEW}/g \ + -e s/'#'/\\n'#'\\n/g |\ + crm configure load update - + [ $DRYRUN = no ] && crm configure show $CLNTOP + echo-funa end "${FUNCNAME[0]}" +} + +function f_add-saphanacon-angi() { + echo-funa run "${FUNCNAME[0]}" + EXE=$(basename $0) + source /run/$EXE.variables + [ -z $sid ] && sid=${SID,,} + wait-idle + echo -n "echo \"" + echo -n $CIB_MSTTMP_ANG |\ + sed -e s/@@sid@@/${SID}/g \ + -e s/@@ino@@/${INO}/g \ + -e s/@@clntop@@/${CLNTOP}/g \ + -e s/@@mstnew@@/${MSTNEW}/g \ + -e s/@@rscipa@@/${RSCIPA}/g \ + -e s/'#'/\\n'#'\\n/g + echo "\" | crm configure load update -" + echo "crm configure show $MSTNEW" + [ $DRYRUN = no ] && echo $CIB_MSTTMP_ANG |\ + sed -e s/@@sid@@/${SID}/g \ + -e s/@@ino@@/${INO}/g \ + -e s/@@clntop@@/${CLNTOP}/g \ + -e s/@@mstnew@@/${MSTNEW}/g \ + -e s/@@rscipa@@/${RSCIPA}/g \ + -e s/'#'/\\n'#'\\n/g |\ + crm configure load update - + [ $DRYRUN = no ] && crm configure show $MSTNEW + echo-funa end "${FUNCNAME[0]}" +} + +function f_add-saphanafil-angi() { + echo-funa run "${FUNCNAME[0]}" + wait-idle + echo -n "echo \"" + echo -n $CIB_CLNTMP_FIL |\ + sed -e s/@@sid@@/${SID}/g \ + -e s/@@ino@@/${INO}/g \ + -e s/'#'/\\n'#'\\n/g + echo "\" | crm configure load update -" + echo "crm configure show $CLNFIL" + [ $DRYRUN = no ] && echo $CIB_CLNTMP_FIL |\ + sed -e s/@@sid@@/${SID}/g \ + -e s/@@ino@@/${INO}/g \ + -e s/'#'/\\n'#'\\n/g |\ + crm configure load update - + [ $DRYRUN = no ] && crm configure show $CLNFIL + echo-funa end "${FUNCNAME[0]}" +} + +function f_install-rpm-angi() { + echo-funa run "${FUNCNAME[0]}" + wait-idle + echo "crm cluster run \"zypper --non-interactive in -l -f -y '${RPMNEW}'\"" + [ $DRYRUN = no ] && crm cluster run "zypper --non-interactive in \ + -l -f -y '${RPMNEW}'" + # TODO [ $DRYRUN = no ] && crm cluster run "rpm -i ~/SAPHanaSR-angi-1.2.5-150600.3.11.1.noarch.rpm" + echo "crm cluster run \"rpm -q '${RPMNEW}' --queryformat %{NAME}\"" + [ $DRYRUN = no ] && crm cluster run "rpm -q '${RPMNEW}' --queryformat %{NAME}" + echo "hash -r" + [ $DRYRUN = no ] && hash -r + echo-funa end "${FUNCNAME[0]}" +} + +function f_remove-rpm-classic() { + echo-funa run "${FUNCNAME[0]}" + wait-idle + echo "crm cluster run \"rpm -e --nodeps '${RPMOLD}'\"" + [ $DRYRUN = no ] && crm cluster run "rpm -e --nodeps '${RPMOLD}'" + echo-funa end "${FUNCNAME[0]}" +} + +function f_check-prereq() { + echo-funa run "${FUNCNAME[0]}" + # TODO meaningful return codes + pre_rc=0 + if [ -z $PRINOD ]; then + echo "ERROR: Can not determine primary node." + pre_rc=9 + fi + if [ -z $SECNOD ]; then + echo "ERROR: Can not determine secondary node." + pre_rc=9 + fi + if [ $HOSTNAME != $PRINOD ]; then + echo "ERROR: Looks not like primary node." + pre_rc=9 + fi + os_vers=$(grep "PRETTY_NAME=\"SUSE Linux Enterprise Server 15 SP[4-7]\"" /etc/os-release \ + >/dev/null 2>&1; echo $?) + if [ $os_vers != 0 ]; then + echo "ERROR: Local OS version is not supported." + pre_rc=9 + fi + hana_rev=$(su - $sidadm -c "HDB version" | awk -F: '$1==" version" {print $2}' | tr -d ".") + if [ ! $hana_rev -ge 200059040000000000 ]; then + echo "ERROR: Local HANA revision looks like not supported." + pre_rc=9 + fi + hana_py=$(su - $sidadm -c "python --version" | grep "Python 3\.[7-9]" \ + >/dev/null 2>&1; echo $?) + if [ $hana_py != 0 ]; then + echo "ERROR: Local HANA python looks like not supported." + pre_rc=9 + fi + # shellcheck disable=SC2029 + ssh root@$SECNOD "'$SCRIPT' -v" | grep "$VERSION" >/dev/null; my_rc=$? 
+ if [ $my_rc != 0 ]; then + echo "ERROR: Can not call $SCRIPT on ${SECNOD}." + pre_rc=9 + fi + if [ ! -r $SUDOER ]; then + echo "ERROR: Can not access ${SUDOER}." + pre_rc=9 + fi + n_sid=$(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances | wc -l) + if [ $n_sid != 1 ]; then + echo "ERROR: Not exactly one SAP instance found." + pre_rc=9 + fi + crm configure show cib-bootstrap-options >/dev/null; my_rc=$? + if [ $my_rc != 0 ]; then + echo "ERROR: Can not access CIB." + pre_rc=9 + fi + n_nd=$(crm configure show type:node | grep -c "^node [0-9]") + if [ $n_nd != 2 ]; then + echo "ERROR: Not exactly two cluster nodes found in CIB." + pre_rc=9 + fi + hana_up=$(SAPHanaSR-showAttr --format=script |\ + awk -F/ 'BEGIN{p=0;d=0}; $3~/PROMOTED/{p++}; $3~/DEMOTED/{d++}; \ + END{print "p="p"_d="d}') + if [ $hana_up != "p=1_d=1" ]; then + echo "ERROR: Can not find running ${MSTOLD}." + pre_rc=9 + fi + n_cnstr=$(crm configure show type:colocation | grep -c colocation) + if [ $n_cnstr != 1 ]; then + echo "ERROR: Not exactly one colocation constraint found." + pre_rc=9 + fi + n_cnstr=$(crm configure show type:order | grep -c order) + if [ $n_cnstr != 1 ]; then + echo "ERROR: Not exactly one order constraint found." + pre_rc=9 + fi + n_old=$(rpm -qa | grep -c "${RPMOLD}-0\.16[2-9]\.") + if [ $n_old != 1 ]; then + echo "ERROR: Package $RPMOLD in correct version not installed." + pre_rc=9 + fi + xmlt=$(rpm -qa | grep -c libxml2-tools) + if [ $xmlt != 1 ]; then + echo "ERROR: Package libxml2-tools not installed." + pre_rc=9 + fi + cltl=$(rpm -qa | grep -c ClusterTools2) + if [ $cltl != 1 ]; then + echo "ERROR: Package ClusterTools2 not installed." + pre_rc=9 + fi + n_new=$(rpm -qa | grep -c "${RPMNEW}-[1-9]\.[1-9]") + if [ $n_new != 0 ]; then + echo "ERROR: Package $RPMNEW installed." + pre_rc=9 + fi + tstr=$(rpm -qa | grep -c SAPHanaSR-tester-client) + if [ $tstr != 0 ]; then + echo "ERROR: Package SAPHanaSR-tester-client installed." + pre_rc=9 + fi + rmt=$(zypper se $RPMNEW 2>/dev/null | grep -c $RPMNEW) + if [ $rmt != 1 ]; then + echo "ERROR: Can not find $RPMNEW in software channels." + pre_rc=9 + fi + show-variables + ALL_RC=$pre_rc + echo "RC=$pre_rc" + echo-funa end "${FUNCNAME[0]}" +} + +function f_check-final-state() { + echo-funa run "${FUNCNAME[0]}" + wait-idle + echo "TODO /usr/bin/SAPHanaSR-manageAttr" + # TODO [ $DRYRUN = no ] && /usr/bin/SAPHanaSR-manageAttr + echo-funa end "${FUNCNAME[0]}" +} + +function f_test-secondary() { + echo-funa run "${FUNCNAME[0]}" + wait-idle + echo "root@$SECNOD \"hostname; killall -9 hdbnameserver\"" + [ $DRYRUN = no ] && ssh root@$SECNOD "hostname; killall -9 hdbnameserver" + wait-idle + echo "crm resource cleanup $CLNTOP" + [ $DRYRUN = no ] && crm resource cleanup $CLNTOP + echo-funa end "${FUNCNAME[0]}" +} + +function cleanup() { + crm cluster run "rm -f /run/$EXE.variables" +} + +function erase-classic() { + f_show-state + f_make-backup + f_maintenance-on-classic + f_remove-srhook-classic + f_remove-saphanacon-classic + f_remove-saphanatop-classic + f_remove-property + f_remove-node-attribute + f_remove-rpm-classic +} + +function upgrade-to-angi() { + erase-classic + f_install-rpm-angi + f_add-srhook-angi + f_add-saphanatop-angi + f_add-saphanacon-angi + f_add-saphanafil-angi + f_maintenance-off-angi + f_show-state + f_check-final-state + f_test-secondary + f_show-state +} + +function show-help() { + echo + echo "$EXE [ OPTION ]" + echo "$EXE --run [ [...] 
]" + echo + echo "OPTION:" + echo " --help" + echo " --version" + echo " --list-functions" + echo " --check-prereq" + echo " --erase" + echo " --upgrade" + echo + echo "SAPHanaSR-upgrade-to-angi-demo is shipped as technology preview." + echo +} + +# +# main() +# +case $1 in + -v | --version) + echo + echo "$EXE $VERSION" + echo + exit + ;; + -l | --list*) + echo + grep "^function.f_.*{" $0 | colrm 1 8 | tr -d "(){" + echo + exit + ;; + -c | --check*) + init-variables + f_check-prereq + cleanup >/dev/null 2>&1 + exit $ALL_RC + ;; + -e | --erase) + init-variables + erase-classic + cleanup >/dev/null 2>&1 + ;; + -u | --upgrade) + init-variables + upgrade-to-angi + cleanup >/dev/null 2>&1 + ;; + -r | --run | --run-fun*) + init-variables + while [ $# -gt 1 ]; do + shift + $1 + done + cleanup >/dev/null 2>&1 + ;; + -x) + # init-variables have been done on every node + while [ $# -gt 1 ]; do + shift + $1 + done + # cleanup will be done on every node + ;; + *) + show-help + ;; +esac +exit $ALL_RC +# diff --git a/tools/saphana_sr_tools.py b/tools/saphana_sr_tools.py index 62c35000..e83eaf83 100644 --- a/tools/saphana_sr_tools.py +++ b/tools/saphana_sr_tools.py @@ -272,6 +272,8 @@ def fill_res_dict(self): self.res_dict = {} # Controller con_res_arr = self.root.findall(f"./configuration/resources//*[@type='SAPHanaController']/instance_attributes/nvpair[@name='SID'][@value='{sid}']/../../..") + if len(con_res_arr) == 0: + con_res_arr = self.root.findall(f"./configuration/resources//*[@type='SAPHana']/instance_attributes/nvpair[@name='SID'][@value='{sid}']/../../..") if len(con_res_arr) == 1: con_res = con_res_arr[0] con_name = con_res.attrib['id'] @@ -529,6 +531,8 @@ def get_sids(self): try: for ia in root.findall("./configuration/resources//*[@type='SAPHanaController']/instance_attributes/nvpair[@name='SID']"): sids.append(ia.attrib['value']) + for ia in root.findall("./configuration/resources//*[@type='SAPHana']/instance_attributes/nvpair[@name='SID']"): + sids.append(ia.attrib['value']) except AttributeError: print(f"Could not find any SAPHanaController resource in cluster config") self.sids = sids