From c8e796b35bdbbfb62c45fa22e4b3bc9a427348bf Mon Sep 17 00:00:00 2001 From: badrogger Date: Mon, 30 Jan 2023 16:54:02 +0000 Subject: [PATCH 01/84] Add RPC_PORT to IMA env --- core/schains/ima.py | 20 +++++++++++++++----- tests/schains/ima_test.py | 1 + 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/core/schains/ima.py b/core/schains/ima.py index eac9b5b63..b3aab82aa 100644 --- a/core/schains/ima.py +++ b/core/schains/ima.py @@ -17,15 +17,19 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +import json +import logging +import os from dataclasses import dataclass +from flask import g +from skale.dataclasses.skaled_ports import SkaledPorts +from websocket import create_connection + from core.schains.config.directory import schain_config_dir from core.schains.config.helper import get_schain_ports, get_schain_config, get_chain_id from core.ima.schain import get_schain_ima_abi_filepath -import json -import logging -import os from tools.configs import SGX_SSL_KEY_FILEPATH, SGX_SSL_CERT_FILEPATH, SGX_SERVER_URL from tools.configs.containers import CONTAINERS_INFO from tools.configs.db import REDIS_URI @@ -37,8 +41,6 @@ ) from tools.configs.schains import SCHAINS_DIR_PATH from tools.configs.web3 import ABI_FILEPATH -from flask import g -from websocket import create_connection logger = logging.getLogger(__name__) @@ -102,6 +104,7 @@ def to_dict(self): 'CID_MAIN_NET': self.cid_main_net, 'CID_SCHAIN': self.cid_schain, 'MONITORING_PORT': self.monitoring_port, + 'RPC_PORT': self.rpc_port, 'TIME_FRAMING': self.time_framing } @@ -162,6 +165,7 @@ def get_ima_env(schain_name: str, mainnet_chain_id: int) -> ImaEnv: cid_main_net=mainnet_chain_id, cid_schain=schain_chain_id, monitoring_port=node_info['imaMonitoringPort'], + rpc_port=get_ima_rpc_port(schain_name), time_framing=IMA_TIME_FRAMING ) @@ -179,6 +183,12 @@ def get_ima_monitoring_port(schain_name): return None +def 
get_ima_rpc_port(schain_name): + config = get_schain_config(schain_name) + base_port = config['skaleConfig']['nodeInfo']['basePort'] + return base_port + SkaledPorts.IMA_RPC.value + + def get_ima_container_statuses(): containers_list = g.docker_utils.get_all_ima_containers(all=True, format=True) ima_containers = [{'name': container['name'], 'state': container['state']['Status']} diff --git a/tests/schains/ima_test.py b/tests/schains/ima_test.py index 583b50ac5..735301752 100644 --- a/tests/schains/ima_test.py +++ b/tests/schains/ima_test.py @@ -9,4 +9,5 @@ def test_get_ima_env(_schain_name, schain_config): ima_env_dict = ima_env.to_dict() assert len(ima_env_dict) == 20 assert ima_env_dict['CID_MAIN_NET'] == 123 + assert ima_env_dict['IMA_RPC'] == 10010 isinstance(ima_env_dict['CID_SCHAIN'], str) From 863ec1cda8798f1e83b8c70b15a973b422c44292 Mon Sep 17 00:00:00 2001 From: badrogger Date: Mon, 30 Jan 2023 16:57:28 +0000 Subject: [PATCH 02/84] Add IMA_RPC to internal ports --- core/schains/firewall/rule_controller.py | 3 ++- tests/firewall/rule_controller_test.py | 15 ++++++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/core/schains/firewall/rule_controller.py b/core/schains/firewall/rule_controller.py index 0e5575af0..e71e456bb 100644 --- a/core/schains/firewall/rule_controller.py +++ b/core/schains/firewall/rule_controller.py @@ -112,7 +112,8 @@ def internal_ports(self) -> Iterable[int]: self.port_allocation.CATCHUP, self.port_allocation.PROPOSAL, self.port_allocation.BINARY_CONSENSUS, - self.port_allocation.ZMQ_BROADCAST + self.port_allocation.ZMQ_BROADCAST, + self.port_allocation.IMA_RPC ) ) diff --git a/tests/firewall/rule_controller_test.py b/tests/firewall/rule_controller_test.py index f1c877106..9bb730afe 100644 --- a/tests/firewall/rule_controller_test.py +++ b/tests/firewall/rule_controller_test.py @@ -113,7 +113,10 @@ def test_schain_rule_controller_no_sync_rules(): SChainRule(port=10005, first_ip='4.4.4.4', last_ip=None), 
SChainRule(port=10007, first_ip=None, last_ip=None), SChainRule(port=10008, first_ip=None, last_ip=None), - SChainRule(port=10009, first_ip=None, last_ip=None) + SChainRule(port=10009, first_ip=None, last_ip=None), + SChainRule(port=10010, first_ip='2.2.2.2', last_ip=None), + SChainRule(port=10010, first_ip='3.3.3.3', last_ip=None), + SChainRule(port=10010, first_ip='4.4.4.4', last_ip=None) } src = SChainTestRuleController( 'test', @@ -158,7 +161,10 @@ def test_schain_rule_controller_configure(): SChainRule(port=10005, first_ip='4.4.4.4', last_ip=None), SChainRule(port=10007, first_ip=None, last_ip=None), SChainRule(port=10008, first_ip=None, last_ip=None), - SChainRule(port=10009, first_ip=None, last_ip=None) + SChainRule(port=10009, first_ip=None, last_ip=None), + SChainRule(port=10010, first_ip='2.2.2.2', last_ip=None), + SChainRule(port=10010, first_ip='3.3.3.3', last_ip=None), + SChainRule(port=10010, first_ip='4.4.4.4', last_ip=None) } src.configure(base_port=base_port, own_ip=own_ip, node_ips=node_ips) assert not src.is_rules_synced() @@ -190,7 +196,10 @@ def test_schain_rule_controller_configure(): SChainRule(port=10005, first_ip='5.5.5.5', last_ip=None), SChainRule(port=10007, first_ip=None, last_ip=None), SChainRule(port=10008, first_ip=None, last_ip=None), - SChainRule(port=10009, first_ip=None, last_ip=None) + SChainRule(port=10009, first_ip=None, last_ip=None), + SChainRule(port=10010, first_ip='1.1.1.1', last_ip=None), + SChainRule(port=10010, first_ip='3.3.3.3', last_ip=None), + SChainRule(port=10010, first_ip='5.5.5.5', last_ip=None), } assert not src.is_rules_synced() assert list(src.expected_rules()) == list(sorted(expected_rules)) From 402c98e717d653602c749b8f5e4ba423b9b9b3a5 Mon Sep 17 00:00:00 2001 From: badrogger Date: Mon, 30 Jan 2023 17:36:44 +0000 Subject: [PATCH 03/84] Update skale.py to 5.8b1 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 
9a7951e53..afc0eef86 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,7 @@ simple-crypt==4.1.7 pycryptodome==3.12.0 python-iptables==1.0.0 -skale.py==5.8dev4 +skale.py==5.8b1 ima-predeployed==1.3.5b0 etherbase-predeployed==1.0.0 From 57b0efc375b8fae14c4fff10895c0dceff3595f4 Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 31 Jan 2023 12:19:56 +0000 Subject: [PATCH 04/84] Fix ImaEnv class --- core/schains/ima.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/schains/ima.py b/core/schains/ima.py index b3aab82aa..623e4b3bc 100644 --- a/core/schains/ima.py +++ b/core/schains/ima.py @@ -82,6 +82,8 @@ class ImaEnv: time_framing: int + rpc_port: int + def to_dict(self): """Returns upper-case representation of the ImaEnv object""" return { From 6c6ed969482ec2c1fd3af843c4bb0d93bdf640b4 Mon Sep 17 00:00:00 2001 From: badrogger Date: Mon, 6 Feb 2023 15:56:59 +0000 Subject: [PATCH 05/84] Fix tests --- tests/schains/ima_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/schains/ima_test.py b/tests/schains/ima_test.py index 735301752..a8a5cec3a 100644 --- a/tests/schains/ima_test.py +++ b/tests/schains/ima_test.py @@ -7,7 +7,7 @@ def test_get_ima_env(_schain_name, schain_config): mainnet_chain_id=123 ) ima_env_dict = ima_env.to_dict() - assert len(ima_env_dict) == 20 + assert len(ima_env_dict) == 21 assert ima_env_dict['CID_MAIN_NET'] == 123 - assert ima_env_dict['IMA_RPC'] == 10010 + assert ima_env_dict['RPC_PORT'] == 10010 isinstance(ima_env_dict['CID_SCHAIN'], str) From d5dd69fdc9dc9322d333ac79d33cb0e37e51d6a5 Mon Sep 17 00:00:00 2001 From: badrogger Date: Mon, 6 Feb 2023 18:31:17 +0000 Subject: [PATCH 06/84] Fix route test --- tests/routes/schains_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/routes/schains_test.py b/tests/routes/schains_test.py index 52c760461..95df2c1e7 100644 --- a/tests/routes/schains_test.py +++ b/tests/routes/schains_test.py @@ -88,7 +88,8 @@ def 
test_firewall_rules_route(skale_bp, schain_config): {'port': 10005, 'first_ip': '127.0.0.2', 'last_ip': '127.0.0.2'}, {'port': 10007, 'first_ip': None, 'last_ip': None}, {'port': 10008, 'first_ip': None, 'last_ip': None}, - {'port': 10009, 'first_ip': None, 'last_ip': None} + {'port': 10009, 'first_ip': None, 'last_ip': None}, + {'port': 10010, 'first_ip': '127.0.0.2', 'last_ip': '127.0.0.2'} ] } } From c8d388a4ba974c7e9dab713fc1bb3ec450ba71ac Mon Sep 17 00:00:00 2001 From: badrogger Date: Mon, 13 Feb 2023 18:00:04 +0000 Subject: [PATCH 07/84] Fix firewall tests --- tests/firewall/default_rule_controller_test.py | 7 ++++--- tests/firewall/rule_controller_test.py | 10 ++++++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/tests/firewall/default_rule_controller_test.py b/tests/firewall/default_rule_controller_test.py index d60851ccd..c2473e16f 100644 --- a/tests/firewall/default_rule_controller_test.py +++ b/tests/firewall/default_rule_controller_test.py @@ -104,7 +104,8 @@ def run_concurrent_rc_syncing( base_port + offset for offset in [ SkaledPorts.PROPOSAL.value, - SkaledPorts.BINARY_CONSENSUS.value + SkaledPorts.BINARY_CONSENSUS.value, + SkaledPorts.IMA_RPC.value ] for base_port in base_ports ] @@ -155,10 +156,10 @@ def run_concurrent_rc_syncing( if ip != own_ip: assert sum( map(lambda x: x[0] == ip, rules) - ) == 4 * schain_number, ip + ) == 5 * schain_number, ip assert sum( map(lambda x: x.first_ip == ip, c.rules) - ) == 4 * schain_number, ip + ) == 5 * schain_number, ip # Check that all internal ports rules are there except CATCHUP for p in internal_ports: diff --git a/tests/firewall/rule_controller_test.py b/tests/firewall/rule_controller_test.py index 9bb730afe..e0aa26a15 100644 --- a/tests/firewall/rule_controller_test.py +++ b/tests/firewall/rule_controller_test.py @@ -36,7 +36,10 @@ def test_schain_rule_controller(): SChainRule(port=10069, first_ip='15.15.15.15', last_ip='18.18.18.18'), SChainRule(port=10071, first_ip=None, 
last_ip=None), SChainRule(port=10072, first_ip=None, last_ip=None), - SChainRule(port=10073, first_ip=None, last_ip=None) + SChainRule(port=10073, first_ip=None, last_ip=None), + SChainRule(port=10074, first_ip='1.1.1.1', last_ip=None), + SChainRule(port=10074, first_ip='2.2.2.2', last_ip=None), + SChainRule(port=10074, first_ip='4.4.4.4', last_ip=None) } src = SChainTestRuleController( 'test', @@ -82,7 +85,10 @@ def test_schain_rule_controller(): SChainRule(port=10069, first_ip='20.20.20.20', last_ip='21.21.21.21'), SChainRule(port=10071, first_ip=None, last_ip=None), SChainRule(port=10072, first_ip=None, last_ip=None), - SChainRule(port=10073, first_ip=None, last_ip=None) + SChainRule(port=10073, first_ip=None, last_ip=None), + SChainRule(port=10074, first_ip='1.1.1.1', last_ip=None), + SChainRule(port=10074, first_ip='4.4.4.4', last_ip=None), + SChainRule(port=10074, first_ip='5.5.5.5', last_ip=None) } assert src.is_rules_synced() assert list(src.expected_rules()) == list(sorted(expected_rules)) From a4c02eb9fb1a606f66d148c98e25aa326e965b4a Mon Sep 17 00:00:00 2001 From: Alex Sheverdin Date: Mon, 10 Apr 2023 15:55:48 +0100 Subject: [PATCH 08/84] Remove OS version check --- core/node.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/core/node.py b/core/node.py index 09276d921..88ed21f0b 100644 --- a/core/node.py +++ b/core/node.py @@ -25,7 +25,6 @@ import logging import platform import hashlib -import distro import requests @@ -314,8 +313,6 @@ def get_node_hardware_info() -> dict: system_release = f'{platform.system()}-{platform.release()}' uname_version = platform.uname().version attached_storage_size = get_block_device_size() - os_name = distro.id() - os_version = distro.version() return { 'cpu_total_cores': psutil.cpu_count(logical=True), 'cpu_physical_cores': psutil.cpu_count(logical=False), @@ -325,9 +322,7 @@ def get_node_hardware_info() -> dict: 'mem_available': psutil.virtual_memory().available, 'system_release': system_release, 
'uname_version': uname_version, - 'attached_storage_size': attached_storage_size, - 'os_name': os_name, - 'os_version': os_version + 'attached_storage_size': attached_storage_size } From 84ad70ebd0a9bfab0de69cdb52bc0ea43bdf0687 Mon Sep 17 00:00:00 2001 From: Alex Sheverdin Date: Mon, 10 Apr 2023 15:56:26 +0100 Subject: [PATCH 09/84] Update dependencies --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index ca3eb1dd9..225683fac 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,7 +23,6 @@ filestorage-predeployed==1.1.0b2 config-controller-predeployed==1.0.1b0 psutil==5.9.3 -distro==1.8.0 colorful==0.5.4 celery==5.2.2 From 85c6abcf97d252a7e7bfa18ac5c5c8f2ba6b4721 Mon Sep 17 00:00:00 2001 From: Alex Sheverdin Date: Mon, 10 Apr 2023 15:56:49 +0100 Subject: [PATCH 10/84] Remove OS version check tests --- tests/node_test.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/node_test.py b/tests/node_test.py index 88be415ba..b2ee4ec1d 100644 --- a/tests/node_test.py +++ b/tests/node_test.py @@ -268,8 +268,6 @@ def test_get_node_hardware_info(get_block_device_size_mock): assert isinstance(info['mem_available'], int) assert isinstance(info['system_release'], str) assert isinstance(info['uname_version'], str) - assert isinstance(info['os_name'], str) - assert isinstance(info['os_version'], str) assert info['attached_storage_size'] == 300 From a18f17afb4b35c267084988a336824893ee8eada Mon Sep 17 00:00:00 2001 From: badrogger Date: Mon, 22 May 2023 18:57:45 +0000 Subject: [PATCH 11/84] Add monitor tasks module --- core/schains/monitor/tasks.py | 69 +++++++++++++++++++++++++++++ tests/schains/monitor/tasks_test.py | 7 +++ 2 files changed, 76 insertions(+) create mode 100644 core/schains/monitor/tasks.py create mode 100644 tests/schains/monitor/tasks_test.py diff --git a/core/schains/monitor/tasks.py b/core/schains/monitor/tasks.py new file mode 100644 index 000000000..ae9885301 --- /dev/null +++ 
b/core/schains/monitor/tasks.py @@ -0,0 +1,69 @@ +import time +import logging +from concurrent.futures import ThreadPoolExecutor +from typing import Callable + +logger = logging.getLogger(__name__) + + +class Task: + def __init__( + self, + schain: str, + name: str, + action: Callable, + index: int, + *args, + **kwargs + ) -> None: + self.schain = schain + self.name = name + self.action = action + self.index = index + self.args = args + self.kwargs = kwargs + + @property + def signature(self) -> str: + return f'[{self.schain}-{self.name}]' + + def run(self): + self.action(*self.args, **self.kwargs) + + +def ensure_tasks(executor, tasks, futures): + for i, task in enumerate(tasks): + f = futures[i] + if f is not None and not f.running(): + result = f.result() + logger.info('Task %s finished with %s', task.signature, result) + if f is None or not f.running(): + logger.info('Launching task %s', task.signature) + futures[i] = executor.submit(task.run()) + + +def start_tasks(schain: str): + logger.info('Starting schain %s tasks', schain) + tasks = [ + Task(schain, 'config-task', monitor_chain, 0), + Task(schain, 'skaled-task', monitor_chain, 1), + ] + futures = [None for i in range(len(tasks))] + with ThreadPoolExecutor(max_workers=len(tasks)) as executor: + while True: + ensure_tasks(executor, tasks, futures) + + +def monitor_chain(): + for i in range(50): + if i % 5 == 0: + logger.info('Monitoring chain %d', i) + time.sleep(2) + + +def monitor_config(): + pass + + +def monitor_skaled(): + pass diff --git a/tests/schains/monitor/tasks_test.py b/tests/schains/monitor/tasks_test.py new file mode 100644 index 000000000..93faa0577 --- /dev/null +++ b/tests/schains/monitor/tasks_test.py @@ -0,0 +1,7 @@ +import time +from core.schains.monitor.tasks import start_tasks + + +def test_tasks(): + start_tasks('test-chain') + time.sleep(60) From d91df36793370c4312434fb601af162c1ea0d39f Mon Sep 17 00:00:00 2001 From: badrogger Date: Wed, 24 May 2023 12:43:06 +0000 Subject: [PATCH 
12/84] Add fail after error flag for install_python_dependencies --- scripts/install_python_dependencies.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/install_python_dependencies.sh b/scripts/install_python_dependencies.sh index 391482658..d7498c02a 100644 --- a/scripts/install_python_dependencies.sh +++ b/scripts/install_python_dependencies.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +set -ea python -m pip install --upgrade pip pip install -r requirements.txt @@ -6,4 +7,4 @@ pip install -r requirements-dev.txt pip uninstall pycrypto -y pip uninstall pycryptodome -y pip install pycryptodome -find . -name "*.pyc" -exec rm -f {} \; \ No newline at end of file +find . -name "*.pyc" -exec rm -f {} \; From c93b125eaaca90320fd194210237ee5f2382e476 Mon Sep 17 00:00:00 2001 From: badrogger Date: Wed, 24 May 2023 14:21:55 +0000 Subject: [PATCH 13/84] Bump codecov to 2.1.13 --- requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 5ad45b3f6..63a24367c 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -5,4 +5,4 @@ mock==4.0.2 blinker==1.4 pytest-cov==2.9.0 -codecov==2.1.9 +codecov==2.1.13 From 48df022d5f276ec31e0a19e75910161af802390d Mon Sep 17 00:00:00 2001 From: badrogger Date: Wed, 24 May 2023 14:35:31 +0000 Subject: [PATCH 14/84] Bump flask and cryptography --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 36b5530c3..78917ef5b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ peewee==3.9.5 -Flask==2.0.2 +Flask==2.2.5 Werkzeug==2.0.2 gunicorn==20.1.0 @@ -30,7 +30,7 @@ celery==5.2.2 filelock==3.0.12 pyOpenSSL==19.1.0 -cryptography==35.0.0 +cryptography==39.0.1 python-dateutil==2.8.1 python-telegram-bot==12.8 sh==1.14.1 From a50daeb7c69cce482c9fe4ff286fe24ec1906d64 Mon Sep 17 00:00:00 2001 From: badrogger Date: Wed, 24 May 2023 14:39:26 +0000 Subject: 
[PATCH 15/84] Bump Werkzeug to 2.2.2 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 78917ef5b..40c5aedac 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ peewee==3.9.5 Flask==2.2.5 -Werkzeug==2.0.2 +Werkzeug==2.2.2 gunicorn==20.1.0 Jinja2==3.0.3 From faec41f569be54047ade1670f8a4629f51ebdf0a Mon Sep 17 00:00:00 2001 From: badrogger Date: Wed, 24 May 2023 18:19:47 +0000 Subject: [PATCH 16/84] Bump pyOpenSSL to 2.1.1 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 40c5aedac..e239a61be 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,7 +29,7 @@ celery==5.2.2 filelock==3.0.12 -pyOpenSSL==19.1.0 +pyOpenSSL==23.1.1 cryptography==39.0.1 python-dateutil==2.8.1 python-telegram-bot==12.8 From 1e234eb5ab3f621e313258a1f7549fd853d1c7b1 Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 25 May 2023 12:00:13 +0000 Subject: [PATCH 17/84] Update docker.py version to 6.1.2 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e239a61be..393b038ce 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,7 @@ gunicorn==20.1.0 Jinja2==3.0.3 -docker==5.0.3 +docker==6.1.2 simple-crypt==4.1.7 pycryptodome==3.12.0 python-iptables==1.0.0 From 9ebea08b4a8440e53485a79be8e62301f9c059c2 Mon Sep 17 00:00:00 2001 From: badrogger Date: Fri, 26 May 2023 18:14:41 +0000 Subject: [PATCH 18/84] Split checks into separate classes for each task --- core/schains/checks.py | 156 +++++++++++++++++++++++++++++------ tests/schains/checks_test.py | 2 +- 2 files changed, 133 insertions(+), 25 deletions(-) diff --git a/core/schains/checks.py b/core/schains/checks.py index 9840e9b41..3c86b44dd 100644 --- a/core/schains/checks.py +++ b/core/schains/checks.py @@ -20,6 +20,8 @@ import os import time import logging +from abc import ABC, 
abstractmethod +from typing import Any, Dict from core.schains.config.directory import ( get_schain_config, @@ -75,26 +77,28 @@ def __init__(self, status: bool, data: dict = None): self.data = data if data else {} -class SChainChecks: +class IChecks(ABC): + @abstractmethod + def get_all(self, log=True, save=False, checks_filter=None) -> Dict: + pass + + @abstractmethod + def is_healthy(self) -> bool: + pass + + +class ConfigChecks(IChecks): def __init__( self, schain_name: str, node_id: int, schain_record: SChainRecord, - rule_controller: IRuleController, - rotation_id: int = 0, - *, - ima_linked: bool = True, - dutils: DockerUtils = None + rotation_id: int ): self.name = schain_name self.node_id = node_id self.schain_record = schain_record self.rotation_id = rotation_id - self.dutils = dutils or DockerUtils() - self.container_name = get_container_name(SCHAIN_CONTAINER, self.name) - self.ima_linked = ima_linked - self.rc = rule_controller @property def config_dir(self) -> CheckRes: @@ -113,6 +117,7 @@ def dkg(self) -> CheckRes: @property def config(self) -> CheckRes: + # TODO: this should be check for the newest config """Checks that sChain config file exists""" config_filepath = schain_config_filepath(self.name) if not os.path.isfile(config_filepath): @@ -121,6 +126,72 @@ def config(self) -> CheckRes: schain_config_version_match(self.name, self.schain_record) ) + def get_all(self, log=True, save=False, checks_filter=None) -> Dict: + if not checks_filter: + checks_filter = API_ALLOWED_CHECKS + checks_dict = {} + for check in checks_filter: + if hasattr(self, check): + if check not in API_ALLOWED_CHECKS: + logger.warning('Check %s is not allowed or does not exist', check) + else: + checks_dict[check] = getattr(self, check).status + if log: + log_checks_dict(self.name, checks_dict) + if save: + save_checks_dict(self.name, checks_dict) + return checks_dict + + def is_healthy(self) -> bool: + checks = self.get_all() + return False not in checks.values() + + +class 
ContainerChecks(IChecks): + def __init__( + self, + schain_name: str, + schain_record: SChainRecord, + rule_controller: IRuleController, + *, + ima_linked: bool = True, + dutils: DockerUtils = None + ): + self.name = schain_name + self.schain_record = schain_record + self.dutils = dutils or DockerUtils() + self.container_name = get_container_name(SCHAIN_CONTAINER, self.name) + self.ima_linked = ima_linked + self.rc = rule_controller + + def get_all(self, log=True, save=False, checks_filter=None) -> Dict: + if not checks_filter: + checks_filter = API_ALLOWED_CHECKS + checks_dict = {} + for check in checks_filter: + if check == 'ima_container' and (DISABLE_IMA or not self.ima_linked): + logger.info(f'Check {check} will be skipped - IMA is not linked') + elif check not in API_ALLOWED_CHECKS: + logger.warning(f'Check {check} is not allowed or does not exist') + else: + if hasattr(self, check): + checks_dict[check] = getattr(self, check).status + if log: + log_checks_dict(self.name, checks_dict) + if save: + save_checks_dict(self.name, checks_dict) + return checks_dict + + def is_healthy(self) -> bool: + checks = self.get_all() + return False not in checks.values() + + @property + def config_file(self) -> CheckRes: + """ Checks that at least one sChain config file exists """ + config_filepath = schain_config_filepath(self.name) + return CheckRes(os.path.isfile(config_filepath)) + @property def volume(self) -> CheckRes: """Checks that sChain volume exists""" @@ -129,7 +200,7 @@ def volume(self) -> CheckRes: @property def firewall_rules(self) -> CheckRes: """Checks that firewall rules are set correctly""" - if self.config.status: + if self.config_file.status: conf = get_schain_config(self.name) base_port = get_base_port_from_config(conf) node_ips = get_node_ips_from_config(conf) @@ -167,7 +238,7 @@ def ima_container(self) -> CheckRes: def rpc(self) -> CheckRes: """Checks that local skaled RPC is accessible""" res = False - if self.config.status: + if 
self.config_file.status: http_endpoint = get_local_schain_http_endpoint(self.name) timeout = get_endpoint_alive_check_timeout( self.schain_record.failed_rpc_count @@ -178,7 +249,7 @@ def rpc(self) -> CheckRes: @property def blocks(self) -> CheckRes: """Checks that local skaled is mining blocks""" - if self.config.status: + if self.config_file.status: http_endpoint = get_local_schain_http_endpoint(self.name) return CheckRes(check_endpoint_blocks(http_endpoint)) return CheckRes(False) @@ -188,22 +259,59 @@ def process(self) -> CheckRes: """Checks that sChain monitor process is running""" return CheckRes(is_monitor_process_alive(self.schain_record.monitor_id)) + +class SChainChecks(IChecks): + def __init__( + self, + schain_name: str, + node_id: int, + schain_record: SChainRecord, + rule_controller: IRuleController, + rotation_id: int = 0, + *, + ima_linked: bool = True, + dutils: DockerUtils = None + ): + self._subjects = [ + ConfigChecks( + schain_name=schain_name, + node_id=node_id, + schain_record=schain_record, + rotation_id=rotation_id + ), + ContainerChecks( + schain_name=schain_name, + schain_record=schain_record, + rule_controller=rule_controller, + ima_linked=ima_linked, + dutils=dutils + ) + ] + + def __getattr__(self, attr: str) -> Any: + for subj in self._subjects: + if attr in dir(subj): + return getattr(subj, attr) + raise AttributeError(f'No such attribute {attr}') + def get_all(self, log=True, save=False, checks_filter=None): if not checks_filter: checks_filter = API_ALLOWED_CHECKS - checks_dict = {} - for check in checks_filter: - if check == 'ima_container' and (DISABLE_IMA or not self.ima_linked): - logger.info(f'Check {check} will be skipped - IMA is not linked') - elif check not in API_ALLOWED_CHECKS: - logger.warning(f'Check {check} is not allowed or does not exist') - else: - checks_dict[check] = getattr(self, check).status + + plain_checks = {} + for subj in self._subjects: + subj_checks = subj.get_all( + log=False, + save=False, + 
checks_filter=checks_filter + ) + plain_checks.update(subj_checks) + if log: - log_checks_dict(self.name, checks_dict) + log_checks_dict(self.name, plain_checks) if save: - save_checks_dict(self.name, checks_dict) - return checks_dict + save_checks_dict(self.name, plain_checks) + return plain_checks def is_healthy(self): checks = self.get_all() diff --git a/tests/schains/checks_test.py b/tests/schains/checks_test.py index e86306f69..65574bc7d 100644 --- a/tests/schains/checks_test.py +++ b/tests/schains/checks_test.py @@ -117,7 +117,7 @@ def test_config_check(schain_checks, sample_false_checks): def test_config_check_wrong_version(schain_checks): - schain_checks.schain_record = SchainRecordMock('9.8.7') + schain_checks._subjects[0].schain_record = SchainRecordMock('9.8.7') assert not schain_checks.config.status From 615d0e9efcfb65c6e7e0ec5c554c0f816c634e26 Mon Sep 17 00:00:00 2001 From: badrogger Date: Fri, 26 May 2023 18:15:07 +0000 Subject: [PATCH 19/84] Bump requirements --- requirements.txt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/requirements.txt b/requirements.txt index 225683fac..393b038ce 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,16 +1,16 @@ peewee==3.9.5 -Flask==2.0.2 -Werkzeug==2.0.2 +Flask==2.2.5 +Werkzeug==2.2.2 gunicorn==20.1.0 Jinja2==3.0.3 -docker==5.0.3 +docker==6.1.2 simple-crypt==4.1.7 pycryptodome==3.12.0 python-iptables==1.0.0 -skale.py==5.8dev4 +skale.py==5.8b1 ima-predeployed==1.3.5b1 etherbase-predeployed==1.1.0b1 @@ -29,8 +29,8 @@ celery==5.2.2 filelock==3.0.12 -pyOpenSSL==19.1.0 -cryptography==35.0.0 +pyOpenSSL==23.1.1 +cryptography==39.0.1 python-dateutil==2.8.1 python-telegram-bot==12.8 sh==1.14.1 From 91489a2f5b5affdb0c3f52ddd900841e1267fc5b Mon Sep 17 00:00:00 2001 From: badrogger Date: Mon, 29 May 2023 21:03:24 +0000 Subject: [PATCH 20/84] Add action module. 
Add config monitor module --- core/schains/monitor/action.py | 325 +++++++++++++++++++++++++ core/schains/monitor/config_monitor.py | 43 ++++ core/schains/monitor/tasks.py | 69 ------ core/schains/task.py | 39 +++ tests/schains/monitor/tasks_test.py | 7 - tests/schains/task_test.py | 29 +++ 6 files changed, 436 insertions(+), 76 deletions(-) create mode 100644 core/schains/monitor/action.py create mode 100644 core/schains/monitor/config_monitor.py delete mode 100644 core/schains/monitor/tasks.py create mode 100644 core/schains/task.py delete mode 100644 tests/schains/monitor/tasks_test.py create mode 100644 tests/schains/task_test.py diff --git a/core/schains/monitor/action.py b/core/schains/monitor/action.py new file mode 100644 index 000000000..319f94164 --- /dev/null +++ b/core/schains/monitor/action.py @@ -0,0 +1,325 @@ +# -*- coding: utf-8 -*- +# +# This file is part of SKALE Admin +# +# Copyright (C) 2021-Present SKALE Labs +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+ +import time +import logging +from datetime import datetime +from functools import wraps + +from skale import Skale + +from core.node_config import NodeConfig +from core.schains.checks import IChecks +from core.schains.dkg import safe_run_dkg, save_dkg_results, DkgError +from core.schains.dkg.utils import get_secret_key_share_filepath +from core.schains.cleaner import ( + remove_schain_container, + remove_schain_volume +) +from core.schains.firewall.types import IRuleController + +from core.schains.volume import init_data_volume +from core.schains.rotation import get_schain_public_key + +from core.schains.limits import get_schain_type + +from core.schains.monitor.containers import monitor_schain_container, monitor_ima_container +from core.schains.monitor.rpc import handle_failed_schain_rpc +from core.schains.runner import ( + restart_container, is_container_exists, get_container_name +) +from core.schains.config import init_schain_config, init_schain_config_dir +from core.schains.config.directory import get_schain_config +from core.schains.config.helper import ( + get_base_port_from_config, + get_node_ips_from_config, + get_own_ip_from_config +) +from core.schains.ima import ImaData +from core.schains.skaled_status import init_skaled_status + +from tools.docker_utils import DockerUtils +from tools.str_formatters import arguments_list_string +from tools.configs.containers import SCHAIN_CONTAINER + +from web.models.schain import upsert_schain_record, set_first_run, SChainRecord + + +logger = logging.getLogger(__name__) + + +CONTAINER_POST_RUN_DELAY = 20 +SCHAIN_CLEANUP_TIMEOUT = 10 + + +class BaseActionManager: + def __init__(self, name: str): + self.name = name + self.executed_blocks = {} + self.p = f'{type(self).__name__} - schain: {self.name} -' + + @staticmethod + def monitor_block(f): + @wraps(f) + def _monitor_block(self, *args, **kwargs): + ts = time.time() + initial_status = f(self, *args, **kwargs) + te = time.time() + self.executed_blocks[f.__name__] = { 
+ 'ts': ts, + 'te': te, + 'initial_status': initial_status + } + return initial_status + return _monitor_block + + @property + def schain_record(self) -> SChainRecord: + return upsert_schain_record(self.name) + + def _upd_last_seen(self) -> None: + self.schain_record.set_monitor_last_seen(datetime.now()) + + def _upd_schain_record(self) -> None: + if self.schain_record.first_run: + self.schain_record.set_restart_count(0) + self.schain_record.set_failed_rpc_count(0) + set_first_run(self.name, False) + self.schain_record.set_new_schain(False) + logger.info( + f'sChain {self.name}: ' + f'restart_count - {self.schain_record.restart_count}, ' + f'failed_rpc_count - {self.schain_record.failed_rpc_count}' + ) + + +class ConfigActionManager(BaseActionManager): + def __init__( + self, + skale: Skale, + schain: dict, + node_config: NodeConfig, + rotation_data: dict, + checks: IChecks + ): + self.skale = skale + self.schain = schain + self.generation = schain['generation'] + self.node_config = node_config + self.checks = checks + + self.rotation_data = rotation_data + self.rotation_id = rotation_data['rotation_id'] + self.finish_ts = skale.node_rotation.get_schain_finish_ts( + node_id=rotation_data['leaving_node'], + schain_name=self.schain['name'] + ) + super().__init__(name=schain['name']) + + @BaseActionManager.monitor_block + def config_dir(self) -> bool: + initial_status = self.checks.config_dir.status + if not initial_status: + init_schain_config_dir(self.name) + else: + logger.info(f'{self.p} config_dir - ok') + return initial_status + + @BaseActionManager.monitor_block + def dkg(self) -> bool: + initial_status = self.checks.dkg.status + if not initial_status: + dkg_result = safe_run_dkg( + skale=self.skale, + schain_name=self.name, + node_id=self.node_config.id, + sgx_key_name=self.node_config.sgx_key_name, + rotation_id=self.rotation_id + ) + if dkg_result.status.is_done(): + save_dkg_results( + dkg_result.keys_data, + get_secret_key_share_filepath(self.name, 
self.rotation_id) + ) + self.schain_record.set_dkg_status(dkg_result.status) + if not dkg_result.status.is_done(): + raise DkgError(f'{self.p} DKG failed') + else: + logger.info(f'{self.p} dkg - ok') + return initial_status + + @BaseActionManager.monitor_block + def config(self, overwrite=False) -> bool: + initial_status = self.checks.config.status + if not initial_status or overwrite: + init_schain_config( + skale=self.skale, + node_id=self.node_config.id, + schain_name=self.name, + generation=self.generation, + ecdsa_sgx_key_name=self.node_config.sgx_key_name, + rotation_data=self.rotation_data, + schain_record=self.schain_record + ) + else: + logger.info(f'{self.p} config - ok') + return initial_status + + +class ContainerActionManager(BaseActionManager): + def __init__( + self, + ima_data: ImaData, + schain: dict, + checks: IChecks, + rule_controller: IRuleController, + finish_ts: int, + dutils: DockerUtils = None + ): + self.ima_data = ima_data + self.schain = schain + self.generation = schain['generation'] + self.checks = checks + + self.rc = rule_controller + self.skaled_status = init_skaled_status(self.name) + self.schain_type = get_schain_type(schain['partOfNode']) + + self.dutils = dutils or DockerUtils() + + super().__init__(name=schain['name']) + + @BaseActionManager.monitor_block + def volume(self) -> bool: + initial_status = self.checks.volume.status + if not initial_status: + init_data_volume(self.schain, dutils=self.dutils) + else: + logger.info(f'{self.p} volume - ok') + return initial_status + + @BaseActionManager.monitor_block + def firewall_rules(self, overwrite=False) -> bool: + initial_status = self.checks.firewall_rules.status + if not initial_status: + logger.info('Configuring firewall rules') + conf = get_schain_config(self.name) + base_port = get_base_port_from_config(conf) + node_ips = get_node_ips_from_config(conf) + own_ip = get_own_ip_from_config(conf) + self.rc.configure( + base_port=base_port, + own_ip=own_ip, + node_ips=node_ips + ) 
+ self.rc.sync() + return initial_status + + @BaseActionManager.monitor_block + def skaled_container(self, download_snapshot: bool = False, delay_start: bool = False) -> bool: + initial_status = self.checks.skaled_container.status + if not initial_status: + public_key, start_ts = None, None + + if download_snapshot: + public_key = get_schain_public_key(self.skale, self.name) + if delay_start: + start_ts = self.finish_ts + + monitor_schain_container( + self.schain, + schain_record=self.schain_record, + skaled_status=self.skaled_status, + public_key=public_key, + start_ts=start_ts, + dutils=self.dutils + ) + time.sleep(CONTAINER_POST_RUN_DELAY) + else: + self.schain_record.set_restart_count(0) + logger.info(f'{self.p} skaled_container - ok') + return initial_status + + @BaseActionManager.monitor_block + def restart_skaled_container(self) -> bool: + initial_status = True + if not is_container_exists(self.name, dutils=self.dutils): + logger.info(f'sChain {self.name}: container doesn\'t exits, running container...') + initial_status = self.skaled_container() + else: + restart_container(SCHAIN_CONTAINER, self.schain, dutils=self.dutils) + return initial_status + + @BaseActionManager.monitor_block + def reloaded_skaled_container(self) -> bool: + logger.info('Starting skaled with reloaded configuration') + initial_status = True + if is_container_exists(self.name, dutils=self.dutils): + remove_schain_container(self.name, dutils=self.dutils) + else: + logger.warning(f'sChain {self.name}: container doesn\'t exists') + initial_status = self.skaled_container() + return initial_status + + @BaseActionManager.monitor_block + def skaled_rpc(self) -> bool: + initial_status = self.checks.rpc.status + if not initial_status: + self.display_skaled_logs() + handle_failed_schain_rpc( + self.schain, + schain_record=self.schain_record, + skaled_status=self.skaled_status, + dutils=self.dutils + ) + else: + self.schain_record.set_failed_rpc_count(0) + logger.info(f'{self.p} rpc - ok') + 
return initial_status + + @BaseActionManager.monitor_block + def ima_container(self) -> bool: + initial_status = self.checks.ima_container.status + if not initial_status: + monitor_ima_container( + self.schain, + self.ima_data, + dutils=self.dutils + ) + else: + logger.info(f'{self.p} ima_container - ok') + return initial_status + + @BaseActionManager.monitor_block + def cleanup_schain_docker_entity(self) -> bool: + remove_schain_container(self.name, dutils=self.dutils) + time.sleep(SCHAIN_CLEANUP_TIMEOUT) + remove_schain_volume(self.name, dutils=self.dutils) + return True + + def log_executed_blocks(self) -> None: + logger.info(arguments_list_string( + self.executed_blocks, f'Finished monitor runner - {self.name}')) + + def display_skaled_logs(self) -> None: + if is_container_exists(self.name, dutils=self.dutils): + container_name = get_container_name(SCHAIN_CONTAINER, self.name) + self.dutils.display_container_logs(container_name) + else: + logger.warning(f'sChain {self.name}: container doesn\'t exists, could not show logs') diff --git a/core/schains/monitor/config_monitor.py b/core/schains/monitor/config_monitor.py new file mode 100644 index 000000000..b27725aa1 --- /dev/null +++ b/core/schains/monitor/config_monitor.py @@ -0,0 +1,43 @@ +# -*- coding: utf-8 -*- +# +# This file is part of SKALE Admin +# +# Copyright (C) 2021 SKALE Labs +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+ +import logging +from abc import abstractmethod + +from core.schains.monitor.base_monitor import IMonitor +from core.schains.monitor.action_manager import ConfigActionManager + + +logger = logging.getLogger(__name__) + + +class BaseConfigMonitor(IMonitor): + def __init__(self, action_manager: ConfigActionManager): + self.action_manager = action_manager + + @abstractmethod + def run(self) -> None: + pass + + +class RegularConfigMonitor(BaseConfigMonitor): + def run(self) -> None: + self.action_manager.config_dir() + self.action_manager.dkg() + self.action_manager.config() diff --git a/core/schains/monitor/tasks.py b/core/schains/monitor/tasks.py deleted file mode 100644 index ae9885301..000000000 --- a/core/schains/monitor/tasks.py +++ /dev/null @@ -1,69 +0,0 @@ -import time -import logging -from concurrent.futures import ThreadPoolExecutor -from typing import Callable - -logger = logging.getLogger(__name__) - - -class Task: - def __init__( - self, - schain: str, - name: str, - action: Callable, - index: int, - *args, - **kwargs - ) -> None: - self.schain = schain - self.name = name - self.action = action - self.index = index - self.args = args - self.kwargs = kwargs - - @property - def signature(self) -> str: - return f'[{self.schain}-{self.name}]' - - def run(self): - self.action(*self.args, **self.kwargs) - - -def ensure_tasks(executor, tasks, futures): - for i, task in enumerate(tasks): - f = futures[i] - if f is not None and not f.running(): - result = f.result() - logger.info('Task %s finished with %s', task.signature, result) - if f is None or not f.running(): - logger.info('Launching task %s', task.signature) - futures[i] = executor.submit(task.run()) - - -def start_tasks(schain: str): - logger.info('Starting schain %s tasks', schain) - tasks = [ - Task(schain, 'config-task', monitor_chain, 0), - Task(schain, 'skaled-task', monitor_chain, 1), - ] - futures = [None for i in range(len(tasks))] - with ThreadPoolExecutor(max_workers=len(tasks)) as executor: - 
while True: - ensure_tasks(executor, tasks, futures) - - -def monitor_chain(): - for i in range(50): - if i % 5 == 0: - logger.info('Monitoring chain %d', i) - time.sleep(2) - - -def monitor_config(): - pass - - -def monitor_skaled(): - pass diff --git a/core/schains/task.py b/core/schains/task.py new file mode 100644 index 000000000..5261c55f1 --- /dev/null +++ b/core/schains/task.py @@ -0,0 +1,39 @@ +import logging +import time +from concurrent.futures import Future, ThreadPoolExecutor +from typing import Callable, List, Optional + +logger = logging.getLogger(__name__) + + +class Task: + def __init__(self, name: str, action: Callable, index: int = 0) -> None: + self.name = name + self.index = index + self.action = action + + def run(self) -> None: + self.action() + + +def keep_tasks_running( + executor: ThreadPoolExecutor, + tasks: List[Task], + futures: List[Optional[Future]] +) -> None: + for i, task in enumerate(tasks): + future = futures[i] + if future is not None and not future.running(): + result = future.result() + logger.info('Task %s finished with %s', task.name, result) + if future is None or not future.running(): + logger.info('Running task %s', task.name) + futures[i] = executor.submit(task.run) + + +def run_tasks(tasks: List[Task]) -> None: + with ThreadPoolExecutor(max_workers=len(tasks)) as executor: + futures: List[Optional[Future]] = [None for i in range(len(tasks))] + while True: + keep_tasks_running(executor, tasks, futures) + time.sleep(30) diff --git a/tests/schains/monitor/tasks_test.py b/tests/schains/monitor/tasks_test.py deleted file mode 100644 index 93faa0577..000000000 --- a/tests/schains/monitor/tasks_test.py +++ /dev/null @@ -1,7 +0,0 @@ -import time -from core.schains.monitor.tasks import start_tasks - - -def test_tasks(): - start_tasks('test-chain') - time.sleep(60) diff --git a/tests/schains/task_test.py b/tests/schains/task_test.py new file mode 100644 index 000000000..b27f41e66 --- /dev/null +++ b/tests/schains/task_test.py @@ 
-0,0 +1,29 @@ +import functools +import time +from core.schains.task import run_tasks, Task + +ITERATIONS = 10 +SCHAINS_NUM = 10 + + +class StopActionError(Exception): + pass + + +def action(name): + for i in range(ITERATIONS): + time.sleep(2) + raise StopActionError(f'Stopping {name}') + + +def test_tasks(): + tasks = [ + Task( + f'test-schain-{i}', + functools.partial(action, name=f'test-schain-{i}'), + i + ) + for i in range(SCHAINS_NUM) + ] + run_tasks(tasks=tasks) + time.sleep(3) From 70a5210645e3d815f2c56f4c808d025c9885d5b6 Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 30 May 2023 12:11:15 +0000 Subject: [PATCH 21/84] Add tests for config actions --- .../monitor/action/config_action_test.py | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 tests/schains/monitor/action/config_action_test.py diff --git a/tests/schains/monitor/action/config_action_test.py b/tests/schains/monitor/action/config_action_test.py new file mode 100644 index 000000000..48e894150 --- /dev/null +++ b/tests/schains/monitor/action/config_action_test.py @@ -0,0 +1,65 @@ +import pytest + +from core.schains.checks import ConfigChecks +from core.schains.monitor.action import ConfigActionManager + +from web.models.schain import SChainRecord + + +@pytest.fixture +def rotation_data(schain_db, skale): + return skale.node_rotation.get_rotation(schain_db) + + +@pytest.fixture +def config_checks( + schain_db, + skale, + node_config, + schain_on_contracts, + rotation_data +): + name = schain_db + schain_record = SChainRecord.get_by_name(name) + return ConfigChecks( + schain_name=name, + node_id=node_config.id, + schain_record=schain_record, + rotation_id=rotation_data['rotation_id'] + ) + + +@pytest.fixture +def config_am( + schain_db, + skale, + node_config, + schain_on_contracts, + predeployed_ima, + secret_key, + config_checks +): + name = schain_db + print('IVD', name) + print('IVD', schain_on_contracts) + rotation_data = skale.node_rotation.get_rotation(name) + 
schain = skale.schains.get_by_name(name) + print('IVD', schain) + return ConfigActionManager( + skale=skale, + schain=schain, + node_config=node_config, + rotation_data=rotation_data, + checks=config_checks + ) + + +def test_config_actions(config_am, config_checks): + config_am.config_dir() + assert config_checks.config_dir.status + assert not config_checks.config.status + + # DKG action is tested separetely in dkg_test module + + config_am.config() + assert config_checks.config.status From 9b9ac74dbc5518bb34496198b4dcc53c38cfca8c Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 30 May 2023 12:33:02 +0000 Subject: [PATCH 22/84] Add tests for container actions --- .../monitor/action/container_action_test.py | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 tests/schains/monitor/action/container_action_test.py diff --git a/tests/schains/monitor/action/container_action_test.py b/tests/schains/monitor/action/container_action_test.py new file mode 100644 index 000000000..bf5b6ceec --- /dev/null +++ b/tests/schains/monitor/action/container_action_test.py @@ -0,0 +1,68 @@ +import pytest + +from core.schains.checks import ContainerChecks +from core.schains.monitor.action import ContainerActionManager + +from web.models.schain import SChainRecord + + +@pytest.fixture +def rotation_data(schain_db, skale): + return skale.node_rotation.get_rotation(schain_db) + + +@pytest.fixture +def container_checks( + schain_db, + skale, + rule_controller, + dutils +): + name = schain_db + schain_record = SChainRecord.get_by_name(name) + return ContainerChecks( + schain_name=name, + schain_record=schain_record, + rule_controller=rule_controller, + ima_linked=True, + dutils=dutils + ) + + +@pytest.fixture +def container_am( + schain_db, + skale, + node_config, + rule_controller, + schain_on_contracts, + predeployed_ima, + rotation_data, + secret_key, + ima_data, + ssl_folder, + dutils, + container_checks +): + name = schain_db + finish_ts = 
skale.node_rotation.get_schain_finish_ts( + node_id=rotation_data['leaving_node'], + schain_name=name + ) + rotation_data = skale.node_rotation.get_rotation(name) + schain = skale.schains.get_by_name(name) + return ContainerActionManager( + schain=schain, + rule_controller=rule_controller, + ima_data=ima_data, + finish_ts=finish_ts, + checks=container_checks, + dutils=dutils + ) + + +def test_container_actions(container_am, container_checks): + container_am.firewall_rules() + container_am.volume() + container_am.skaled_container() + container_am.ima_container() From a14f9b318b4eb7268dd30a85e1dcb7d21541eab4 Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 30 May 2023 12:54:26 +0000 Subject: [PATCH 23/84] Extend action tests --- .../monitor/action/config_action_test.py | 3 -- ...r_action_test.py => skaled_action_test.py} | 33 +++++++++++-------- 2 files changed, 20 insertions(+), 16 deletions(-) rename tests/schains/monitor/action/{container_action_test.py => skaled_action_test.py} (61%) diff --git a/tests/schains/monitor/action/config_action_test.py b/tests/schains/monitor/action/config_action_test.py index 48e894150..2125976a4 100644 --- a/tests/schains/monitor/action/config_action_test.py +++ b/tests/schains/monitor/action/config_action_test.py @@ -40,11 +40,8 @@ def config_am( config_checks ): name = schain_db - print('IVD', name) - print('IVD', schain_on_contracts) rotation_data = skale.node_rotation.get_rotation(name) schain = skale.schains.get_by_name(name) - print('IVD', schain) return ConfigActionManager( skale=skale, schain=schain, diff --git a/tests/schains/monitor/action/container_action_test.py b/tests/schains/monitor/action/skaled_action_test.py similarity index 61% rename from tests/schains/monitor/action/container_action_test.py rename to tests/schains/monitor/action/skaled_action_test.py index bf5b6ceec..1bf8935da 100644 --- a/tests/schains/monitor/action/container_action_test.py +++ b/tests/schains/monitor/action/skaled_action_test.py @@ -1,7 
+1,7 @@ import pytest -from core.schains.checks import ContainerChecks -from core.schains.monitor.action import ContainerActionManager +from core.schains.checks import SkaledChecks +from core.schains.monitor.action import SkaledActionManager from web.models.schain import SChainRecord @@ -12,7 +12,7 @@ def rotation_data(schain_db, skale): @pytest.fixture -def container_checks( +def skaled_checks( schain_db, skale, rule_controller, @@ -20,7 +20,7 @@ def container_checks( ): name = schain_db schain_record = SChainRecord.get_by_name(name) - return ContainerChecks( + return SkaledChecks( schain_name=name, schain_record=schain_record, rule_controller=rule_controller, @@ -30,7 +30,7 @@ def container_checks( @pytest.fixture -def container_am( +def skaled_am( schain_db, skale, node_config, @@ -42,7 +42,7 @@ def container_am( ima_data, ssl_folder, dutils, - container_checks + skaled_checks ): name = schain_db finish_ts = skale.node_rotation.get_schain_finish_ts( @@ -51,18 +51,25 @@ def container_am( ) rotation_data = skale.node_rotation.get_rotation(name) schain = skale.schains.get_by_name(name) - return ContainerActionManager( + return SkaledActionManager( schain=schain, rule_controller=rule_controller, ima_data=ima_data, finish_ts=finish_ts, - checks=container_checks, + checks=skaled_checks, dutils=dutils ) -def test_container_actions(container_am, container_checks): - container_am.firewall_rules() - container_am.volume() - container_am.skaled_container() - container_am.ima_container() +def test_skaled_actions(skaled_am, skaled_checks, cleanup_schain_containers): + try: + skaled_am.firewall_rules() + assert skaled_checks.firewall_rules + skaled_am.volume() + assert skaled_checks.volume + skaled_am.skaled_container() + assert skaled_checks.skaled_container + skaled_am.ima_container() + assert skaled_checks.ima_container + finally: + skaled_am.cleanup_schain_docker_entity() From f45541c8b4cd02c7fa663efc4adbc2cccb3c90b7 Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 30 
May 2023 12:55:11 +0000 Subject: [PATCH 24/84] Rename container -> skaled --- core/schains/checks.py | 2 +- core/schains/monitor/action.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/core/schains/checks.py b/core/schains/checks.py index 3c86b44dd..672893e92 100644 --- a/core/schains/checks.py +++ b/core/schains/checks.py @@ -147,7 +147,7 @@ def is_healthy(self) -> bool: return False not in checks.values() -class ContainerChecks(IChecks): +class SkaledChecks(IChecks): def __init__( self, schain_name: str, diff --git a/core/schains/monitor/action.py b/core/schains/monitor/action.py index 319f94164..b5a2ca202 100644 --- a/core/schains/monitor/action.py +++ b/core/schains/monitor/action.py @@ -182,14 +182,14 @@ def config(self, overwrite=False) -> bool: return initial_status -class ContainerActionManager(BaseActionManager): +class SkaledActionManager(BaseActionManager): def __init__( self, - ima_data: ImaData, schain: dict, - checks: IChecks, + ima_data: ImaData, rule_controller: IRuleController, finish_ts: int, + checks: IChecks, dutils: DockerUtils = None ): self.ima_data = ima_data @@ -198,7 +198,7 @@ def __init__( self.checks = checks self.rc = rule_controller - self.skaled_status = init_skaled_status(self.name) + self.skaled_status = init_skaled_status(self.schain['name']) self.schain_type = get_schain_type(schain['partOfNode']) self.dutils = dutils or DockerUtils() From 171cb9d0e53b01658918d3873a866e7a6f81b5eb Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 30 May 2023 12:55:33 +0000 Subject: [PATCH 25/84] Extract secret_key from config fixture in conftest --- tests/conftest.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index bc349c9d1..cecd1b305 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -342,18 +342,28 @@ def _schain_name(): @pytest.fixture -def schain_config(_schain_name, predeployed_ima): +def secret_key(_schain_name): + 
schain_dir_path = os.path.join(SCHAINS_DIR_PATH, _schain_name) + secret_key_path = os.path.join(schain_dir_path, 'secret_key_0.json') + try: + pathlib.Path(schain_dir_path).mkdir(parents=True, exist_ok=True) + with open(secret_key_path, 'w') as key_file: + json.dump(SECRET_KEY, key_file) + yield SECRET_KEY + finally: + rm_schain_dir(_schain_name) + + +@pytest.fixture +def schain_config(_schain_name, secret_key, predeployed_ima): schain_dir_path = os.path.join(SCHAINS_DIR_PATH, _schain_name) - pathlib.Path(schain_dir_path).mkdir(parents=True, exist_ok=True) config_path = os.path.join(schain_dir_path, f'schain_{_schain_name}.json') - secret_key_path = os.path.join(schain_dir_path, 'secret_key_0.json') - schain_config = generate_schain_config(_schain_name) - with open(config_path, 'w') as config_file: - json.dump(schain_config, config_file) - with open(secret_key_path, 'w') as key_file: - json.dump(SECRET_KEY, key_file) try: + pathlib.Path(schain_dir_path).mkdir(parents=True, exist_ok=True) + schain_config = generate_schain_config(_schain_name) + with open(config_path, 'w') as config_file: + json.dump(schain_config, config_file) yield schain_config finally: rm_schain_dir(_schain_name) @@ -467,7 +477,7 @@ def schain_on_contracts(skale, nodes, _schain_name) -> str: yield create_schain( skale, schain_type=1, # test2 should have 1 index - random_name=True + schain_name=_schain_name ) finally: cleanup_nodes_schains(skale) From 5349ff564c1e21b47a31071e404df58eb8438326 Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 1 Jun 2023 15:30:31 +0000 Subject: [PATCH 26/84] Save config to a new path with timestamp and rotation_id --- core/schains/config/directory.py | 11 +++++++ core/schains/config/main.py | 41 ++++++++++++++++++++++++-- core/schains/monitor/action.py | 4 +-- core/schains/monitor/config_monitor.py | 2 +- 4 files changed, 52 insertions(+), 6 deletions(-) diff --git a/core/schains/config/directory.py b/core/schains/config/directory.py index fc5e209eb..dfc390b1d 
100644 --- a/core/schains/config/directory.py +++ b/core/schains/config/directory.py @@ -20,6 +20,7 @@ import os import json import logging +import time from pathlib import Path from tools.configs import SCHAIN_CONFIG_DIR_SKALED @@ -36,6 +37,11 @@ def _config_filename(name: str) -> str: return f'schain_{name}.json' +def new_config_filename(name: str, rotation_id: int) -> str: + ts = int(time.time()) + return f'schain_{name}_{rotation_id}_{ts}.json' + + def schain_config_dir(name: str) -> str: """Get sChain config directory path in container""" return os.path.join(SCHAINS_DIR_PATH, name) @@ -59,6 +65,11 @@ def schain_config_filepath(name: str, in_schain_container=False) -> str: return os.path.join(schain_dir_path, _config_filename(name)) +def new_schain_config_filepath(name: str, rotation_id: int, in_schain_container=False) -> str: + schain_dir_path = SCHAIN_CONFIG_DIR_SKALED if in_schain_container else schain_config_dir(name) + return os.path.join(schain_dir_path, new_config_filename(name, rotation_id)) + + def skaled_status_filepath(name: str) -> str: return os.path.join(schain_config_dir(name), SKALED_STATUS_FILENAME) diff --git a/core/schains/config/main.py b/core/schains/config/main.py index 81aa39288..0c53c1178 100644 --- a/core/schains/config/main.py +++ b/core/schains/config/main.py @@ -26,7 +26,7 @@ from core.node import get_skale_node_version from core.schains.config.generator import generate_schain_config_with_skale from core.schains.config.directory import get_tmp_schain_config_filepath -from core.schains.config.directory import schain_config_filepath +from core.schains.config.directory import new_schain_config_filepath, schain_config_filepath from tools.str_formatters import arguments_list_string @@ -64,14 +64,49 @@ def init_schain_config( update_schain_config_version(schain_name, schain_record=schain_record) +def init_schain_config2( + skale: Skale, + node_id: int, + schain_name: str, + generation: int, + ecdsa_sgx_key_name: str, + rotation_data: dict, 
+ schain_record: SChainRecord +): + logger.info('Generating sChain config for %s', schain_name) + + schain_config = generate_schain_config_with_skale( + skale=skale, + schain_name=schain_name, + generation=generation, + node_id=node_id, + rotation_data=rotation_data, + ecdsa_key_name=ecdsa_sgx_key_name + ) + save_new_schain_config( + schain_config.to_dict(), + schain_name, + rotation_data['rotation_id'] + ) + update_schain_config_version(schain_name, schain_record=schain_record) + + def save_schain_config(schain_config, schain_name): tmp_config_filepath = get_tmp_schain_config_filepath(schain_name) - with open(tmp_config_filepath, 'w') as outfile: + with open(tmp_config_fiepath, 'w') as outfile: json.dump(schain_config, outfile, indent=4) config_filepath = schain_config_filepath(schain_name) shutil.move(tmp_config_filepath, config_filepath) +def save_new_schain_config(schain_config, schain_name, rotation_id): + tmp_config_filepath = get_tmp_schain_config_filepath(schain_name) + with open(tmp_config_filepath, 'w') as outfile: + json.dump(schain_config, outfile, indent=4) + config_filepath = new_schain_config_filepath(schain_name, rotation_id) + shutil.move(tmp_config_filepath, config_filepath) + + def update_schain_config_version(schain_name, schain_record=None): new_config_version = get_skale_node_version() schain_record = schain_record or upsert_schain_record(schain_name) @@ -83,6 +118,6 @@ def update_schain_config_version(schain_name, schain_record=None): def schain_config_version_match(schain_name, schain_record=None): schain_record = schain_record or upsert_schain_record(schain_name) skale_node_version = get_skale_node_version() - logger.debug(f'config check, schain: {schain_name}, config_version: \ + logger.info(f'config check, schain: {schain_name}, config_version: \ {schain_record.config_version}, skale_node_version: {skale_node_version}') return schain_record.config_version == skale_node_version diff --git a/core/schains/monitor/action.py 
b/core/schains/monitor/action.py index b5a2ca202..78af10413 100644 --- a/core/schains/monitor/action.py +++ b/core/schains/monitor/action.py @@ -44,7 +44,7 @@ from core.schains.runner import ( restart_container, is_container_exists, get_container_name ) -from core.schains.config import init_schain_config, init_schain_config_dir +from core.schains.config import init_schain_config2, init_schain_config_dir from core.schains.config.directory import get_schain_config from core.schains.config.helper import ( get_base_port_from_config, @@ -168,7 +168,7 @@ def dkg(self) -> bool: def config(self, overwrite=False) -> bool: initial_status = self.checks.config.status if not initial_status or overwrite: - init_schain_config( + init_schain_config2( skale=self.skale, node_id=self.node_config.id, schain_name=self.name, diff --git a/core/schains/monitor/config_monitor.py b/core/schains/monitor/config_monitor.py index b27725aa1..aefaae928 100644 --- a/core/schains/monitor/config_monitor.py +++ b/core/schains/monitor/config_monitor.py @@ -21,7 +21,7 @@ from abc import abstractmethod from core.schains.monitor.base_monitor import IMonitor -from core.schains.monitor.action_manager import ConfigActionManager +from core.schains.monitor.action import ConfigActionManager logger = logging.getLogger(__name__) From d7d433b2803cbacddcfba9b904feeb75afc0dce2 Mon Sep 17 00:00:00 2001 From: badrogger Date: Wed, 7 Jun 2023 18:56:40 +0000 Subject: [PATCH 27/84] Introduce new monitor flow --- Dockerfile | 2 +- core/schains/checks.py | 2 +- core/schains/config/main.py | 29 ++- core/schains/firewall/__init__.py | 1 + core/schains/monitor/action.py | 15 +- core/schains/monitor/base_monitor.py | 6 + core/schains/monitor/config_monitor.py | 12 +- core/schains/monitor/main.py | 231 +++++++++++++----- core/schains/process_manager.py | 2 +- core/schains/task.py | 4 +- tests/schains/monitor/regular_monitor_test.py | 2 +- 11 files changed, 226 insertions(+), 80 deletions(-) diff --git a/Dockerfile b/Dockerfile 
index 2f6efa585..a7d00e9b4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.8-buster +FROM python:3.9-buster RUN apt-get update && apt-get install -y wget git libxslt-dev iptables kmod swig3.0 RUN ln -s /usr/bin/swig3.0 /usr/bin/swig diff --git a/core/schains/checks.py b/core/schains/checks.py index 672893e92..709a3adbb 100644 --- a/core/schains/checks.py +++ b/core/schains/checks.py @@ -279,7 +279,7 @@ def __init__( schain_record=schain_record, rotation_id=rotation_id ), - ContainerChecks( + SkaledChecks( schain_name=schain_name, schain_record=schain_record, rule_controller=rule_controller, diff --git a/core/schains/config/main.py b/core/schains/config/main.py index 0c53c1178..863f04469 100644 --- a/core/schains/config/main.py +++ b/core/schains/config/main.py @@ -18,16 +18,21 @@ # along with this program. If not, see . import json +import os import shutil import logging +from typing import Optional from skale import Skale from core.node import get_skale_node_version +from core.schains.config.directory import ( + get_tmp_schain_config_filepath, + new_schain_config_filepath, + schain_config_dir, + schain_config_filepath +) from core.schains.config.generator import generate_schain_config_with_skale -from core.schains.config.directory import get_tmp_schain_config_filepath -from core.schains.config.directory import new_schain_config_filepath, schain_config_filepath - from tools.str_formatters import arguments_list_string from web.models.schain import upsert_schain_record, SChainRecord @@ -93,7 +98,7 @@ def init_schain_config2( def save_schain_config(schain_config, schain_name): tmp_config_filepath = get_tmp_schain_config_filepath(schain_name) - with open(tmp_config_fiepath, 'w') as outfile: + with open(tmp_config_filepath, 'w') as outfile: json.dump(schain_config, outfile, indent=4) config_filepath = schain_config_filepath(schain_name) shutil.move(tmp_config_filepath, config_filepath) @@ -107,6 +112,11 @@ def 
save_new_schain_config(schain_config, schain_name, rotation_id): shutil.move(tmp_config_filepath, config_filepath) +def set_as_upstream_config(schain_name: str, config_path: str) -> None: + upstream_link_filepath = schain_config_filepath(schain_name) + shutil.copy(config_path, upstream_link_filepath) + + def update_schain_config_version(schain_name, schain_record=None): new_config_version = get_skale_node_version() schain_record = schain_record or upsert_schain_record(schain_name) @@ -121,3 +131,14 @@ def schain_config_version_match(schain_name, schain_record=None): logger.info(f'config check, schain: {schain_name}, config_version: \ {schain_record.config_version}, skale_node_version: {skale_node_version}') return schain_record.config_version == skale_node_version + + +def get_latest_config_filepath(schain_name) -> Optional[str]: + config_dir = schain_config_dir(schain_name) + dir_files = sorted( + filter(lambda f: not os.path.islink(f), os.listdir(config_dir)), + key=lambda fname: os.stat(fname, follow_symlinks=False).st_mtime + ) + if not dir_files: + return None + return dir_files[-1] diff --git a/core/schains/firewall/__init__.py b/core/schains/firewall/__init__.py index 85a7c06b0..8edbd1a7c 100644 --- a/core/schains/firewall/__init__.py +++ b/core/schains/firewall/__init__.py @@ -20,4 +20,5 @@ from .firewall_manager import SChainFirewallManager # noqa from .iptables import IptablesController # noqa from .rule_controller import SChainRuleController # noqa +from .types import IRuleController # noqa from .utils import get_default_rule_controller # noqa diff --git a/core/schains/monitor/action.py b/core/schains/monitor/action.py index 78af10413..92bfa1abc 100644 --- a/core/schains/monitor/action.py +++ b/core/schains/monitor/action.py @@ -42,9 +42,16 @@ from core.schains.monitor.containers import monitor_schain_container, monitor_ima_container from core.schains.monitor.rpc import handle_failed_schain_rpc from core.schains.runner import ( - restart_container, 
is_container_exists, get_container_name + restart_container, + is_container_exists, + get_container_name ) -from core.schains.config import init_schain_config2, init_schain_config_dir +from core.schains.config.main import ( + get_latest_config_filepath, + init_schain_config2, + set_as_upstream_config +) +from core.schains.config import init_schain_config_dir from core.schains.config.directory import get_schain_config from core.schains.config.helper import ( get_base_port_from_config, @@ -313,6 +320,10 @@ def cleanup_schain_docker_entity(self) -> bool: remove_schain_volume(self.name, dutils=self.dutils) return True + def set_upstream_config(self) -> bool: + latest_filepath = get_latest_config_filepath(self.name) + set_as_upstream_config(self.name, latest_filepath) + def log_executed_blocks(self) -> None: logger.info(arguments_list_string( self.executed_blocks, f'Finished monitor runner - {self.name}')) diff --git a/core/schains/monitor/base_monitor.py b/core/schains/monitor/base_monitor.py index 5a0e46e2d..94952e1d5 100644 --- a/core/schains/monitor/base_monitor.py +++ b/core/schains/monitor/base_monitor.py @@ -70,6 +70,12 @@ SCHAIN_CLEANUP_TIMEOUT = 10 +class IMonitor(ABC): + @abstractmethod + def run(self): + pass + + class BaseMonitor(ABC): def __init__( self, diff --git a/core/schains/monitor/config_monitor.py b/core/schains/monitor/config_monitor.py index aefaae928..88cd39b49 100644 --- a/core/schains/monitor/config_monitor.py +++ b/core/schains/monitor/config_monitor.py @@ -21,15 +21,15 @@ from abc import abstractmethod from core.schains.monitor.base_monitor import IMonitor -from core.schains.monitor.action import ConfigActionManager +from core.schains.monitor.action import SkaledActionManager logger = logging.getLogger(__name__) class BaseConfigMonitor(IMonitor): - def __init__(self, action_manager: ConfigActionManager): - self.action_manager = action_manager + def __init__(self, action_manager: SkaledActionManager): + self.am = action_manager @abstractmethod 
def run(self) -> None: @@ -38,6 +38,6 @@ def run(self) -> None: class RegularConfigMonitor(BaseConfigMonitor): def run(self) -> None: - self.action_manager.config_dir() - self.action_manager.dkg() - self.action_manager.config() + self.am.firewall_rules() + self.am.volume() + self.am.skaled_container() diff --git a/core/schains/monitor/main.py b/core/schains/monitor/main.py index 55e58557e..6f6b9205a 100644 --- a/core/schains/monitor/main.py +++ b/core/schains/monitor/main.py @@ -17,15 +17,16 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +import functools import time import random import logging -from importlib import reload +from typing import Dict -from web3._utils import request +from skale import Skale, SkaleIma from core.node_config import NodeConfig -from core.schains.checks import SChainChecks +from core.schains.checks import ConfigChecks, SkaledChecks, SChainChecks from core.schains.firewall import get_default_rule_controller from core.schains.ima import ImaData from core.schains.monitor import ( @@ -37,13 +38,16 @@ RotationMonitor, ReloadMonitor ) +from core.schains.monitor.config_monitor import RegularConfigMonitor +from core.schains.monitor.skaled_monitor import RegularSkaledMonitor +from core.schains.monitor.action import ConfigActionManager, SkaledActionManager +from core.schains.task import run_tasks, Task from core.schains.firewall.utils import get_sync_agent_ranges -from core.schains.skaled_status import init_skaled_status, SkaledStatus +from core.schains.skaled_status import SkaledStatus from tools.docker_utils import DockerUtils from tools.configs import BACKUP_RUN from tools.configs.ima import DISABLE_IMA -from tools.helper import is_node_part_of_chain from web.models.schain import upsert_schain_record, SChainRecord @@ -115,8 +119,89 @@ def get_monitor_type( return RegularMonitor -def run_monitor_for_schain(skale, skale_ima, node_config: NodeConfig, schain, dutils=None, - 
once=False): +def monitor_config(skale: Skale, schain: Dict, node_config: NodeConfig) -> None: + name = schain['name'] + schain_record = upsert_schain_record(name) + rotation_data = skale.node_rotation.get_rotation(name) + config_checks = ConfigChecks( + schain_name=name, + node_id=node_config.id, + schain_record=schain_record, + rotation_id=rotation_data['rotation_id'] + ) + + config_am = ConfigActionManager( + skale=skale, + schain=schain, + node_config=node_config, + rotation_data=rotation_data, + checks=config_checks + ) + + mon = RegularConfigMonitor(config_am) + mon.run() + + +def monitor_containers( + skale: Skale, + skale_ima: SkaleIma, + schain: Dict, + dutils: DockerUtils +) -> None: + name = schain['name'] + schain_record = upsert_schain_record(name) + + dutils = dutils or DockerUtils() + + rotation_data = skale.node_rotation.get_rotation(name) + ima_linked = not DISABLE_IMA and skale_ima.linker.has_schain(name) + + sync_agent_ranges = get_sync_agent_ranges(skale) + + rc = get_default_rule_controller( + name=name, + sync_agent_ranges=sync_agent_ranges + ) + skaled_checks = SkaledChecks( + schain_name=schain['name'], + schain_record=schain_record, + rule_controller=rc, + ima_linked=ima_linked, + dutils=dutils + ) + + finish_ts = skale.node_rotation.get_schain_finish_ts( + node_id=rotation_data['leaving_node'], + schain_name=name + ) + + ima_data = ImaData( + linked=ima_linked, + chain_id=skale_ima.web3.eth.chainId + ) + + # finish ts can be fetched from config + skaled_am = SkaledActionManager( + schain=schain, + rule_controller=rc, + ima_data=ima_data, + checks=skaled_checks, + finish_ts=finish_ts, + dutils=dutils + ) + + mon = RegularSkaledMonitor(skaled_am) + mon.run() + + +def run_monitor_for_schain( + skale, + skale_ima, + node_config: NodeConfig, + schain, + dutils=None, + once=False +): p = get_log_prefix(schain["name"]) def post_monitor_sleep(): @@ -129,62 +214,84 @@ def post_monitor_sleep(): while True: try: - logger.info(f'{p} monitor created') 
- reload(request) # fix for web3py multiprocessing issue (see SKALE-4251) - name = schain["name"] - dutils = dutils or DockerUtils() - - is_rotation_active = skale.node_rotation.is_rotation_active(name) - - if not is_node_part_of_chain(skale, name, node_config.id) and not is_rotation_active: - logger.warning(f'{p} NOT ON NODE ({node_config.id}), finising process...') - return True - - ima_linked = not DISABLE_IMA and skale_ima.linker.has_schain(name) - rotation_data = skale.node_rotation.get_rotation(name) - - sync_agent_ranges = get_sync_agent_ranges(skale) - - rc = get_default_rule_controller( - name=name, - sync_agent_ranges=sync_agent_ranges - ) - schain_record = upsert_schain_record(name) - checks = SChainChecks( - name, - node_config.id, - schain_record=schain_record, - rule_controller=rc, - rotation_id=rotation_data['rotation_id'], - ima_linked=ima_linked, - dutils=dutils - ) - - ima_data = ImaData( - linked=ima_linked, - chain_id=skale_ima.web3.eth.chainId - ) - skaled_status = init_skaled_status(name) - - monitor_class = get_monitor_type( - schain_record, - checks, - is_rotation_active, - skaled_status - ) - monitor = monitor_class( - skale=skale, - ima_data=ima_data, - schain=schain, - node_config=node_config, - rotation_data=rotation_data, - checks=checks, - rule_controller=rc - ) - monitor.run() - if once: - return True + tasks = [ + Task( + f'{name}-config', + functools.partial( + monitor_config, + skale=skale, + schain=schain, + node_config=node_config + ) + ), + Task( + f'{name}-skaled', + functools.partial( + monitor_containers, + skale=skale, + skale_ima=skale_ima, + schain=schain, + dutils=dutils + ), + ) + ] + run_tasks(name=name, tasks=tasks) + # logger.info(f'{p} monitor created') + # reload(request) # fix for web3py multiprocessing issue (see SKALE-4251) + + # dutils = dutils or DockerUtils() + + # is_rotation_active = skale.node_rotation.is_rotation_active(name) + + # if not is_node_part_of_chain(skale, name, node_config.id) and not 
is_rotation_active: + # logger.warning(f'{p} NOT ON NODE ({node_config.id}), finising process...') + # return True + + # ima_linked = not DISABLE_IMA and skale_ima.linker.has_schain(name) + # rotation_data = skale.node_rotation.get_rotation(name) + + # sync_agent_ranges = get_sync_agent_ranges(skale) + + # rc = get_default_rule_controller( + # name=name, + # sync_agent_ranges=sync_agent_ranges + # ) + # schain_record = upsert_schain_record(name) + # checks = SChainChecks( + # name, + # node_config.id, + # schain_record=schain_record, + # rule_controller=rc, + # rotation_id=rotation_data['rotation_id'], + # ima_linked=ima_linked, + # dutils=dutils + # ) + + # ima_data = ImaData( + # linked=ima_linked, + # chain_id=skale_ima.web3.eth.chainId + # ) + # skaled_status = init_skaled_status(name) + + # monitor_class = get_monitor_type( + # schain_record, + # checks, + # is_rotation_active, + # skaled_status + # ) + # monitor = monitor_class( + # skale=skale, + # ima_data=ima_data, + # schain=schain, + # node_config=node_config, + # rotation_data=rotation_data, + # checks=checks, + # rule_controller=rc + # ) + # monitor.run() + # if once: + # return True post_monitor_sleep() except Exception: logger.exception(f'{p} monitor failed') diff --git a/core/schains/process_manager.py b/core/schains/process_manager.py index 5b37f49be..d1387eacf 100644 --- a/core/schains/process_manager.py +++ b/core/schains/process_manager.py @@ -65,7 +65,7 @@ def run_process_manager(skale, skale_ima, node_config): schain_record = upsert_schain_record(schain['name']) log_prefix = f'sChain {schain["name"]} -' # todo - move to logger formatter - terminate_stuck_schain_process(skale, schain_record, schain) + # terminate_stuck_schain_process(skale, schain_record, schain) monitor_process_alive = is_monitor_process_alive(schain_record.monitor_id) if not monitor_process_alive: diff --git a/core/schains/task.py b/core/schains/task.py index 5261c55f1..abf3cf04b 100644 --- a/core/schains/task.py +++ 
b/core/schains/task.py @@ -31,8 +31,8 @@ def keep_tasks_running( futures[i] = executor.submit(task.run) -def run_tasks(tasks: List[Task]) -> None: - with ThreadPoolExecutor(max_workers=len(tasks)) as executor: +def run_tasks(name: str, tasks: List[Task]) -> None: + with ThreadPoolExecutor(max_workers=len(tasks), thread_name_prefix=name) as executor: futures: List[Optional[Future]] = [None for i in range(len(tasks))] while True: keep_tasks_running(executor, tasks, futures) diff --git a/tests/schains/monitor/regular_monitor_test.py b/tests/schains/monitor/regular_monitor_test.py index 3395adab0..e34420f9b 100644 --- a/tests/schains/monitor/regular_monitor_test.py +++ b/tests/schains/monitor/regular_monitor_test.py @@ -9,7 +9,7 @@ from core.schains.runner import get_container_name from core.schains.checks import SChainChecks -from core.schains.monitor import RegularMonitor +from core.schains.monitor.config_monitor import RegularConfigMonitor from core.schains.ima import ImaData from tools.configs import SGX_CERTIFICATES_FOLDER, SGX_SERVER_URL From 1291c3336252350a891632ad2f57444ad2ca1557 Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 8 Jun 2023 12:42:19 +0000 Subject: [PATCH 28/84] Fix config checks --- core/schains/checks.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/core/schains/checks.py b/core/schains/checks.py index 709a3adbb..6e30d0889 100644 --- a/core/schains/checks.py +++ b/core/schains/checks.py @@ -24,10 +24,11 @@ from typing import Any, Dict from core.schains.config.directory import ( + get_schain_check_filepath, get_schain_config, + new_schain_config_filepath, schain_config_dir, - schain_config_filepath, - get_schain_check_filepath + schain_config_filepath ) from core.schains.config.helper import ( get_base_port_from_config, @@ -35,7 +36,7 @@ get_own_ip_from_config, get_local_schain_http_endpoint ) -from core.schains.config.main import schain_config_version_match +from core.schains.config.main import 
get_latest_config_filepath, schain_config_version_match from core.schains.dkg.utils import get_secret_key_share_filepath from core.schains.firewall.types import IRuleController from core.schains.process_manager_helper import is_monitor_process_alive @@ -119,7 +120,7 @@ def dkg(self) -> CheckRes: def config(self) -> CheckRes: # TODO: this should be check for the newest config """Checks that sChain config file exists""" - config_filepath = schain_config_filepath(self.name) + config_filepath = new_schain_config_filepath(self.name, self.rotation_id) if not os.path.isfile(config_filepath): return CheckRes(False) return CheckRes( @@ -186,6 +187,17 @@ def is_healthy(self) -> bool: checks = self.get_all() return False not in checks.values() + @property + def config_exists(self) -> CheckRes: + pass + + def config_latest(self) -> CheckRes: + upstream_path = schain_config_filepath(self.name) + latest_path = get_latest_config_filepath(self.name) + upstream_mtime = os.stat(upstream_path, follow_symlinks=False).st_mtime + latest_mtime = os.stat(latest_path, follow_symlinks=False).st_mtime + return CheckRes(upstream_mtime >= latest_mtime) + @property def config_file(self) -> CheckRes: """ Checks that at least one sChain config file exists """ From d33764017b1efab0cec40d6f810102e670094c50 Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 8 Jun 2023 18:48:44 +0000 Subject: [PATCH 29/84] Add the rest of monitor types --- core/schains/checks.py | 5 +- core/schains/config/main.py | 4 +- core/schains/monitor/action.py | 14 ++++- core/schains/monitor/config_monitor.py | 19 +++++-- core/schains/monitor/main.py | 78 ++++++-------------------- 5 files changed, 45 insertions(+), 75 deletions(-) diff --git a/core/schains/checks.py b/core/schains/checks.py index 6e30d0889..eeef507eb 100644 --- a/core/schains/checks.py +++ b/core/schains/checks.py @@ -188,10 +188,7 @@ def is_healthy(self) -> bool: return False not in checks.values() @property - def config_exists(self) -> CheckRes: - pass - - 
def config_latest(self) -> CheckRes: + def latest_config(self) -> CheckRes: upstream_path = schain_config_filepath(self.name) latest_path = get_latest_config_filepath(self.name) upstream_mtime = os.stat(upstream_path, follow_symlinks=False).st_mtime diff --git a/core/schains/config/main.py b/core/schains/config/main.py index 863f04469..006fc340a 100644 --- a/core/schains/config/main.py +++ b/core/schains/config/main.py @@ -113,8 +113,8 @@ def save_new_schain_config(schain_config, schain_name, rotation_id): def set_as_upstream_config(schain_name: str, config_path: str) -> None: - upstream_link_filepath = schain_config_filepath(schain_name) - shutil.copy(config_path, upstream_link_filepath) + upstream_filepath = schain_config_filepath(schain_name) + shutil.copy(config_path, upstream_filepath) def update_schain_config_version(schain_name, schain_record=None): diff --git a/core/schains/monitor/action.py b/core/schains/monitor/action.py index 92bfa1abc..160b4ba02 100644 --- a/core/schains/monitor/action.py +++ b/core/schains/monitor/action.py @@ -35,7 +35,10 @@ from core.schains.firewall.types import IRuleController from core.schains.volume import init_data_volume -from core.schains.rotation import get_schain_public_key +from core.schains.rotation import ( + get_schain_public_key, + set_rotation_for_schain +) from core.schains.limits import get_schain_type @@ -281,6 +284,9 @@ def reloaded_skaled_container(self) -> bool: remove_schain_container(self.name, dutils=self.dutils) else: logger.warning(f'sChain {self.name}: container doesn\'t exists') + self.schain_record.set_restart_count(0) + self.schain_record.set_failed_rpc_count(0) + self.schain_record.set_needs_reload(False) initial_status = self.skaled_container() return initial_status @@ -320,10 +326,14 @@ def cleanup_schain_docker_entity(self) -> bool: remove_schain_volume(self.name, dutils=self.dutils) return True - def set_upstream_config(self) -> bool: + @BaseActionManager.monitor_block + def 
fetch_upstream_config(self) -> bool: latest_filepath = get_latest_config_filepath(self.name) set_as_upstream_config(self.name, latest_filepath) + def send_exit_request(self) -> None: + set_rotation_for_schain(self.name, self.finish_ts) + def log_executed_blocks(self) -> None: logger.info(arguments_list_string( self.executed_blocks, f'Finished monitor runner - {self.name}')) diff --git a/core/schains/monitor/config_monitor.py b/core/schains/monitor/config_monitor.py index 88cd39b49..c3831fb88 100644 --- a/core/schains/monitor/config_monitor.py +++ b/core/schains/monitor/config_monitor.py @@ -20,16 +20,22 @@ import logging from abc import abstractmethod +from core.schains.checks import ConfigChecks from core.schains.monitor.base_monitor import IMonitor -from core.schains.monitor.action import SkaledActionManager +from core.schains.monitor.action import ConfigActionManager logger = logging.getLogger(__name__) class BaseConfigMonitor(IMonitor): - def __init__(self, action_manager: SkaledActionManager): + def __init__( + self, + action_manager: ConfigActionManager, + checks: ConfigChecks + ) -> None: self.am = action_manager + self.checks = checks @abstractmethod def run(self) -> None: @@ -38,6 +44,9 @@ def run(self) -> None: class RegularConfigMonitor(BaseConfigMonitor): def run(self) -> None: - self.am.firewall_rules() - self.am.volume() - self.am.skaled_container() + if not self.checks.config_dir: + self.am.config_dir() + if not self.checks.dkg: + self.am.dkg() + if not self.checks.config: + self.am.config() diff --git a/core/schains/monitor/main.py b/core/schains/monitor/main.py index 6f6b9205a..dc4573818 100644 --- a/core/schains/monitor/main.py +++ b/core/schains/monitor/main.py @@ -39,11 +39,11 @@ ReloadMonitor ) from core.schains.monitor.config_monitor import RegularConfigMonitor -from core.schains.monitor.skaled_monitor import RegularSkaledMonitor +from core.schains.monitor.skaled_monitor import get_skaled_monitor from core.schains.monitor.action import 
ConfigActionManager, SkaledActionManager from core.schains.task import run_tasks, Task from core.schains.firewall.utils import get_sync_agent_ranges -from core.schains.skaled_status import SkaledStatus +from core.schains.skaled_status import init_skaled_status, SkaledStatus from tools.docker_utils import DockerUtils from tools.configs import BACKUP_RUN @@ -105,7 +105,7 @@ def get_monitor_type( checks: SChainChecks, is_rotation_active: bool, skaled_status: SkaledStatus - ) -> BaseMonitor: +) -> BaseMonitor: if _is_backup_mode(schain_record): return BackupMonitor if _is_repair_mode(schain_record, checks, skaled_status): @@ -138,7 +138,7 @@ def monitor_config(skale: Skale, schain: Dict, node_config: NodeConfig) -> None: checks=config_checks ) - mon = RegularConfigMonitor(config_am) + mon = RegularConfigMonitor(config_am, config_checks) mon.run() @@ -180,6 +180,8 @@ def monitor_containers( chain_id=skale_ima.web3.eth.chainId ) + skaled_status = init_skaled_status(name) + # finish ts can be fetched from config skaled_am = SkaledActionManager( schain=schain, @@ -189,8 +191,13 @@ def monitor_containers( finish_ts=finish_ts, dutils=dutils ) - - mon = RegularSkaledMonitor(skaled_am) + mon = get_skaled_monitor( + action_manager=skaled_am, + checks=skaled_checks, + schain_record=schain_record, + skaled_status=skaled_status, + backup_run=BACKUP_RUN + ) mon.run() @@ -214,7 +221,7 @@ def post_monitor_sleep(): while True: try: - name = schain["name"] + name = schain['name'] tasks = [ Task( f'{name}-config', @@ -237,61 +244,8 @@ def post_monitor_sleep(): ) ] run_tasks(name=name, tasks=tasks) - # logger.info(f'{p} monitor created') - # reload(request) # fix for web3py multiprocessing issue (see SKALE-4251) - - # dutils = dutils or DockerUtils() - - # is_rotation_active = skale.node_rotation.is_rotation_active(name) - - # if not is_node_part_of_chain(skale, name, node_config.id) and not is_rotation_active: - # logger.warning(f'{p} NOT ON NODE ({node_config.id}), finising 
process...') - # return True - - # ima_linked = not DISABLE_IMA and skale_ima.linker.has_schain(name) - # rotation_data = skale.node_rotation.get_rotation(name) - - # sync_agent_ranges = get_sync_agent_ranges(skale) - - # rc = get_default_rule_controller( - # name=name, - # sync_agent_ranges=sync_agent_ranges - # ) - # schain_record = upsert_schain_record(name) - # checks = SChainChecks( - # name, - # node_config.id, - # schain_record=schain_record, - # rule_controller=rc, - # rotation_id=rotation_data['rotation_id'], - # ima_linked=ima_linked, - # dutils=dutils - # ) - - # ima_data = ImaData( - # linked=ima_linked, - # chain_id=skale_ima.web3.eth.chainId - # ) - # skaled_status = init_skaled_status(name) - - # monitor_class = get_monitor_type( - # schain_record, - # checks, - # is_rotation_active, - # skaled_status - # ) - # monitor = monitor_class( - # skale=skale, - # ima_data=ima_data, - # schain=schain, - # node_config=node_config, - # rotation_data=rotation_data, - # checks=checks, - # rule_controller=rc - # ) - # monitor.run() - # if once: - # return True + if once: + return True post_monitor_sleep() except Exception: logger.exception(f'{p} monitor failed') From 5a90e8bc8481ad16ac8f35c17e0fd7825dc0b79f Mon Sep 17 00:00:00 2001 From: badrogger Date: Sat, 10 Jun 2023 16:26:32 +0000 Subject: [PATCH 30/84] Improve checks naming --- core/schains/checks.py | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/core/schains/checks.py b/core/schains/checks.py index eeef507eb..d74bafc19 100644 --- a/core/schains/checks.py +++ b/core/schains/checks.py @@ -26,9 +26,9 @@ from core.schains.config.directory import ( get_schain_check_filepath, get_schain_config, - new_schain_config_filepath, schain_config_dir, - schain_config_filepath + schain_config_filepath, + new_schain_config_filepath ) from core.schains.config.helper import ( get_base_port_from_config, @@ -36,7 +36,7 @@ get_own_ip_from_config, 
get_local_schain_http_endpoint ) -from core.schains.config.main import get_latest_config_filepath, schain_config_version_match +from core.schains.config.main import get_upstream_config_filepath, schain_config_version_match from core.schains.dkg.utils import get_secret_key_share_filepath from core.schains.firewall.types import IRuleController from core.schains.process_manager_helper import is_monitor_process_alive @@ -77,6 +77,9 @@ def __init__(self, status: bool, data: dict = None): self.status = status self.data = data if data else {} + def __bool__(self) -> bool: + return self.status + class IChecks(ABC): @abstractmethod @@ -117,11 +120,10 @@ def dkg(self) -> CheckRes: return CheckRes(os.path.isfile(secret_key_share_filepath)) @property - def config(self) -> CheckRes: - # TODO: this should be check for the newest config + def upstream_config(self) -> CheckRes: """Checks that sChain config file exists""" - config_filepath = new_schain_config_filepath(self.name, self.rotation_id) - if not os.path.isfile(config_filepath): + upstream_path = new_schain_config_filepath(self.name, self.rotation_id) + if not os.path.isfile(upstream_path): return CheckRes(False) return CheckRes( schain_config_version_match(self.name, self.schain_record) @@ -188,18 +190,22 @@ def is_healthy(self) -> bool: return False not in checks.values() @property - def latest_config(self) -> CheckRes: - upstream_path = schain_config_filepath(self.name) - latest_path = get_latest_config_filepath(self.name) + def config_updated(self) -> CheckRes: + if not self.config: + return CheckRes(False) + upstream_path = get_upstream_config_filepath(self.name) + config_path = schain_config_filepath(self.name) + if not upstream_path: + return CheckRes(True) upstream_mtime = os.stat(upstream_path, follow_symlinks=False).st_mtime - latest_mtime = os.stat(latest_path, follow_symlinks=False).st_mtime - return CheckRes(upstream_mtime >= latest_mtime) + config_mtime = os.stat(config_path, follow_symlinks=False).st_mtime + 
return CheckRes(config_mtime >= upstream_mtime) @property - def config_file(self) -> CheckRes: - """ Checks that at least one sChain config file exists """ - config_filepath = schain_config_filepath(self.name) - return CheckRes(os.path.isfile(config_filepath)) + def config(self) -> CheckRes: + """ Checks that upstream sChain config file exists """ + config_path = schain_config_filepath(self.name) + return os.path.isfile(config_path) @property def volume(self) -> CheckRes: @@ -209,7 +215,7 @@ def volume(self) -> CheckRes: @property def firewall_rules(self) -> CheckRes: """Checks that firewall rules are set correctly""" - if self.config_file.status: + if self.config: conf = get_schain_config(self.name) base_port = get_base_port_from_config(conf) node_ips = get_node_ips_from_config(conf) From 0eeb5763c13c9ffbdc10b3798fc1c39299cab5f9 Mon Sep 17 00:00:00 2001 From: badrogger Date: Sat, 10 Jun 2023 16:26:57 +0000 Subject: [PATCH 31/84] Clean config path commands logic --- core/schains/config/directory.py | 12 +++++++----- core/schains/config/main.py | 33 ++++++++++++++++++++------------ 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/core/schains/config/directory.py b/core/schains/config/directory.py index dfc390b1d..612e5eda1 100644 --- a/core/schains/config/directory.py +++ b/core/schains/config/directory.py @@ -20,7 +20,6 @@ import os import json import logging -import time from pathlib import Path from tools.configs import SCHAIN_CONFIG_DIR_SKALED @@ -33,13 +32,16 @@ logger = logging.getLogger(__name__) -def _config_filename(name: str) -> str: +def config_filename(name: str) -> str: return f'schain_{name}.json' +def new_config_prefix(name: str) -> str: + return f'scain_{name}_' + + def new_config_filename(name: str, rotation_id: int) -> str: - ts = int(time.time()) - return f'schain_{name}_{rotation_id}_{ts}.json' + return f'schain_{name}_{rotation_id}.json' def schain_config_dir(name: str) -> str: @@ -62,7 +64,7 @@ def init_schain_config_dir(name: 
str) -> str: def schain_config_filepath(name: str, in_schain_container=False) -> str: schain_dir_path = SCHAIN_CONFIG_DIR_SKALED if in_schain_container else schain_config_dir(name) - return os.path.join(schain_dir_path, _config_filename(name)) + return os.path.join(schain_dir_path, config_filename(name)) def new_schain_config_filepath(name: str, rotation_id: int, in_schain_container=False) -> str: diff --git a/core/schains/config/main.py b/core/schains/config/main.py index 006fc340a..412097c79 100644 --- a/core/schains/config/main.py +++ b/core/schains/config/main.py @@ -28,9 +28,10 @@ from core.node import get_skale_node_version from core.schains.config.directory import ( get_tmp_schain_config_filepath, - new_schain_config_filepath, + new_config_prefix, schain_config_dir, - schain_config_filepath + schain_config_filepath, + new_schain_config_filepath ) from core.schains.config.generator import generate_schain_config_with_skale from tools.str_formatters import arguments_list_string @@ -69,7 +70,7 @@ def init_schain_config( update_schain_config_version(schain_name, schain_record=schain_record) -def init_schain_config2( +def create_new_schain_config( skale: Skale, node_id: int, schain_name: str, @@ -112,9 +113,9 @@ def save_new_schain_config(schain_config, schain_name, rotation_id): shutil.move(tmp_config_filepath, config_filepath) -def set_as_upstream_config(schain_name: str, config_path: str) -> None: - upstream_filepath = schain_config_filepath(schain_name) - shutil.copy(config_path, upstream_filepath) +def sync_config_with_file(schain_name: str, src_path: str) -> None: + dst_path = schain_config_filepath(schain_name) + shutil.copy(src_path, dst_path) def update_schain_config_version(schain_name, schain_record=None): @@ -133,12 +134,20 @@ def schain_config_version_match(schain_name, schain_record=None): return schain_record.config_version == skale_node_version -def get_latest_config_filepath(schain_name) -> Optional[str]: +def 
get_upstream_config_filepath(schain_name) -> Optional[str]: + # IVD TODO filter secret_key files config_dir = schain_config_dir(schain_name) - dir_files = sorted( - filter(lambda f: not os.path.islink(f), os.listdir(config_dir)), - key=lambda fname: os.stat(fname, follow_symlinks=False).st_mtime - ) + prefix = new_config_prefix(schain_name) + dir_files = None + if os.path.isdir(config_dir): + dir_files = sorted( + filter(lambda f: config_dir.startswith(prefix), os.listdir(config_dir)), + key=lambda fname: os.stat( + os.path.join( + config_dir, + fname + ), follow_symlinks=False).st_mtime + ) if not dir_files: return None - return dir_files[-1] + return os.path.join(config_dir, dir_files[-1]) From ae2663e4d247fdb5097151e6e0df377e7e6fd4ae Mon Sep 17 00:00:00 2001 From: badrogger Date: Sat, 10 Jun 2023 16:27:21 +0000 Subject: [PATCH 32/84] Improve config related actions --- core/schains/monitor/action.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/core/schains/monitor/action.py b/core/schains/monitor/action.py index 160b4ba02..7466d18f8 100644 --- a/core/schains/monitor/action.py +++ b/core/schains/monitor/action.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# This file is part of SKALE Admin +# This file is part of SKALE Admin # # Copyright (C) 2021-Present SKALE Labs # @@ -50,9 +50,9 @@ get_container_name ) from core.schains.config.main import ( - get_latest_config_filepath, - init_schain_config2, - set_as_upstream_config + create_new_schain_config, + get_upstream_config_filepath, + sync_config_with_file ) from core.schains.config import init_schain_config_dir from core.schains.config.directory import get_schain_config @@ -175,10 +175,10 @@ def dkg(self) -> bool: return initial_status @BaseActionManager.monitor_block - def config(self, overwrite=False) -> bool: - initial_status = self.checks.config.status + def upstream_config(self, overwrite=False) -> bool: + initial_status = self.checks.upstream_config.status if 
not initial_status or overwrite: - init_schain_config2( + create_new_schain_config( skale=self.skale, node_id=self.node_config.id, schain_name=self.name, @@ -226,7 +226,7 @@ def volume(self) -> bool: @BaseActionManager.monitor_block def firewall_rules(self, overwrite=False) -> bool: - initial_status = self.checks.firewall_rules.status + initial_status = self.checks.firewall_rules if not initial_status: logger.info('Configuring firewall rules') conf = get_schain_config(self.name) @@ -327,10 +327,13 @@ def cleanup_schain_docker_entity(self) -> bool: return True @BaseActionManager.monitor_block - def fetch_upstream_config(self) -> bool: - latest_filepath = get_latest_config_filepath(self.name) - set_as_upstream_config(self.name, latest_filepath) + def update_config(self) -> bool: + upstream_path = get_upstream_config_filepath(self.name) + if upstream_path: + sync_config_with_file(self.name, upstream_path) + return upstream_path is not None + @BaseActionManager.monitor_block def send_exit_request(self) -> None: set_rotation_for_schain(self.name, self.finish_ts) From fb76437bd6dc6c9b451a0fdd77d6258ce61db082 Mon Sep 17 00:00:00 2001 From: badrogger Date: Sat, 10 Jun 2023 16:27:37 +0000 Subject: [PATCH 33/84] Updated to new config check names --- core/schains/monitor/config_monitor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/schains/monitor/config_monitor.py b/core/schains/monitor/config_monitor.py index c3831fb88..601dbacf8 100644 --- a/core/schains/monitor/config_monitor.py +++ b/core/schains/monitor/config_monitor.py @@ -48,5 +48,5 @@ def run(self) -> None: self.am.config_dir() if not self.checks.dkg: self.am.dkg() - if not self.checks.config: - self.am.config() + if not self.checks.upstream_config: + self.am.upstream_config() From 19ef38f663f95ced00540d29542e3b3dc452eb2d Mon Sep 17 00:00:00 2001 From: badrogger Date: Sat, 10 Jun 2023 16:28:18 +0000 Subject: [PATCH 34/84] Handle exceptions properly for Task --- core/schains/task.py | 5 
++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/core/schains/task.py b/core/schains/task.py index abf3cf04b..89f2ad63b 100644 --- a/core/schains/task.py +++ b/core/schains/task.py @@ -13,7 +13,10 @@ def __init__(self, name: str, action: Callable, index: int = 0) -> None: self.action = action def run(self) -> None: - self.action() + try: + self.action() + except Exception as e: + logger.exception('Task %s failed with %s', self.name, e) def keep_tasks_running( From 73801b8cc36ad55204965cbac585e0378d3e1526 Mon Sep 17 00:00:00 2001 From: badrogger Date: Sat, 10 Jun 2023 16:29:01 +0000 Subject: [PATCH 35/84] Add skaled_monitor module --- core/schains/monitor/skaled_monitor.py | 177 +++++++++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 core/schains/monitor/skaled_monitor.py diff --git a/core/schains/monitor/skaled_monitor.py b/core/schains/monitor/skaled_monitor.py new file mode 100644 index 000000000..6b42097b1 --- /dev/null +++ b/core/schains/monitor/skaled_monitor.py @@ -0,0 +1,177 @@ +# -*- coding: utf-8 -*- +# +# This file is part of SKALE Admin +# +# Copyright (C) 2021 SKALE Labs +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+ +import logging +from abc import abstractmethod + +from core.schains.monitor.base_monitor import IMonitor +from core.schains.checks import SkaledChecks +from core.schains.monitor.action import SkaledActionManager +from core.schains.skaled_status import SkaledStatus +from web.models.schain import SChainRecord + + +logger = logging.getLogger(__name__) + + +class BaseSkaledMonitor(IMonitor): + def __init__( + self, + action_manager: SkaledActionManager, + checks: SkaledChecks + ) -> None: + self.am = action_manager + self.p = self.am.p + self.checks = checks + + @abstractmethod + def run(self) -> None: + pass + + +class RegularSkaledMonitor(BaseSkaledMonitor): + def run(self) -> None: + if self.checks.config or self.am.update_config(): + if not self.checks.firewall_rules: + self.am.firewall_rules() + if not self.checks.volume: + self.am.volume() + if self.checks.volume and not self.checks.skaled_container: + self.am.skaled_container() + + +class RepairSkaledMonitor(BaseSkaledMonitor): + def run(self) -> None: + if self.checks.config or self.am.update_config(): + if not self.checks.firewall: + self.am.firewall() + if not self.checks.volume: + self.am.volume() + if self.checks.volume and not self.checks.skaled_container: + self.am.skaled_container() + + +class BackupSkaledMonitor(BaseSkaledMonitor): + def run(self) -> None: + if self.checks.config or self.am.update_config(): + if not self.checks.volume: + self.am.volume() + if not self.checks.firewall: + self.am.firewall_rules() + if not self.skaled_container: + self.am.skaled_container(download_snapshot=True) + if not self.checks.rpc: + self.am.skaled_rpc() + if not self.ima_container: + self.am.ima_container() + + +class RecreateSkaledMonitor(BaseSkaledMonitor): + def run(self) -> None: + logger.info( + '%s. Reload requested. 
Going to restart sChain container', + self.p + ) + self.am.reloaded_skaled_container() + + +class AfterExitTimeSkaledMonitor(BaseSkaledMonitor): + def run(self) -> None: + if not self.checks.config_updated: + self.am.update_config() + if self.checks.upstream_config and not self.checks.firewall: + self.am.firewall_rules() + self.am.reloaded_skaled_container() + + +class NewConfigSkaledMonitor(BaseSkaledMonitor): + # IVD should only be run for node rotation cases / or get timestamp for ip change. + def run(self): + if self.checks.config and not self.checks.firewall: + self.am.firewall_rules() + if not self.checks.skaled_container: + self.am.skaled_container() + if not self.checks.rpc: + self.am.skaled_rpc() + if not self.checks.ima_container: + self.am.ima_container() + # IVD TODO Send exit only once + self.am.send_exit_request() + + +def is_backup_mode(schain_record: SChainRecord, backup_run: bool) -> bool: + return schain_record.first_run and not schain_record.new_schain and backup_run + + +def is_repair_mode( + schain_record: SChainRecord, + checks: SkaledChecks, + skaled_status: SkaledStatus +) -> bool: + return schain_record.repair_mode or is_skaled_repair_status(checks, skaled_status) + + +def is_new_config(checks: SkaledChecks) -> bool: + return checks.config and not checks.config_updated + + +def is_exit_time_reached(checks: SkaledChecks, skaled_status: SkaledStatus) -> bool: + skaled_status.log() + return not checks.skaled_container.status and skaled_status.exit_time_reached + + +def is_reload_mode(schain_record: SChainRecord) -> bool: + return schain_record.needs_reload + + +def is_skaled_repair_status(checks: SkaledChecks, skaled_status: SkaledStatus) -> bool: + skaled_status.log() + needs_repair = skaled_status.clear_data_dir and skaled_status.start_from_snapshot + return not checks.skaled_container.status and needs_repair + + +def is_skaled_reload_status(checks: SkaledChecks, skaled_status: SkaledStatus) -> bool: + skaled_status.log() + needs_reload = 
skaled_status.start_again and not skaled_status.start_from_snapshot + return not checks.skaled_container.status and needs_reload + + +def get_skaled_monitor( + action_manager: SkaledActionManager, + checks: SkaledChecks, + schain_record: SChainRecord, + skaled_status: SkaledStatus, + backup_run: bool = False +) -> BaseSkaledMonitor: + mon_type = RegularSkaledMonitor + if is_backup_mode(schain_record, backup_run): + mon_type = BackupSkaledMonitor + if is_repair_mode(schain_record, checks, skaled_status): + mon_type = RepairSkaledMonitor + if is_new_config(checks): + mon_type = NewConfigSkaledMonitor + if is_exit_time_reached(checks, skaled_status): + mon_type = AfterExitTimeSkaledMonitor + elif is_reload_mode(schain_record): + mon_type = RecreateSkaledMonitor + + return mon_type( + action_manager=action_manager, + checks=checks + ) From 9a9a9d527e6d2c800c3d8a6c3c73069d6eb6e974 Mon Sep 17 00:00:00 2001 From: badrogger Date: Sat, 10 Jun 2023 16:29:26 +0000 Subject: [PATCH 36/84] Fix config actions tests --- .../monitor/action/config_action_test.py | 29 +++++++++++++++---- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/tests/schains/monitor/action/config_action_test.py b/tests/schains/monitor/action/config_action_test.py index 2125976a4..e8825f8e2 100644 --- a/tests/schains/monitor/action/config_action_test.py +++ b/tests/schains/monitor/action/config_action_test.py @@ -1,6 +1,9 @@ +import shutil + import pytest from core.schains.checks import ConfigChecks +from core.schains.config.directory import schain_config_dir from core.schains.monitor.action import ConfigActionManager from web.models.schain import SChainRecord @@ -51,12 +54,28 @@ def config_am( ) -def test_config_actions(config_am, config_checks): +def test_upstream_config_actions(config_am, config_checks): config_am.config_dir() - assert config_checks.config_dir.status - assert not config_checks.config.status + assert config_checks.config_dir + assert not config_checks.upstream_config + + # 
Folder created for secret key. Temporary moving + schain_folder = schain_config_dir(config_am.name) + tmp_schain_folder = '.' + schain_folder + try: + shutil.move(schain_folder, tmp_schain_folder) + assert not config_checks.config_dir + assert not config_checks.upstream_config + finally: + shutil.move(tmp_schain_folder, schain_folder) # DKG action is tested separetely in dkg_test module - config_am.config() - assert config_checks.config.status + config_am.config_dir() + config_am.upstream_config() + assert config_checks.config_dir + assert config_checks.upstream_config + + # Try to recreate config with no changes + config_am.upstream_config() + assert config_checks.upstream_config From dc23cf6a581b76d73a10ee435e6fef949ee82b46 Mon Sep 17 00:00:00 2001 From: badrogger Date: Sat, 10 Jun 2023 16:29:53 +0000 Subject: [PATCH 37/84] Bump pytest version to 7.x.x --- requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 5ad45b3f6..3fe677b4a 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,4 +1,4 @@ -pytest==3.8.1 +pytest==7.1.3 flake8==5.0.4 freezegun==0.3.15 mock==4.0.2 From ad49e10c1f51d197caf6b6d1d88db95ff90e0827 Mon Sep 17 00:00:00 2001 From: badrogger Date: Sun, 11 Jun 2023 22:48:19 +0000 Subject: [PATCH 38/84] Improve skaled action test --- .../monitor/action/skaled_action_test.py | 182 +++++++++++++++++- tests/schains/monitor/regular_monitor_test.py | 2 +- 2 files changed, 174 insertions(+), 10 deletions(-) diff --git a/tests/schains/monitor/action/skaled_action_test.py b/tests/schains/monitor/action/skaled_action_test.py index 1bf8935da..9c7ac0a5f 100644 --- a/tests/schains/monitor/action/skaled_action_test.py +++ b/tests/schains/monitor/action/skaled_action_test.py @@ -1,11 +1,43 @@ import pytest +import mock from core.schains.checks import SkaledChecks +from core.schains.cleaner import remove_ima_container from core.schains.monitor.action import SkaledActionManager 
- +from core.schains.runner import get_container_info +from tools.configs.containers import SCHAIN_CONTAINER, IMA_CONTAINER from web.models.schain import SChainRecord +def run_ima_container_mock(schain: dict, mainnet_chain_id: int, dutils=None): + image_name, container_name, _, _ = get_container_info( + IMA_CONTAINER, schain['name']) + dutils.safe_rm(container_name) + dutils.run_container( + image_name=image_name, + name=container_name, + entrypoint='bash -c "while true; do foo; sleep 2; done"' + ) + + +def monitor_schain_container_mock( + schain, + schain_record, + skaled_status, + public_key=None, + start_ts=None, + dutils=None +): + image_name, container_name, _, _ = get_container_info( + SCHAIN_CONTAINER, schain['name']) + dutils.safe_rm(container_name) + dutils.run_container( + image_name=image_name, + name=container_name, + entrypoint='bash -c "while true; do foo; sleep 2; done"' + ) + + @pytest.fixture def rotation_data(schain_db, skale): return skale.node_rotation.get_rotation(schain_db) @@ -61,15 +93,147 @@ def skaled_am( ) -def test_skaled_actions(skaled_am, skaled_checks, cleanup_schain_containers): +# def test_skaled_actions(skaled_am, skaled_checks, cleanup_schain_containers): +# try: +# skaled_am.firewall_rules() +# assert skaled_checks.firewall_rules +# skaled_am.volume() +# assert skaled_checks.volume +# skaled_am.skaled_container() +# assert skaled_checks.skaled_container +# skaled_am.ima_container() +# assert skaled_checks.ima_container +# # Try to create already created volume +# skaled_am.volume() +# assert skaled_checks.volume +# # Try to create already created container +# skaled_am.skaled_container() +# assert skaled_checks.skaled_container +# finally: +# skaled_am.cleanup_schain_docker_entity() +# +# +# def test_skaled_restart_reload_actions(skaled_am, skaled_checks, cleanup_schain_containers): +# try: +# skaled_am.volume() +# assert skaled_checks.volume +# skaled_am.skaled_container() +# skaled_am.reloaded_skaled_container() +# assert 
skaled_checks.skaled_container +# finally: +# skaled_am.cleanup_schain_docker_entity() + + +def test_volume_action(skaled_am, skaled_checks): try: - skaled_am.firewall_rules() - assert skaled_checks.firewall_rules + assert not skaled_checks.volume() skaled_am.volume() - assert skaled_checks.volume - skaled_am.skaled_container() - assert skaled_checks.skaled_container - skaled_am.ima_container() - assert skaled_checks.ima_container + assert skaled_checks.volume() + skaled_am.volume() + assert skaled_checks.volume() finally: skaled_am.cleanup_schain_docker_entity() + + +def test_base_monitor_skaled_container(skaled_am): + skaled_am.volume() + with mock.patch( + 'core.schains.monitor.base_monitor.monitor_schain_container', + monitor_schain_container_mock + ): + assert not skaled_am.skaled_container() + assert skaled_am.skaled_container() + skaled_am.cleanup_schain_docker_entity() + + +def test_base_monitor_skaled_container_sync(skaled_am): + skaled_am.volume() + with mock.patch( + 'core.schains.monitor.base_monitor.monitor_schain_container', + new=mock.Mock() + ) as monitor_schain_mock: + skaled_am.skaled_container(download_snapshot=True) + + monitor_schain_mock.assert_called_with( + skaled_am.schain, + schain_record=skaled_am.schain_record, + skaled_status=skaled_am.skaled_status, + public_key='0:0:1:0', + start_ts=None, + dutils=skaled_am.dutils + ) + assert monitor_schain_mock.call_count == 1 + + +def test_base_monitor_skaled_container_sync_delay_start(skaled_am): + skaled_am.volume() + with mock.patch( + 'core.schains.monitor.base_monitor.monitor_schain_container', + new=mock.Mock() + ) as monitor_schain_mock: + skaled_am.finish_ts = 1245 + skaled_am.skaled_container(download_snapshot=True, delay_start=True) + + monitor_schain_mock.assert_called_with( + skaled_am.schain, + schain_record=skaled_am.schain_record, + skaled_status=skaled_am.skaled_status, + public_key='0:0:1:0', + start_ts=1245, + dutils=skaled_am.dutils + ) + assert monitor_schain_mock.call_count == 
1 + + +def test_base_monitor_restart_skaled_container(skaled_am): + skaled_am.volume() + with mock.patch( + 'core.schains.monitor.base_monitor.monitor_schain_container', + monitor_schain_container_mock + ): + assert not skaled_am.restart_skaled_container() + assert skaled_am.restart_skaled_container() + skaled_am.cleanup_schain_docker_entity() + + +def test_base_monitor_ima_container(skaled_am, schain_config, predeployed_ima): + skaled_am.config_dir() + skaled_am.ima_data.linked = True + with mock.patch( + 'core.schains.monitor.containers.run_ima_container', + run_ima_container_mock + ): + assert not skaled_am.ima_container() + assert skaled_am.ima_container() + remove_ima_container(skaled_am.name, dutils=skaled_am.dutils) + + +def test_base_monitor_cleanup(skaled_am, skaled_checks): + skaled_am.volume() + with mock.patch( + 'core.schains.monitor.base_monitor.monitor_schain_container', + monitor_schain_container_mock + ): + skaled_am.skaled_container() + + assert skaled_checks.volume.status + assert skaled_checks.skaled_container + skaled_am.cleanup_schain_docker_entity() + assert skaled_checks.volume.status + assert skaled_checks.skaled_container + + +def test_schain_finish_ts(skale, schain_on_contracts): + name = schain_on_contracts + max_node_id = skale.nodes.get_nodes_number() - 1 + assert skale.node_rotation.get_schain_finish_ts(max_node_id, name) is None + + +def test_display_skaled_logs(skale, skaled_am, _schain_name): + skaled_am.volume() + with mock.patch( + 'core.schains.monitor.base_monitor.monitor_schain_container', + monitor_schain_container_mock + ): + skaled_am.skaled_container() + skaled_am.display_skaled_logs() diff --git a/tests/schains/monitor/regular_monitor_test.py b/tests/schains/monitor/regular_monitor_test.py index e34420f9b..3395adab0 100644 --- a/tests/schains/monitor/regular_monitor_test.py +++ b/tests/schains/monitor/regular_monitor_test.py @@ -9,7 +9,7 @@ from core.schains.runner import get_container_name from core.schains.checks import 
SChainChecks -from core.schains.monitor.config_monitor import RegularConfigMonitor +from core.schains.monitor import RegularMonitor from core.schains.ima import ImaData from tools.configs import SGX_CERTIFICATES_FOLDER, SGX_SERVER_URL From 7e9e7d62142276b13d5f48b43f730a35941bdfe0 Mon Sep 17 00:00:00 2001 From: badrogger Date: Mon, 12 Jun 2023 23:06:15 +0000 Subject: [PATCH 39/84] Handle empty skaled_status file. Fix skaled_action tests --- core/schains/monitor/action.py | 11 +- core/schains/monitor/skaled_monitor.py | 17 +- core/schains/skaled_status.py | 8 + .../monitor/action/skaled_action_test.py | 189 +++++++++--------- 4 files changed, 125 insertions(+), 100 deletions(-) diff --git a/core/schains/monitor/action.py b/core/schains/monitor/action.py index 7466d18f8..b96759754 100644 --- a/core/schains/monitor/action.py +++ b/core/schains/monitor/action.py @@ -35,10 +35,7 @@ from core.schains.firewall.types import IRuleController from core.schains.volume import init_data_volume -from core.schains.rotation import ( - get_schain_public_key, - set_rotation_for_schain -) +from core.schains.rotation import set_rotation_for_schain from core.schains.limits import get_schain_type @@ -199,6 +196,7 @@ def __init__( ima_data: ImaData, rule_controller: IRuleController, finish_ts: int, + public_key: str, checks: IChecks, dutils: DockerUtils = None ): @@ -210,6 +208,7 @@ def __init__( self.rc = rule_controller self.skaled_status = init_skaled_status(self.schain['name']) self.schain_type = get_schain_type(schain['partOfNode']) + self.public_key = public_key self.dutils = dutils or DockerUtils() @@ -248,7 +247,7 @@ def skaled_container(self, download_snapshot: bool = False, delay_start: bool = public_key, start_ts = None, None if download_snapshot: - public_key = get_schain_public_key(self.skale, self.name) + public_key = self.public_key if delay_start: start_ts = self.finish_ts @@ -308,7 +307,7 @@ def skaled_rpc(self) -> bool: @BaseActionManager.monitor_block def 
ima_container(self) -> bool: - initial_status = self.checks.ima_container.status + initial_status = self.checks.ima_container if not initial_status: monitor_ima_container( self.schain, diff --git a/core/schains/monitor/skaled_monitor.py b/core/schains/monitor/skaled_monitor.py index 6b42097b1..c927ab9d0 100644 --- a/core/schains/monitor/skaled_monitor.py +++ b/core/schains/monitor/skaled_monitor.py @@ -19,6 +19,7 @@ import logging from abc import abstractmethod +from typing import Optional from core.schains.monitor.base_monitor import IMonitor from core.schains.checks import SkaledChecks @@ -122,7 +123,7 @@ def is_backup_mode(schain_record: SChainRecord, backup_run: bool) -> bool: def is_repair_mode( schain_record: SChainRecord, checks: SkaledChecks, - skaled_status: SkaledStatus + skaled_status: Optional[SkaledStatus] ) -> bool: return schain_record.repair_mode or is_skaled_repair_status(checks, skaled_status) @@ -131,7 +132,9 @@ def is_new_config(checks: SkaledChecks) -> bool: return checks.config and not checks.config_updated -def is_exit_time_reached(checks: SkaledChecks, skaled_status: SkaledStatus) -> bool: +def is_exit_time_reached(checks: SkaledChecks, skaled_status: Optional[SkaledStatus]) -> bool: + if not skaled_status: + return False skaled_status.log() return not checks.skaled_container.status and skaled_status.exit_time_reached @@ -140,13 +143,17 @@ def is_reload_mode(schain_record: SChainRecord) -> bool: return schain_record.needs_reload -def is_skaled_repair_status(checks: SkaledChecks, skaled_status: SkaledStatus) -> bool: +def is_skaled_repair_status(checks: SkaledChecks, skaled_status: Optional[SkaledStatus]) -> bool: + if skaled_status is None: + return False skaled_status.log() needs_repair = skaled_status.clear_data_dir and skaled_status.start_from_snapshot return not checks.skaled_container.status and needs_repair -def is_skaled_reload_status(checks: SkaledChecks, skaled_status: SkaledStatus) -> bool: +def is_skaled_reload_status(checks: 
SkaledChecks, skaled_status: Optional[SkaledStatus]) -> bool: + if skaled_status is None: + return False skaled_status.log() needs_reload = skaled_status.start_again and not skaled_status.start_from_snapshot return not checks.skaled_container.status and needs_reload @@ -156,7 +163,7 @@ def get_skaled_monitor( action_manager: SkaledActionManager, checks: SkaledChecks, schain_record: SChainRecord, - skaled_status: SkaledStatus, + skaled_status: Optional[SkaledStatus], backup_run: bool = False ) -> BaseSkaledMonitor: mon_type = RegularSkaledMonitor diff --git a/core/schains/skaled_status.py b/core/schains/skaled_status.py index a50e64695..02186a4a9 100644 --- a/core/schains/skaled_status.py +++ b/core/schains/skaled_status.py @@ -21,6 +21,7 @@ import json import logging from json.decoder import JSONDecodeError +from typing import Optional from core.schains.config.directory import skaled_status_filepath from tools.config_utils import config_getter, log_broken_status_file @@ -101,3 +102,10 @@ def log(self) -> None: def init_skaled_status(schain_name) -> SkaledStatus: status_filepath = skaled_status_filepath(schain_name) return SkaledStatus(status_filepath) + + +def get_skaled_status(schain_name) -> Optional[SkaledStatus]: + status_path = skaled_status_filepath(schain_name) + if os.path.isfile(status_path): + return SkaledStatus(status_path) + return None diff --git a/tests/schains/monitor/action/skaled_action_test.py b/tests/schains/monitor/action/skaled_action_test.py index 9c7ac0a5f..6776a702c 100644 --- a/tests/schains/monitor/action/skaled_action_test.py +++ b/tests/schains/monitor/action/skaled_action_test.py @@ -4,6 +4,7 @@ from core.schains.checks import SkaledChecks from core.schains.cleaner import remove_ima_container from core.schains.monitor.action import SkaledActionManager +from core.schains.rotation import get_schain_public_key from core.schains.runner import get_container_info from tools.configs.containers import SCHAIN_CONTAINER, IMA_CONTAINER from 
web.models.schain import SChainRecord @@ -83,10 +84,12 @@ def skaled_am( ) rotation_data = skale.node_rotation.get_rotation(name) schain = skale.schains.get_by_name(name) + public_key = get_schain_public_key(skale, name) return SkaledActionManager( schain=schain, rule_controller=rule_controller, ima_data=ima_data, + public_key=public_key, finish_ts=finish_ts, checks=skaled_checks, dutils=dutils @@ -126,101 +129,107 @@ def skaled_am( def test_volume_action(skaled_am, skaled_checks): try: - assert not skaled_checks.volume() + assert not skaled_checks.volume skaled_am.volume() - assert skaled_checks.volume() + assert skaled_checks.volume skaled_am.volume() - assert skaled_checks.volume() + assert skaled_checks.volume finally: skaled_am.cleanup_schain_docker_entity() -def test_base_monitor_skaled_container(skaled_am): - skaled_am.volume() - with mock.patch( - 'core.schains.monitor.base_monitor.monitor_schain_container', - monitor_schain_container_mock - ): - assert not skaled_am.skaled_container() - assert skaled_am.skaled_container() - skaled_am.cleanup_schain_docker_entity() +def test_skaled_container_action(skaled_am, skaled_checks): + try: + with mock.patch( + 'core.schains.monitor.action.monitor_schain_container', + monitor_schain_container_mock + ): + skaled_am.volume() + assert not skaled_checks.skaled_container + skaled_am.skaled_container() + assert skaled_checks.skaled_container + finally: + skaled_am.cleanup_schain_docker_entity() -def test_base_monitor_skaled_container_sync(skaled_am): - skaled_am.volume() - with mock.patch( - 'core.schains.monitor.base_monitor.monitor_schain_container', - new=mock.Mock() - ) as monitor_schain_mock: - skaled_am.skaled_container(download_snapshot=True) - - monitor_schain_mock.assert_called_with( - skaled_am.schain, - schain_record=skaled_am.schain_record, - skaled_status=skaled_am.skaled_status, - public_key='0:0:1:0', - start_ts=None, - dutils=skaled_am.dutils - ) - assert monitor_schain_mock.call_count == 1 - - -def 
test_base_monitor_skaled_container_sync_delay_start(skaled_am): - skaled_am.volume() - with mock.patch( - 'core.schains.monitor.base_monitor.monitor_schain_container', - new=mock.Mock() - ) as monitor_schain_mock: - skaled_am.finish_ts = 1245 - skaled_am.skaled_container(download_snapshot=True, delay_start=True) - - monitor_schain_mock.assert_called_with( - skaled_am.schain, - schain_record=skaled_am.schain_record, - skaled_status=skaled_am.skaled_status, - public_key='0:0:1:0', - start_ts=1245, - dutils=skaled_am.dutils - ) - assert monitor_schain_mock.call_count == 1 +def test_skaled_container_with_snapshot_action(skaled_am): + try: + skaled_am.volume() + with mock.patch( + 'core.schains.monitor.action.monitor_schain_container', + new=mock.Mock() + ) as monitor_schain_mock: + skaled_am.skaled_container(download_snapshot=True) + + monitor_schain_mock.assert_called_with( + skaled_am.schain, + schain_record=skaled_am.schain_record, + skaled_status=skaled_am.skaled_status, + public_key='0:0:1:0', + start_ts=None, + dutils=skaled_am.dutils + ) + assert monitor_schain_mock.call_count == 1 + finally: + skaled_am.cleanup_schain_docker_entity() -def test_base_monitor_restart_skaled_container(skaled_am): - skaled_am.volume() - with mock.patch( - 'core.schains.monitor.base_monitor.monitor_schain_container', - monitor_schain_container_mock - ): - assert not skaled_am.restart_skaled_container() - assert skaled_am.restart_skaled_container() - skaled_am.cleanup_schain_docker_entity() +def test_base_monitor_skaled_container_snapshot_delay_start(skaled_am): + try: + skaled_am.volume() + with mock.patch( + 'core.schains.monitor.action.monitor_schain_container', + new=mock.Mock() + ) as monitor_schain_mock: + skaled_am.finish_ts = 1245 + skaled_am.skaled_container(download_snapshot=True, delay_start=True) + + monitor_schain_mock.assert_called_with( + skaled_am.schain, + schain_record=skaled_am.schain_record, + skaled_status=skaled_am.skaled_status, + public_key='0:0:1:0', + 
start_ts=1245, + dutils=skaled_am.dutils + ) + assert monitor_schain_mock.call_count == 1 + finally: + skaled_am.cleanup_schain_docker_entity() + + +def test_restart_skaled_container_action(skaled_am, skaled_checks): + try: + skaled_am.volume() + with mock.patch( + 'core.schains.monitor.action.monitor_schain_container', + monitor_schain_container_mock + ): + assert not skaled_checks.skaled_container + skaled_am.restart_skaled_container() + assert skaled_checks.skaled_container + skaled_am.restart_skaled_container() + assert skaled_checks.skaled_container + finally: + skaled_am.cleanup_schain_docker_entity() -def test_base_monitor_ima_container(skaled_am, schain_config, predeployed_ima): - skaled_am.config_dir() - skaled_am.ima_data.linked = True - with mock.patch( - 'core.schains.monitor.containers.run_ima_container', - run_ima_container_mock - ): - assert not skaled_am.ima_container() - assert skaled_am.ima_container() - remove_ima_container(skaled_am.name, dutils=skaled_am.dutils) - - -def test_base_monitor_cleanup(skaled_am, skaled_checks): - skaled_am.volume() - with mock.patch( - 'core.schains.monitor.base_monitor.monitor_schain_container', - monitor_schain_container_mock - ): - skaled_am.skaled_container() - - assert skaled_checks.volume.status - assert skaled_checks.skaled_container +def test_base_monitor_ima_container(skaled_am, skaled_checks, schain_config, predeployed_ima): + try: + skaled_am.ima_data.linked = True + with mock.patch( + 'core.schains.monitor.containers.run_ima_container', + run_ima_container_mock + ): + assert not skaled_checks.ima_container + skaled_am.ima_container() + assert skaled_checks.ima_container + finally: + remove_ima_container(skaled_am.name, dutils=skaled_am.dutils) + + +def test_base_monitor_cleanup_empty(skaled_am, skaled_checks): skaled_am.cleanup_schain_docker_entity() - assert skaled_checks.volume.status - assert skaled_checks.skaled_container + assert not skaled_checks.skaled_container def test_schain_finish_ts(skale, 
schain_on_contracts): @@ -230,10 +239,12 @@ def test_schain_finish_ts(skale, schain_on_contracts): def test_display_skaled_logs(skale, skaled_am, _schain_name): - skaled_am.volume() - with mock.patch( - 'core.schains.monitor.base_monitor.monitor_schain_container', - monitor_schain_container_mock - ): - skaled_am.skaled_container() - skaled_am.display_skaled_logs() + try: + skaled_am.volume() + with mock.patch( + 'core.schains.monitor.action.monitor_schain_container', + monitor_schain_container_mock + ): + skaled_am.skaled_container() + finally: + skaled_am.display_skaled_logs() From a1218d889cf2863788cf26047ec36eb4e30f6887 Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 13 Jun 2023 22:00:16 +0000 Subject: [PATCH 40/84] Fix upstream config file determination --- core/schains/config/directory.py | 2 +- core/schains/config/main.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/core/schains/config/directory.py b/core/schains/config/directory.py index 612e5eda1..5a81e1063 100644 --- a/core/schains/config/directory.py +++ b/core/schains/config/directory.py @@ -37,7 +37,7 @@ def config_filename(name: str) -> str: def new_config_prefix(name: str) -> str: - return f'scain_{name}_' + return f'schain_{name}_' def new_config_filename(name: str, rotation_id: int) -> str: diff --git a/core/schains/config/main.py b/core/schains/config/main.py index 412097c79..03979d995 100644 --- a/core/schains/config/main.py +++ b/core/schains/config/main.py @@ -135,18 +135,18 @@ def schain_config_version_match(schain_name, schain_record=None): def get_upstream_config_filepath(schain_name) -> Optional[str]: - # IVD TODO filter secret_key files config_dir = schain_config_dir(schain_name) prefix = new_config_prefix(schain_name) dir_files = None if os.path.isdir(config_dir): + configs = [ + os.path.join(config_dir, fname) + for fname in os.listdir(config_dir) + if fname.startswith(prefix) + ] dir_files = sorted( - filter(lambda f: 
config_dir.startswith(prefix), os.listdir(config_dir)), - key=lambda fname: os.stat( - os.path.join( - config_dir, - fname - ), follow_symlinks=False).st_mtime + configs, + key=lambda path: os.stat(path, follow_symlinks=False).st_mtime ) if not dir_files: return None From 2fb33a4c48f78e62e6bf0d5e77a93e4b2434f4c8 Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 13 Jun 2023 22:17:08 +0000 Subject: [PATCH 41/84] Change logging format --- core/schains/process_manager.py | 16 ++++++++++------ core/schains/task.py | 2 +- tools/configs/logs.py | 2 +- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/core/schains/process_manager.py b/core/schains/process_manager.py index d1387eacf..b7790413a 100644 --- a/core/schains/process_manager.py +++ b/core/schains/process_manager.py @@ -70,12 +70,16 @@ def run_process_manager(skale, skale_ima, node_config): if not monitor_process_alive: logger.info(f'{log_prefix} PID {schain_record.monitor_id} is not running, spawning...') - process = Process(target=run_monitor_for_schain, args=( - skale, - skale_ima, - node_config, - schain - )) + process = Process( + name=schain['name'], + target=run_monitor_for_schain, + args=( + skale, + skale_ima, + node_config, + schain + ) + ) process.start() schain_record.set_monitor_id(process.ident) logger.info(f'{log_prefix} Process started: PID = {process.ident}') diff --git a/core/schains/task.py b/core/schains/task.py index 89f2ad63b..e6231ed07 100644 --- a/core/schains/task.py +++ b/core/schains/task.py @@ -35,7 +35,7 @@ def keep_tasks_running( def run_tasks(name: str, tasks: List[Task]) -> None: - with ThreadPoolExecutor(max_workers=len(tasks), thread_name_prefix=name) as executor: + with ThreadPoolExecutor(max_workers=len(tasks), thread_name_prefix='T') as executor: futures: List[Optional[Future]] = [None for i in range(len(tasks))] while True: keep_tasks_running(executor, tasks, futures) diff --git a/tools/configs/logs.py b/tools/configs/logs.py index 35376a400..2a0c89496 100644 --- 
a/tools/configs/logs.py +++ b/tools/configs/logs.py @@ -43,5 +43,5 @@ LOG_BACKUP_COUNT = 3 -ADMIN_LOG_FORMAT = '[%(asctime)s %(levelname)s] - %(process)d - %(threadName)s - %(name)s:%(lineno)d - %(message)s' # noqa +ADMIN_LOG_FORMAT = '[%(asctime)s %(levelname)s][%(process)d][%(processName)s][%(threadName)s] - %(name)s:%(lineno)d - %(message)s' # noqa API_LOG_FORMAT = '[%(asctime)s] %(process)d %(levelname)s %(url)s %(module)s: %(message)s' # noqa From 7007692f18c5737a65ed9d7952859635c07d6d90 Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 13 Jun 2023 22:18:26 +0000 Subject: [PATCH 42/84] Add update config test --- .../monitor/action/skaled_action_test.py | 75 ++++++++++++++++++- 1 file changed, 72 insertions(+), 3 deletions(-) diff --git a/tests/schains/monitor/action/skaled_action_test.py b/tests/schains/monitor/action/skaled_action_test.py index 6776a702c..8ed535101 100644 --- a/tests/schains/monitor/action/skaled_action_test.py +++ b/tests/schains/monitor/action/skaled_action_test.py @@ -1,14 +1,24 @@ +import datetime +import json +import os +import time + +import freezegun import pytest import mock from core.schains.checks import SkaledChecks from core.schains.cleaner import remove_ima_container +from core.schains.config.directory import new_config_filename, schain_config_dir from core.schains.monitor.action import SkaledActionManager from core.schains.rotation import get_schain_public_key from core.schains.runner import get_container_info from tools.configs.containers import SCHAIN_CONTAINER, IMA_CONTAINER from web.models.schain import SChainRecord +CURRENT_TIMESTAMP = 1594903080 +CURRENT_DATETIME = datetime.datetime.utcfromtimestamp(CURRENT_TIMESTAMP) + def run_ima_container_mock(schain: dict, mainnet_chain_id: int, dutils=None): image_name, container_name, _, _ = get_container_info( @@ -174,7 +184,7 @@ def test_skaled_container_with_snapshot_action(skaled_am): skaled_am.cleanup_schain_docker_entity() -def 
test_base_monitor_skaled_container_snapshot_delay_start(skaled_am): +def test_skaled_container_snapshot_delay_start_action(skaled_am): try: skaled_am.volume() with mock.patch( @@ -198,6 +208,7 @@ def test_base_monitor_skaled_container_snapshot_delay_start(skaled_am): def test_restart_skaled_container_action(skaled_am, skaled_checks): + skaled_am.reloaded_skaled_container() try: skaled_am.volume() with mock.patch( @@ -209,11 +220,13 @@ def test_restart_skaled_container_action(skaled_am, skaled_checks): assert skaled_checks.skaled_container skaled_am.restart_skaled_container() assert skaled_checks.skaled_container + skaled_am.reloaded_skaled_container() + assert skaled_checks.skaled_container finally: skaled_am.cleanup_schain_docker_entity() -def test_base_monitor_ima_container(skaled_am, skaled_checks, schain_config, predeployed_ima): +def test_ima_container_action(skaled_am, skaled_checks, schain_config, predeployed_ima): try: skaled_am.ima_data.linked = True with mock.patch( @@ -223,11 +236,13 @@ def test_base_monitor_ima_container(skaled_am, skaled_checks, schain_config, pre assert not skaled_checks.ima_container skaled_am.ima_container() assert skaled_checks.ima_container + skaled_am.ima_container() + assert skaled_checks.ima_container finally: remove_ima_container(skaled_am.name, dutils=skaled_am.dutils) -def test_base_monitor_cleanup_empty(skaled_am, skaled_checks): +def test_cleanup_empty_action(skaled_am, skaled_checks): skaled_am.cleanup_schain_docker_entity() assert not skaled_checks.skaled_container @@ -239,6 +254,9 @@ def test_schain_finish_ts(skale, schain_on_contracts): def test_display_skaled_logs(skale, skaled_am, _schain_name): + skaled_am.log_executed_blocks() + # Don't display if no container + skaled_am.display_skaled_logs() try: skaled_am.volume() with mock.patch( @@ -248,3 +266,54 @@ def test_display_skaled_logs(skale, skaled_am, _schain_name): skaled_am.skaled_container() finally: skaled_am.display_skaled_logs() + 
skaled_am.cleanup_schain_docker_entity() + + +@freezegun.freeze_time(CURRENT_DATETIME) +def test_upd_schain_record(skaled_am, skaled_checks): + # Prepare fake record + r = SChainRecord.get_by_name(skaled_am.name) + r.set_restart_count(1) + r.set_failed_rpc_count(1) + + assert r.monitor_last_seen != CURRENT_DATETIME + skaled_am._upd_last_seen() + r = SChainRecord.get_by_name(skaled_am.name) + assert r.monitor_last_seen == CURRENT_DATETIME + skaled_am._upd_schain_record() + r = SChainRecord.get_by_name(skaled_am.name) + + assert not r.first_run + assert not r.new_schain + r.restart_count == 0 + r.failed_rpc_count == 0 + + +def test_update_config(skaled_am, skaled_checks): + folder = schain_config_dir(skaled_am.name) + config_path = os.path.join(folder, f'schain_{skaled_am.name}.json') + os.remove(config_path) + + assert not skaled_checks.config + assert not skaled_checks.config_updated + upstream_path = os.path.join(folder, new_config_filename(skaled_am.name, rotation_id=5)) + config_content = {'config': 'mock_v5'} + with open(upstream_path, 'w') as upstream_file: + json.dump(config_content, upstream_file) + skaled_am.update_config() + with open(config_path) as config_file: + json.load(config_file) == config_content + assert skaled_checks.config + assert skaled_checks.config_updated + + time.sleep(1) + upstream_path = os.path.join(folder, new_config_filename(skaled_am.name, rotation_id=6)) + config_content = {'config': 'mock_v6'} + with open(upstream_path, 'w') as upstream_file: + json.dump(config_content, upstream_file) + + assert skaled_checks.config + assert not skaled_checks.config_updated + skaled_am.update_config() + + assert skaled_checks.config_updated From 7f6fb797d32ef29ef03838f5aff99baac863506d Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 13 Jun 2023 22:29:13 +0000 Subject: [PATCH 43/84] Raise custom exception for setExitTime request --- core/schains/rotation.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git 
a/core/schains/rotation.py b/core/schains/rotation.py index dc7976c46..24b9ecfbb 100644 --- a/core/schains/rotation.py +++ b/core/schains/rotation.py @@ -27,13 +27,17 @@ logger = logging.getLogger(__name__) +class ExitRequestError(Exception): + pass + + def set_rotation_for_schain(schain_name: str, timestamp: int) -> None: url = get_skaled_http_address(schain_name) _send_rotation_request(url, timestamp) def _send_rotation_request(url, timestamp): - logger.info(f'Send rotation request: {timestamp}') + logger.info(f'Sending rotation request: {timestamp}') headers = {'content-type': 'application/json'} data = { 'finishTime': timestamp @@ -50,7 +54,7 @@ def _send_rotation_request(url, timestamp): headers=headers, ).json() if response.get('error'): - raise Exception(response['error']['message']) + raise ExitRequestError(response['error']['message']) def get_schain_public_key(skale, schain_name): From 123c8babfcb4c484c8b65e525cdaac7372149046 Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 13 Jun 2023 22:34:03 +0000 Subject: [PATCH 44/84] Improve actions logging --- core/schains/monitor/action.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/core/schains/monitor/action.py b/core/schains/monitor/action.py index b96759754..3e60c25c8 100644 --- a/core/schains/monitor/action.py +++ b/core/schains/monitor/action.py @@ -79,7 +79,7 @@ class BaseActionManager: def __init__(self, name: str): self.name = name self.executed_blocks = {} - self.p = f'{type(self).__name__} - schain: {self.name} -' + self.p = f'[{self.name}:{type(self).__name__}]' @staticmethod def monitor_block(f): @@ -110,7 +110,7 @@ def _upd_schain_record(self) -> None: set_first_run(self.name, False) self.schain_record.set_new_schain(False) logger.info( - f'sChain {self.name}: ' + f'{self.p}: ' f'restart_count - {self.schain_record.restart_count}, ' f'failed_rpc_count - {self.schain_record.failed_rpc_count}' ) @@ -277,12 +277,12 @@ def restart_skaled_container(self) -> bool: 
@BaseActionManager.monitor_block def reloaded_skaled_container(self) -> bool: - logger.info('Starting skaled with reloaded configuration') + logger.info('%s Starting skaled with reloaded configuration', self.p) initial_status = True if is_container_exists(self.name, dutils=self.dutils): remove_schain_container(self.name, dutils=self.dutils) else: - logger.warning(f'sChain {self.name}: container doesn\'t exists') + logger.warning('%s: container doesn\'t exists', self.p) self.schain_record.set_restart_count(0) self.schain_record.set_failed_rpc_count(0) self.schain_record.set_needs_reload(False) @@ -302,24 +302,26 @@ def skaled_rpc(self) -> bool: ) else: self.schain_record.set_failed_rpc_count(0) - logger.info(f'{self.p} rpc - ok') + logger.info('%s rpc - ok', self.p) return initial_status @BaseActionManager.monitor_block def ima_container(self) -> bool: initial_status = self.checks.ima_container if not initial_status: + logger.info('%s trying to run IMA container', self.p) monitor_ima_container( self.schain, self.ima_data, dutils=self.dutils ) else: - logger.info(f'{self.p} ima_container - ok') + logger.info('%s ima_container - ok', self.p) return initial_status @BaseActionManager.monitor_block def cleanup_schain_docker_entity(self) -> bool: + logger.info('%s removing docker artifacts', self.p) remove_schain_container(self.name, dutils=self.dutils) time.sleep(SCHAIN_CLEANUP_TIMEOUT) remove_schain_volume(self.name, dutils=self.dutils) @@ -329,7 +331,9 @@ def cleanup_schain_docker_entity(self) -> bool: def update_config(self) -> bool: upstream_path = get_upstream_config_filepath(self.name) if upstream_path: + logger.info('%s syncing with upstream %s', self.p, upstream_path) sync_config_with_file(self.name, upstream_path) + logger.info('%s no upstream config yet', self.p) return upstream_path is not None @BaseActionManager.monitor_block From db18071be3b4ebca0f3e275d29cc96908306bd98 Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 13 Jun 2023 22:36:05 +0000 Subject: 
[PATCH 45/84] Fix config check --- core/schains/checks.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/schains/checks.py b/core/schains/checks.py index d74bafc19..c1a15e513 100644 --- a/core/schains/checks.py +++ b/core/schains/checks.py @@ -203,9 +203,9 @@ def config_updated(self) -> CheckRes: @property def config(self) -> CheckRes: - """ Checks that upstream sChain config file exists """ + """ Checks that sChain config file exists """ config_path = schain_config_filepath(self.name) - return os.path.isfile(config_path) + return CheckRes(os.path.isfile(config_path)) @property def volume(self) -> CheckRes: @@ -253,7 +253,7 @@ def ima_container(self) -> CheckRes: def rpc(self) -> CheckRes: """Checks that local skaled RPC is accessible""" res = False - if self.config_file.status: + if self.config: http_endpoint = get_local_schain_http_endpoint(self.name) timeout = get_endpoint_alive_check_timeout( self.schain_record.failed_rpc_count @@ -264,7 +264,7 @@ def rpc(self) -> CheckRes: @property def blocks(self) -> CheckRes: """Checks that local skaled is mining blocks""" - if self.config_file.status: + if self.config: http_endpoint = get_local_schain_http_endpoint(self.name) return CheckRes(check_endpoint_blocks(http_endpoint)) return CheckRes(False) From ed742504300a4a4c513dc5481a29a67fe3012cfb Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 13 Jun 2023 22:37:38 +0000 Subject: [PATCH 46/84] Add process name to cleaner --- core/schains/cleaner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/schains/cleaner.py b/core/schains/cleaner.py index 9a67e27d7..30746f3a3 100644 --- a/core/schains/cleaner.py +++ b/core/schains/cleaner.py @@ -58,7 +58,7 @@ def run_cleaner(skale, node_config): - process = Process(target=monitor, args=(skale, node_config)) + process = Process(name='cleaner', target=monitor, args=(skale, node_config)) process.start() logger.info('Cleaner process started') process.join(JOIN_TIMEOUT) From 
74a6a9fddb631fc4f40e9e7a9ad302db9636eca6 Mon Sep 17 00:00:00 2001 From: badrogger Date: Wed, 14 Jun 2023 11:59:38 +0000 Subject: [PATCH 47/84] Upgrade predeployed versions for web3 6.3.0 compatibility --- requirements.txt | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/requirements.txt b/requirements.txt index 393b038ce..1eb7921a2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,17 +10,16 @@ simple-crypt==4.1.7 pycryptodome==3.12.0 python-iptables==1.0.0 -skale.py==5.8b1 +skale.py==6.0dev0 -ima-predeployed==1.3.5b1 -etherbase-predeployed==1.1.0b1 -marionette-predeployed==2.0.0b0 -multisigwallet-predeployed==1.1.0b0 -predeployed-generator==1.1.0a8 +ima-predeployed==2.0.0b0 +etherbase-predeployed==1.1.0b3 +marionette-predeployed==2.0.0b2 +config-controller-predeployed==1.0.1.dev2 +filestorage-predeployed==1.1.0.dev8 +multisigwallet-predeployed==1.1.0a8 -context-predeployed==1.0.0b0 -filestorage-predeployed==1.1.0b2 -config-controller-predeployed==1.0.1b0 +context-predeployed==1.0.0.dev3 psutil==5.9.3 @@ -34,5 +33,3 @@ cryptography==39.0.1 python-dateutil==2.8.1 python-telegram-bot==12.8 sh==1.14.1 - -eth-utils==1.10.0 From f427866c8dd50c0b3dc96b0f950c20a77c9f0e24 Mon Sep 17 00:00:00 2001 From: badrogger Date: Wed, 14 Jun 2023 12:14:38 +0000 Subject: [PATCH 48/84] Move from camel case web3 calls --- core/schains/config/helper.py | 2 +- core/schains/dkg/broadcast_filter.py | 12 +++++------ core/schains/dkg/client.py | 4 ++-- core/schains/dkg/utils.py | 2 +- core/schains/monitor/main.py | 21 ++++++++++++------- core/schains/monitor/skaled_monitor.py | 1 - core/schains/notifications.py | 6 +++--- tests/conftest.py | 2 +- tests/dkg_test/filter_test.py | 16 +++++++-------- tests/routes/wallet_test.py | 18 ++++++++--------- tools/helper.py | 2 +- tools/wallet_utils.py | 28 +++++++++++++++++++++++--- 12 files changed, 71 insertions(+), 43 deletions(-) diff --git a/core/schains/config/helper.py b/core/schains/config/helper.py
index 3c876edaa..5ce91c05f 100644 --- a/core/schains/config/helper.py +++ b/core/schains/config/helper.py @@ -44,7 +44,7 @@ def get_static_params(env_type=ENV_TYPE, path=STATIC_PARAMS_FILEPATH): def fix_address(address): - return Web3.toChecksumAddress(address) + return Web3.to_checksum_address(address) def get_chain_id(schain_name: str) -> str: diff --git a/core/schains/dkg/broadcast_filter.py b/core/schains/dkg/broadcast_filter.py index 0bb25e8d6..023a49e25 100644 --- a/core/schains/dkg/broadcast_filter.py +++ b/core/schains/dkg/broadcast_filter.py @@ -35,8 +35,8 @@ class DKGEvent: class Filter: def __init__(self, skale, schain_name, n): self.skale = skale - self.group_index = skale.web3.sha3(text=schain_name) - self.group_index_str = self.skale.web3.toHex(self.group_index) + self.group_index = skale.web3.keccak(text=schain_name) + self.group_index_str = self.skale.web3.to_hex(self.group_index) self.first_unseen_block = -1 self.dkg_contract = skale.dkg.contract self.dkg_contract_address = skale.dkg.address @@ -44,7 +44,7 @@ def __init__(self, skale, schain_name, n): self.n = n self.t = (2 * n + 1) // 3 # TODO: use scheme below to calculate event hash - # self.skale.web3.toHex(self.skale.web3.sha3( + # self.skale.web3.to_hex(self.skale.web3.keccak( # text="BroadcastAndKeyShare(bytes32,uint256,tuple[],tuple[])") # ) @@ -90,12 +90,12 @@ def get_events(self, from_channel_started_block=False): ).call() else: start_block = self.first_unseen_block - current_block = self.skale.web3.eth.getBlock("latest")["number"] + current_block = self.skale.web3.eth.get_block("latest")["number"] logger.info(f'sChain {self.group_index_str}: Parsing broadcast events from {start_block}' f' block to {current_block} block') events = [] for block_number in range(start_block, current_block + 1): - block = self.skale.web3.eth.getBlock(block_number, full_transactions=True) + block = self.skale.web3.eth.get_block(block_number, full_transactions=True) txns = block["transactions"] for tx in txns: 
try: @@ -104,7 +104,7 @@ def get_events(self, from_channel_started_block=False): hash = tx.get("hash") if hash: - receipt = self.skale.web3.eth.getTransactionReceipt(hash) + receipt = self.skale.web3.eth.get_transaction_receipt(hash) else: logger.info(f'sChain {self.group_index_str}: tx {tx}' f' does not have field "hash"') diff --git a/core/schains/dkg/client.py b/core/schains/dkg/client.py index 1ee5044a3..00ff77179 100644 --- a/core/schains/dkg/client.py +++ b/core/schains/dkg/client.py @@ -143,7 +143,7 @@ def __init__(self, node_id_dkg, node_id_contract, skale, t, n, schain_name, publ self.t = t self.n = n self.eth_key_name = eth_key_name - group_index_str = str(int(skale.web3.toHex(self.group_index)[2:], 16)) + group_index_str = str(int(skale.web3.to_hex(self.group_index)[2:], 16)) self.poly_name = generate_poly_name(group_index_str, self.node_id_dkg, rotation_id) self.bls_name = generate_bls_key_name(group_index_str, self.node_id_dkg, rotation_id) self.incoming_verification_vector = ['0' for _ in range(n)] @@ -153,7 +153,7 @@ def __init__(self, node_id_dkg, node_id_contract, skale, t, n, schain_name, publ self.node_ids_contract = node_ids_contract self.dkg_contract_functions = self.skale.dkg.contract.functions self.dkg_timeout = self.skale.constants_holder.get_dkg_timeout() - self.complaint_error_event_hash = self.skale.web3.toHex(self.skale.web3.sha3( + self.complaint_error_event_hash = self.skale.web3.to_hex(self.skale.web3.keccak( text="ComplaintError(string)" )) logger.info( diff --git a/core/schains/dkg/utils.py b/core/schains/dkg/utils.py index b5c88ab15..87ee20573 100644 --- a/core/schains/dkg/utils.py +++ b/core/schains/dkg/utils.py @@ -275,7 +275,7 @@ def wait_for_fail(skale, schain_name, channel_started_time, reason=""): def get_latest_block_timestamp(skale): - return skale.web3.eth.getBlock("latest")["timestamp"] + return skale.web3.eth.get_block("latest")["timestamp"] def get_secret_key_share_filepath(schain_name, rotation_id): diff --git 
a/core/schains/monitor/main.py b/core/schains/monitor/main.py index dc4573818..9cb42531d 100644 --- a/core/schains/monitor/main.py +++ b/core/schains/monitor/main.py @@ -22,8 +22,10 @@ import random import logging from typing import Dict +from importlib import reload from skale import Skale, SkaleIma +from web3._utils import request as web3_request from core.node_config import NodeConfig from core.schains.checks import ConfigChecks, SkaledChecks, SChainChecks @@ -43,7 +45,8 @@ from core.schains.monitor.action import ConfigActionManager, SkaledActionManager from core.schains.task import run_tasks, Task from core.schains.firewall.utils import get_sync_agent_ranges -from core.schains.skaled_status import init_skaled_status, SkaledStatus +from core.schains.rotation import get_schain_public_key +from core.schains.skaled_status import get_skaled_status, SkaledStatus from tools.docker_utils import DockerUtils from tools.configs import BACKUP_RUN @@ -119,7 +122,7 @@ def get_monitor_type( return RegularMonitor -def monitor_config(skale: Skale, schain: Dict, node_config: NodeConfig) -> None: +def run_config_pipeline(skale: Skale, schain: Dict, node_config: NodeConfig) -> None: name = schain['name'] schain_record = upsert_schain_record(name) rotation_data = skale.node_rotation.get_rotation(name) @@ -142,7 +145,7 @@ def monitor_config(skale: Skale, schain: Dict, node_config: NodeConfig) -> None: mon.run() -def monitor_containers( +def run_skaled_pipeline( skale: Skale, skale_ima: SkaleIma, schain: Dict, @@ -177,10 +180,12 @@ def monitor_containers( ima_data = ImaData( linked=ima_linked, - chain_id=skale_ima.web3.eth.chainId + chain_id=skale_ima.web3.eth.chain_id ) - skaled_status = init_skaled_status(name) + skaled_status = get_skaled_status(name) + + public_key = get_schain_public_key(skale, name) # finish ts can be fetched from config skaled_am = SkaledActionManager( @@ -188,6 +193,7 @@ def monitor_containers( rule_controller=rc, ima_data=ima_data, checks=skaled_checks, + 
public_key=public_key, finish_ts=finish_ts, dutils=dutils ) @@ -221,12 +227,13 @@ def post_monitor_sleep(): while True: try: + reload(web3_request) name = schain['name'] tasks = [ Task( f'{name}-config', functools.partial( - monitor_config, + run_config_pipeline, skale=skale, schain=schain, node_config=node_config @@ -235,7 +242,7 @@ def post_monitor_sleep(): Task( f'{name}-skaled', functools.partial( - monitor_containers, + run_skaled_pipeline, skale=skale, skale_ima=skale_ima, schain=schain, diff --git a/core/schains/monitor/skaled_monitor.py b/core/schains/monitor/skaled_monitor.py index c927ab9d0..322981a55 100644 --- a/core/schains/monitor/skaled_monitor.py +++ b/core/schains/monitor/skaled_monitor.py @@ -102,7 +102,6 @@ def run(self) -> None: class NewConfigSkaledMonitor(BaseSkaledMonitor): - # IVD should only be run for node rotation cases / or get timestamp for ip change. def run(self): if self.checks.config and not self.checks.firewall: self.am.firewall_rules() diff --git a/core/schains/notifications.py b/core/schains/notifications.py index 0371595ec..dda6fe9ba 100644 --- a/core/schains/notifications.py +++ b/core/schains/notifications.py @@ -31,8 +31,8 @@ def notify_if_not_enough_balance(skale: Skale, node_info: Dict) -> None: - eth_balance_wei = skale.web3.eth.getBalance(skale.wallet.address) + eth_balance_wei = skale.web3.eth.get_balance(skale.wallet.address) logger.info(f'Node account has {eth_balance_wei} WEI') - balance_in_skl = skale.web3.fromWei(eth_balance_wei, 'ether') - required_in_skl = skale.web3.fromWei(REQUIRED_BALANCE_WEI, 'ether') + balance_in_skl = skale.web3.from_wei(eth_balance_wei, 'ether') + required_in_skl = skale.web3.from_wei(REQUIRED_BALANCE_WEI, 'ether') notify_balance(node_info, balance_in_skl, required_in_skl) diff --git a/tests/conftest.py b/tests/conftest.py index cecd1b305..eb72ab27e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -572,7 +572,7 @@ def schain_struct(schain_config): @pytest.fixture def 
ima_data(skale): - return ImaData(linked=True, chain_id=skale.web3.eth.chainId) + return ImaData(linked=True, chain_id=skale.web3.eth.chain_id) @pytest.fixture diff --git a/tests/dkg_test/filter_test.py b/tests/dkg_test/filter_test.py index 28e794d6d..2580727bd 100644 --- a/tests/dkg_test/filter_test.py +++ b/tests/dkg_test/filter_test.py @@ -10,7 +10,7 @@ @pytest.fixture def filter_mock(skale): filter = Filter(skale, SCHAIN_NAME, N) - filter.first_unseen_block = skale.web3.eth.getBlock("latest")['number'] - 100 + filter.first_unseen_block = skale.web3.eth.get_block("latest")['number'] - 100 return filter @@ -24,9 +24,9 @@ def assert_not_called_with(self, *args, **kwargs): mock.Mock.assert_not_called_with = assert_not_called_with first = filter_mock.first_unseen_block - latest = skale.web3.eth.getBlock("latest")['number'] - with mock.patch.object(skale.web3.eth, 'getBlock', - wraps=skale.web3.eth.getBlock) as block_mock: + latest = skale.web3.eth.get_block("latest")['number'] + with mock.patch.object(skale.web3.eth, 'get_block', + wraps=skale.web3.eth.get_block) as block_mock: result = filter_mock.get_events() block_mock.assert_not_called_with(first - 1) block_mock.assert_any_call(first, full_transactions=True) @@ -36,10 +36,10 @@ def assert_not_called_with(self, *args, **kwargs): def test_get_events_from_start(skale, filter_mock): - latest = skale.web3.eth.getBlock("latest")['number'] - mock_start_block = skale.web3.eth.getBlock("latest")['number'] - 100 - with mock.patch.object(skale.web3.eth, 'getBlock', - wraps=skale.web3.eth.getBlock) as block_mock, \ + latest = skale.web3.eth.get_block("latest")['number'] + mock_start_block = skale.web3.eth.get_block("latest")['number'] - 100 + with mock.patch.object(skale.web3.eth, 'get_block', + wraps=skale.web3.eth.get_block) as block_mock, \ mock.patch.object(skale.dkg.contract.functions.getChannelStartedBlock, 'call', new=mock.Mock(return_value=mock_start_block)): result = 
filter_mock.get_events(from_channel_started_block=True) diff --git a/tests/routes/wallet_test.py b/tests/routes/wallet_test.py index 8b338266a..aedd40cd6 100644 --- a/tests/routes/wallet_test.py +++ b/tests/routes/wallet_test.py @@ -25,14 +25,14 @@ def handler(sender, **kwargs): def test_load_wallet(skale_bp, skale): data = get_bp_data(skale_bp, get_api_url(BLUEPRINT_NAME, 'info')) address = skale.wallet.address - eth_balance_wei = skale.web3.eth.getBalance(address) + eth_balance_wei = skale.web3.eth.get_balance(address) expected_data = { 'status': 'ok', 'payload': { 'address': to_checksum_address(address), 'eth_balance_wei': eth_balance_wei, 'skale_balance_wei': 0, # TODO: Remove from node cli - 'eth_balance': str(skale.web3.fromWei(eth_balance_wei, 'ether')), + 'eth_balance': str(skale.web3.from_wei(eth_balance_wei, 'ether')), 'skale_balance': '0' # TODO: Remove from node cli } } @@ -45,29 +45,29 @@ def test_send_eth(skale_bp, skale): amount_wei = skale.web3.toWei(amount, 'ether') receiver_0 = '0xf38b5dddd74b8901c9b5fb3ebd60bf5e7c1e9763' checksum_receiver_0 = to_checksum_address(receiver_0) - receiver_balance_0 = skale.web3.eth.getBalance(checksum_receiver_0) - balance_0 = skale.web3.eth.getBalance(address) + receiver_balance_0 = skale.web3.eth.get_balance(checksum_receiver_0) + balance_0 = skale.web3.eth.get_balance(address) json_data = { 'address': receiver_0, 'amount': amount } data = post_bp_data(skale_bp, get_api_url(BLUEPRINT_NAME, 'send-eth'), json_data) - balance_1 = skale.web3.eth.getBalance(address) + balance_1 = skale.web3.eth.get_balance(address) assert data == {'status': 'ok', 'payload': {}} assert balance_1 < balance_0 - assert skale.web3.eth.getBalance(checksum_receiver_0) - \ + assert skale.web3.eth.get_balance(checksum_receiver_0) - \ receiver_balance_0 == amount_wei receiver_1 = '0x01C19c5d3Ad1C3014145fC82263Fbae09e23924A' - receiver_balance_1 = skale.web3.eth.getBalance(receiver_1) + receiver_balance_1 = skale.web3.eth.get_balance(receiver_1) 
json_data = { 'address': receiver_1, 'amount': amount } data = post_bp_data(skale_bp, get_api_url(BLUEPRINT_NAME, 'send-eth'), json_data) assert data == {'status': 'ok', 'payload': {}} - assert skale.web3.eth.getBalance(address) < balance_1 - assert skale.web3.eth.getBalance(receiver_1) - \ + assert skale.web3.eth.get_balance(address) < balance_1 + assert skale.web3.eth.get_balance(receiver_1) - \ receiver_balance_1 == amount_wei diff --git a/tools/helper.py b/tools/helper.py index 8a67b54bf..c19538c59 100644 --- a/tools/helper.py +++ b/tools/helper.py @@ -160,7 +160,7 @@ def get_endpoint_call_speed(web3): scores = [] for _ in range(10): start = time.time() - result = web3.eth.gasPrice + result = web3.eth.gas_price if result: scores.append(time.time() - start) if len(scores) == 0: diff --git a/tools/wallet_utils.py b/tools/wallet_utils.py index d5a0eb2a2..255edc503 100644 --- a/tools/wallet_utils.py +++ b/tools/wallet_utils.py @@ -20,11 +20,12 @@ import logging +import requests from redis import Redis - from skale.utils.web3_utils import init_web3 from skale.wallets import BaseWallet, RedisWalletAdapter, SgxWallet from skale.wallets.web3_wallet import to_checksum_address +from web3.providers.rpc import HTTPProvider from tools.configs import ( DEFAULT_POOL, @@ -43,12 +44,12 @@ def wallet_with_balance(skale): # todo: move to the skale.py address = skale.wallet.address - eth_balance_wei = skale.web3.eth.getBalance(address) + eth_balance_wei = skale.web3.eth.get_balance(address) return { 'address': to_checksum_address(address), 'eth_balance_wei': eth_balance_wei, 'skale_balance_wei': 0, - 'eth_balance': str(skale.web3.fromWei(eth_balance_wei, 'ether')), + 'eth_balance': str(skale.web3.from_wei(eth_balance_wei, 'ether')), 'skale_balance': '0' } @@ -71,3 +72,24 @@ def init_wallet( path_to_cert=SGX_CERTIFICATES_FOLDER ) return RedisWalletAdapter(rs, pool, sgx_wallet) + + +class HTTPProviderNoCache(HTTPProvider): + def __init__(self, *args, **kwargs) -> None: + 
super().__init__(*args, **kwargs, session=None) + + def make_request(self, method, params): + logger.debug('Making request HTTPCustom. URI: %s, Method: %s', + self.endpoint_uri, method) + request_data = self.encode_rpc_request(method, params) + raw_response = requests.post( + self.endpoint_uri, + request_data, + **self.get_request_kwargs() + ) + raw_response.raise_for_status() + response = self.decode_rpc_response(raw_response.content) + logger.debug('Getting response HTTP Custom. URI: %s, ' + 'Method: %s, Response: %s', + self.endpoint_uri, method, response) + return response From 3531fa5afee0e403f555681fcfc67c5503ada036 Mon Sep 17 00:00:00 2001 From: badrogger Date: Wed, 14 Jun 2023 12:15:12 +0000 Subject: [PATCH 49/84] Add config updated check --- tests/schains/checks_test.py | 37 +++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/tests/schains/checks_test.py b/tests/schains/checks_test.py index 65574bc7d..16e228740 100644 --- a/tests/schains/checks_test.py +++ b/tests/schains/checks_test.py @@ -1,3 +1,4 @@ +import json import os from time import sleep from http import HTTPStatus @@ -9,10 +10,14 @@ import docker import pytest -from core.schains.skaled_exit_codes import SkaledExitCodes from core.schains.checks import SChainChecks, CheckRes +from core.schains.config.directory import ( + get_schain_check_filepath, + new_config_filename, + schain_config_dir +) +from core.schains.skaled_exit_codes import SkaledExitCodes from core.schains.runner import get_container_info -from core.schains.config.directory import get_schain_check_filepath from tools.configs.containers import SCHAIN_CONTAINER from tools.helper import read_json @@ -112,7 +117,7 @@ def test_dkg_check(schain_checks, sample_false_checks): def test_config_check(schain_checks, sample_false_checks): with mock.patch('core.schains.checks.schain_config_version_match', return_value=True): - assert schain_checks.config.status + assert schain_checks.config assert not 
sample_false_checks.config.status @@ -200,9 +205,9 @@ def test_blocks_check(schain_checks): with mock.patch('core.schains.checks.schain_config_version_match', return_value=True): with mock.patch('requests.post', return_value=res_mock), \ mock.patch('time.time', return_value=TEST_TIMESTAMP): - assert schain_checks.blocks.status + assert schain_checks.blocks with mock.patch('requests.post', return_value=res_mock): - assert not schain_checks.blocks.status + assert not schain_checks.blocks def test_init_checks(skale, schain_db, uninited_rule_controller, dutils): @@ -326,3 +331,25 @@ def test_get_all_with_save(node_config, rule_controller, dutils, schain_db): assert os.path.isfile(schain_check_path) checks_from_file = read_json(schain_check_path) assert schain_checks == checks_from_file['checks'] + + +def test_config_updated(skale, rule_controller, schain_db, dutils): + name = schain_db + folder = schain_config_dir(name) + + schain_record = SChainRecord.get_by_name(name) + + checks = SChainChecks( + name, + TEST_NODE_ID, + schain_record=schain_record, + rule_controller=rule_controller, + dutils=dutils + ) + assert checks.config_updated + + upstream_path = os.path.join(folder, new_config_filename(name, rotation_id=5)) + config_content = {'config': 'mock_v5'} + with open(upstream_path, 'w') as upstream_file: + json.dump(config_content, upstream_file) + assert not checks.config_updated From 06914be271c3e4ace88f942fc208eac69f772b32 Mon Sep 17 00:00:00 2001 From: badrogger Date: Wed, 14 Jun 2023 23:00:10 +0000 Subject: [PATCH 50/84] Restructure config monitor execution --- core/schains/monitor/config_monitor.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/core/schains/monitor/config_monitor.py b/core/schains/monitor/config_monitor.py index 601dbacf8..eb3b2b3ed 100644 --- a/core/schains/monitor/config_monitor.py +++ b/core/schains/monitor/config_monitor.py @@ -38,12 +38,22 @@ def __init__( self.checks = checks @abstractmethod - def 
run(self) -> None: + def execute(self) -> None: pass + def run(self): + typename = type(self).__name__ + logger.info('Monitor type %s:', typename) + self.am._upd_last_seen() + self.am._upd_schain_record() + self.execute() + self.am.log_executed_blocks() + self.am._upd_last_seen() + logger.info('Finished %s monitor runner', typename) + class RegularConfigMonitor(BaseConfigMonitor): - def run(self) -> None: + def execute(self) -> None: if not self.checks.config_dir: self.am.config_dir() if not self.checks.dkg: From c32adad707298b199334bde867437fac0bc95ac6 Mon Sep 17 00:00:00 2001 From: badrogger Date: Wed, 14 Jun 2023 23:01:01 +0000 Subject: [PATCH 51/84] Add NoConfigMonitor. Restructure skaled monitor execution --- core/schains/monitor/skaled_monitor.py | 99 ++++++++++++++------------ 1 file changed, 55 insertions(+), 44 deletions(-) diff --git a/core/schains/monitor/skaled_monitor.py b/core/schains/monitor/skaled_monitor.py index 322981a55..3368677ab 100644 --- a/core/schains/monitor/skaled_monitor.py +++ b/core/schains/monitor/skaled_monitor.py @@ -38,72 +38,75 @@ def __init__( checks: SkaledChecks ) -> None: self.am = action_manager - self.p = self.am.p self.checks = checks @abstractmethod - def run(self) -> None: + def execute(self) -> None: pass + def run(self): + typename = type(self).__name__ + logger.info('Monitor type %s:', typename) + self.am._upd_last_seen() + self.am._upd_schain_record() + self.execute() + self.am.log_executed_blocks() + self.am._upd_last_seen() + logger.info('Finished %s monitor runner', typename) + class RegularSkaledMonitor(BaseSkaledMonitor): - def run(self) -> None: - if self.checks.config or self.am.update_config(): - if not self.checks.firewall_rules: - self.am.firewall_rules() - if not self.checks.volume: - self.am.volume() - if self.checks.volume and not self.checks.skaled_container: - self.am.skaled_container() + def execute(self) -> None: + if not self.checks.firewall_rules: + self.am.firewall_rules() + if not 
self.checks.volume: + self.am.volume() + if self.checks.volume and not self.checks.skaled_container: + self.am.skaled_container() class RepairSkaledMonitor(BaseSkaledMonitor): - def run(self) -> None: - if self.checks.config or self.am.update_config(): - if not self.checks.firewall: - self.am.firewall() - if not self.checks.volume: - self.am.volume() - if self.checks.volume and not self.checks.skaled_container: - self.am.skaled_container() + def execute(self) -> None: + if not self.checks.firewall_rules: + self.am.firewall_rules() + if not self.checks.volume: + self.am.volume() + if self.checks.volume and not self.checks.skaled_container: + self.am.skaled_container(download_snapshot=True) class BackupSkaledMonitor(BaseSkaledMonitor): - def run(self) -> None: - if self.checks.config or self.am.update_config(): - if not self.checks.volume: - self.am.volume() - if not self.checks.firewall: - self.am.firewall_rules() - if not self.skaled_container: - self.am.skaled_container(download_snapshot=True) - if not self.checks.rpc: - self.am.skaled_rpc() - if not self.ima_container: - self.am.ima_container() + def execute(self) -> None: + if not self.checks.volume: + self.am.volume() + if not self.checks.firewall_rules: + self.am.firewall_rules() + if not self.am.skaled_container: + self.am.skaled_container(download_snapshot=True) + if not self.checks.rpc: + self.am.skaled_rpc() + if not self.ima_container: + self.am.ima_container() class RecreateSkaledMonitor(BaseSkaledMonitor): - def run(self) -> None: - logger.info( - '%s. Reload requested. Going to restart sChain container', - self.p - ) + def execute(self) -> None: + logger.info('Reload requested. 
Recreating sChain container') self.am.reloaded_skaled_container() class AfterExitTimeSkaledMonitor(BaseSkaledMonitor): - def run(self) -> None: + def execute(self) -> None: if not self.checks.config_updated: self.am.update_config() - if self.checks.upstream_config and not self.checks.firewall: + if self.checks.config and not self.checks.firewall_rules: self.am.firewall_rules() self.am.reloaded_skaled_container() class NewConfigSkaledMonitor(BaseSkaledMonitor): - def run(self): - if self.checks.config and not self.checks.firewall: + def execute(self): + if not self.checks.firewall_rules: self.am.firewall_rules() if not self.checks.skaled_container: self.am.skaled_container() @@ -111,10 +114,16 @@ def run(self): self.am.skaled_rpc() if not self.checks.ima_container: self.am.ima_container() - # IVD TODO Send exit only once + # TODO Prevent exit requests from spamming self.am.send_exit_request() +class NoConfigMonitor(BaseSkaledMonitor): + def execute(self): + if not self.am.update_config(): + logger.info('Waiting for upstream config') + + def is_backup_mode(schain_record: SChainRecord, backup_run: bool) -> bool: return schain_record.first_run and not schain_record.new_schain and backup_run @@ -166,14 +175,16 @@ def get_skaled_monitor( backup_run: bool = False ) -> BaseSkaledMonitor: mon_type = RegularSkaledMonitor + if not checks.config: + mon_type = NoConfigMonitor if is_backup_mode(schain_record, backup_run): mon_type = BackupSkaledMonitor - if is_repair_mode(schain_record, checks, skaled_status): + elif is_repair_mode(schain_record, checks, skaled_status): mon_type = RepairSkaledMonitor - if is_new_config(checks): - mon_type = NewConfigSkaledMonitor - if is_exit_time_reached(checks, skaled_status): + elif is_exit_time_reached(checks, skaled_status): mon_type = AfterExitTimeSkaledMonitor + elif is_new_config(checks): + mon_type = NewConfigSkaledMonitor elif is_reload_mode(schain_record): mon_type = RecreateSkaledMonitor From f4fa99b75e4413942b5f3b1c1a18118138a5ae34 
Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 15 Jun 2023 09:04:40 +0000 Subject: [PATCH 52/84] Get finish_ts from config. Add missing actions --- core/schains/config/main.py | 25 ++++++++++++- core/schains/monitor/action.py | 66 ++++++++++++++++++++++++---------- core/schains/monitor/main.py | 6 ---- 3 files changed, 72 insertions(+), 25 deletions(-) diff --git a/core/schains/config/main.py b/core/schains/config/main.py index 03979d995..c73f38481 100644 --- a/core/schains/config/main.py +++ b/core/schains/config/main.py @@ -21,7 +21,7 @@ import os import shutil import logging -from typing import Optional +from typing import Dict, Optional from skale import Skale @@ -151,3 +151,26 @@ def get_upstream_config_filepath(schain_name) -> Optional[str]: if not dir_files: return None return os.path.join(config_dir, dir_files[-1]) + + +def get_node_groups_from_config(config_path: str) -> Dict: + with open(config_path) as upstream_file: + upstream_config = json.load(upstream_file) + return upstream_config['skaleConfig']['sChain']['nodeGroups'] + + +def get_finish_ts(config_path: str) -> Optional[int]: + if not os.path.isfile(config_path): + return None + node_groups = get_node_groups_from_config(config_path) + return sorted(node_groups.keys())[-1]['finish_ts'] + + +def get_finish_ts_from_upstream_config(schain_name: str) -> Optional[int]: + upstream_path = get_upstream_config_filepath(schain_name) + return get_finish_ts(upstream_path) + + +def get_finish_ts_from_config(schain_name: str) -> Optional[int]: + upstream_path = schain_config_filepath(schain_name) + return get_finish_ts(upstream_path) diff --git a/core/schains/monitor/action.py b/core/schains/monitor/action.py index 3e60c25c8..35669c1ff 100644 --- a/core/schains/monitor/action.py +++ b/core/schains/monitor/action.py @@ -21,6 +21,7 @@ import logging from datetime import datetime from functools import wraps +from typing import Optional from skale import Skale @@ -48,6 +49,8 @@ ) from core.schains.config.main 
import ( create_new_schain_config, + get_finish_ts_from_config, + get_finish_ts_from_upstream_config, get_upstream_config_filepath, sync_config_with_file ) @@ -65,7 +68,13 @@ from tools.str_formatters import arguments_list_string from tools.configs.containers import SCHAIN_CONTAINER -from web.models.schain import upsert_schain_record, set_first_run, SChainRecord +from tools.notifications.messages import notify_repair_mode +from web.models.schain import ( + SChainRecord, + set_first_run, + switch_off_repair_mode, + upsert_schain_record +) logger = logging.getLogger(__name__) @@ -115,6 +124,10 @@ def _upd_schain_record(self) -> None: f'failed_rpc_count - {self.schain_record.failed_rpc_count}' ) + def log_executed_blocks(self) -> None: + logger.info(arguments_list_string( + self.executed_blocks, f'Finished monitor runner - {self.name}')) + class ConfigActionManager(BaseActionManager): def __init__( @@ -133,10 +146,6 @@ def __init__( self.rotation_data = rotation_data self.rotation_id = rotation_data['rotation_id'] - self.finish_ts = skale.node_rotation.get_schain_finish_ts( - node_id=rotation_data['leaving_node'], - schain_name=self.schain['name'] - ) super().__init__(name=schain['name']) @BaseActionManager.monitor_block @@ -172,9 +181,9 @@ def dkg(self) -> bool: return initial_status @BaseActionManager.monitor_block - def upstream_config(self, overwrite=False) -> bool: - initial_status = self.checks.upstream_config.status - if not initial_status or overwrite: + def upstream_config(self) -> bool: + initial_status = self.checks.upstream_config + if not initial_status: create_new_schain_config( skale=self.skale, node_id=self.node_config.id, @@ -195,15 +204,16 @@ def __init__( schain: dict, ima_data: ImaData, rule_controller: IRuleController, - finish_ts: int, public_key: str, checks: IChecks, + node_config: NodeConfig, dutils: DockerUtils = None ): self.ima_data = ima_data self.schain = schain self.generation = schain['generation'] self.checks = checks + 
self.node_config = node_config self.rc = rule_controller self.skaled_status = init_skaled_status(self.schain['name']) @@ -241,15 +251,18 @@ def firewall_rules(self, overwrite=False) -> bool: return initial_status @BaseActionManager.monitor_block - def skaled_container(self, download_snapshot: bool = False, delay_start: bool = False) -> bool: + def skaled_container( + self, + download_snapshot: bool = False, + start_ts: Optional[int] = None + ) -> bool: initial_status = self.checks.skaled_container.status if not initial_status: - public_key, start_ts = None, None - + public_key = None if download_snapshot: public_key = self.public_key - if delay_start: - start_ts = self.finish_ts + if start_ts is None: + start_ts = self.finish_ts monitor_schain_container( self.schain, @@ -338,11 +351,17 @@ def update_config(self) -> bool: @BaseActionManager.monitor_block def send_exit_request(self) -> None: - set_rotation_for_schain(self.name, self.finish_ts) + finish_ts = self.upstream_finish_ts + if finish_ts is not None: + set_rotation_for_schain(self.name, finish_ts) - def log_executed_blocks(self) -> None: - logger.info(arguments_list_string( - self.executed_blocks, f'Finished monitor runner - {self.name}')) + @property + def upstream_finish_ts(self) -> Optional[int]: + return get_finish_ts_from_upstream_config(self.name) + + @property + def finish_ts(self) -> Optional[int]: + return get_finish_ts_from_config(self.name) def display_skaled_logs(self) -> None: if is_container_exists(self.name, dutils=self.dutils): @@ -350,3 +369,14 @@ def display_skaled_logs(self) -> None: self.dutils.display_container_logs(container_name) else: logger.warning(f'sChain {self.name}: container doesn\'t exists, could not show logs') + + @BaseActionManager.monitor_block + def notify_repair_mode(self) -> None: + notify_repair_mode( + self.node_config.all(), + self.name + ) + + @BaseActionManager.monitor_block + def disable_repair_mode(self) -> None: + switch_off_repair_mode(self.name) diff --git 
a/core/schains/monitor/main.py b/core/schains/monitor/main.py index 9cb42531d..99cf096cf 100644 --- a/core/schains/monitor/main.py +++ b/core/schains/monitor/main.py @@ -173,11 +173,6 @@ def run_skaled_pipeline( dutils=dutils ) - finish_ts = skale.node_rotation.get_schain_finish_ts( - node_id=rotation_data['leaving_node'], - schain_name=name - ) - ima_data = ImaData( linked=ima_linked, chain_id=skale_ima.web3.eth.chain_id @@ -194,7 +189,6 @@ def run_skaled_pipeline( ima_data=ima_data, checks=skaled_checks, public_key=public_key, - finish_ts=finish_ts, dutils=dutils ) mon = get_skaled_monitor( From 5712319636a1b30c3c35fa008371c7dc6cafdc62 Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 15 Jun 2023 09:08:07 +0000 Subject: [PATCH 53/84] Download snapshot if volume was just created --- core/schains/monitor/skaled_monitor.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/core/schains/monitor/skaled_monitor.py b/core/schains/monitor/skaled_monitor.py index 3368677ab..13ed5525f 100644 --- a/core/schains/monitor/skaled_monitor.py +++ b/core/schains/monitor/skaled_monitor.py @@ -61,12 +61,22 @@ def execute(self) -> None: self.am.firewall_rules() if not self.checks.volume: self.am.volume() - if self.checks.volume and not self.checks.skaled_container: + self.am.skaled_container(download_snapshot=True) + elif not self.checks.skaled_container: self.am.skaled_container() + if not self.checks.ima_container: + self.am.ima_container() class RepairSkaledMonitor(BaseSkaledMonitor): def execute(self) -> None: + logger.warning( + 'Repair mode execution, record: %s, exit_code_ok: %s', + self.checks.schain_record.repair_mode, + self.checks.exit_code_ok.status + ) + self.notify_repair_mode() + self.cleanup_schain_docker_entity() if not self.checks.firewall_rules: self.am.firewall_rules() if not self.checks.volume: From 138d47796f2be91d6dba2681b4de573392eda9c9 Mon Sep 17 00:00:00 2001 From: badrogger Date: Fri, 16 Jun 2023 09:20:54 +0000 Subject: [PATCH 
54/84] Save upstream config in new format --- core/schains/checks.py | 42 ++++++++++++++++++----------- core/schains/config/directory.py | 46 ++++++++++++++++++++++++++++---- core/schains/config/main.py | 15 ++++++----- core/schains/monitor/action.py | 33 ++++++++++++----------- core/schains/monitor/main.py | 19 ++++++++++--- 5 files changed, 110 insertions(+), 45 deletions(-) diff --git a/core/schains/checks.py b/core/schains/checks.py index c1a15e513..a0afcea63 100644 --- a/core/schains/checks.py +++ b/core/schains/checks.py @@ -17,6 +17,7 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +import filecmp import os import time import logging @@ -24,11 +25,11 @@ from typing import Any, Dict from core.schains.config.directory import ( + config_exists_for_rotation_id_and_stream_version, get_schain_check_filepath, get_schain_config, schain_config_dir, - schain_config_filepath, - new_schain_config_filepath + schain_config_filepath ) from core.schains.config.helper import ( get_base_port_from_config, @@ -36,12 +37,14 @@ get_own_ip_from_config, get_local_schain_http_endpoint ) -from core.schains.config.main import get_upstream_config_filepath, schain_config_version_match +from core.schains.config.main import get_upstream_config_filepath from core.schains.dkg.utils import get_secret_key_share_filepath from core.schains.firewall.types import IRuleController from core.schains.process_manager_helper import is_monitor_process_alive from core.schains.rpc import ( - check_endpoint_alive, check_endpoint_blocks, get_endpoint_alive_check_timeout + check_endpoint_alive, + check_endpoint_blocks, + get_endpoint_alive_check_timeout ) from core.schains.runner import get_container_name from core.schains.skaled_exit_codes import SkaledExitCodes @@ -97,12 +100,14 @@ def __init__( schain_name: str, node_id: int, schain_record: SChainRecord, - rotation_id: int + rotation_id: int, + stream_version: str ): self.name = 
schain_name self.node_id = node_id self.schain_record = schain_record self.rotation_id = rotation_id + self.stream_version = stream_version @property def config_dir(self) -> CheckRes: @@ -121,14 +126,16 @@ def dkg(self) -> CheckRes: @property def upstream_config(self) -> CheckRes: - """Checks that sChain config file exists""" - upstream_path = new_schain_config_filepath(self.name, self.rotation_id) - if not os.path.isfile(upstream_path): - return CheckRes(False) - return CheckRes( - schain_config_version_match(self.name, self.schain_record) + """Checks that config exists for rotation id and stream""" + return config_exists_for_rotation_id_and_stream_version( + self.name, + self.rotation_id, + self.stream_version ) + def new_schain(self) -> CheckRes: + return CheckRes(self.schain_record.new_schain) + def get_all(self, log=True, save=False, checks_filter=None) -> Dict: if not checks_filter: checks_filter = API_ALLOWED_CHECKS @@ -166,6 +173,7 @@ def __init__( self.container_name = get_container_name(SCHAIN_CONTAINER, self.name) self.ima_linked = ima_linked self.rc = rule_controller + self._new_schain = self.schain_record.new_schain def get_all(self, log=True, save=False, checks_filter=None) -> Dict: if not checks_filter: @@ -189,6 +197,10 @@ def is_healthy(self) -> bool: checks = self.get_all() return False not in checks.values() + @property + def new_schain(self) -> CheckRes: + return CheckRes(self._new_schain) + @property def config_updated(self) -> CheckRes: if not self.config: @@ -197,9 +209,7 @@ def config_updated(self) -> CheckRes: config_path = schain_config_filepath(self.name) if not upstream_path: return CheckRes(True) - upstream_mtime = os.stat(upstream_path, follow_symlinks=False).st_mtime - config_mtime = os.stat(config_path, follow_symlinks=False).st_mtime - return CheckRes(config_mtime >= upstream_mtime) + return CheckRes(filecmp.cmp(upstream_path, config_path)) @property def config(self) -> CheckRes: @@ -282,6 +292,7 @@ def __init__( node_id: int, 
schain_record: SChainRecord, rule_controller: IRuleController, + stream_version: str, rotation_id: int = 0, *, ima_linked: bool = True, @@ -292,7 +303,8 @@ def __init__( schain_name=schain_name, node_id=node_id, schain_record=schain_record, - rotation_id=rotation_id + rotation_id=rotation_id, + stream_version=stream_version ), SkaledChecks( schain_name=schain_name, diff --git a/core/schains/config/directory.py b/core/schains/config/directory.py index 5a81e1063..471e67d89 100644 --- a/core/schains/config/directory.py +++ b/core/schains/config/directory.py @@ -17,9 +17,11 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -import os +import glob import json import logging +import os +import time from pathlib import Path from tools.configs import SCHAIN_CONFIG_DIR_SKALED @@ -40,8 +42,14 @@ def new_config_prefix(name: str) -> str: return f'schain_{name}_' -def new_config_filename(name: str, rotation_id: int) -> str: - return f'schain_{name}_{rotation_id}.json' +def formatted_stream_version(stream_version: str) -> str: + return stream_version.replace('.', '_') + + +def new_config_filename(name: str, rotation_id: int, stream_version: str) -> str: + ts = int(time.time()) + formatted_version = formatted_stream_version(stream_version) + return f'schain_{name}_{ts}_{rotation_id}_{formatted_version}.json' def schain_config_dir(name: str) -> str: @@ -67,9 +75,37 @@ def schain_config_filepath(name: str, in_schain_container=False) -> str: return os.path.join(schain_dir_path, config_filename(name)) -def new_schain_config_filepath(name: str, rotation_id: int, in_schain_container=False) -> str: +def new_schain_config_filepath( + name: str, + rotation_id: int, + stream_version: str, + in_schain_container: bool = False +) -> str: + schain_dir_path = SCHAIN_CONFIG_DIR_SKALED if in_schain_container else schain_config_dir(name) + return os.path.join(schain_dir_path, new_config_filename(name, rotation_id, 
stream_version)) + + +def config_exists_for_rotation_id_and_stream_version( + name: str, + rotation_id: int, + stream_version: str, + in_schain_container: bool = False +) -> str: + schain_dir_path = SCHAIN_CONFIG_DIR_SKALED if in_schain_container else schain_config_dir(name) + version = formatted_stream_version(stream_version) + pattern = f'{schain_dir_path}/schain_{name}_*_{rotation_id}_{version}.json' + done = glob.glob(pattern) + return len(done) > 0 + + +def upstream_path_for_rotation_id_stream( + name: str, + rotation_id: int, + stream_version: str, + in_schain_container: bool = False +): schain_dir_path = SCHAIN_CONFIG_DIR_SKALED if in_schain_container else schain_config_dir(name) - return os.path.join(schain_dir_path, new_config_filename(name, rotation_id)) + return os.path.join(schain_dir_path) def skaled_status_filepath(name: str) -> str: diff --git a/core/schains/config/main.py b/core/schains/config/main.py index c73f38481..90b6dc95e 100644 --- a/core/schains/config/main.py +++ b/core/schains/config/main.py @@ -77,6 +77,7 @@ def create_new_schain_config( generation: int, ecdsa_sgx_key_name: str, rotation_data: dict, + stream_version: str, schain_record: SChainRecord ): logger.info('Generating sChain config for %s', schain_name) @@ -92,7 +93,8 @@ def create_new_schain_config( save_new_schain_config( schain_config.to_dict(), schain_name, - rotation_data['rotation_id'] + rotation_data['rotation_id'], + stream_version ) update_schain_config_version(schain_name, schain_record=schain_record) @@ -105,11 +107,11 @@ def save_schain_config(schain_config, schain_name): shutil.move(tmp_config_filepath, config_filepath) -def save_new_schain_config(schain_config, schain_name, rotation_id): +def save_new_schain_config(schain_config, schain_name, rotation_id, stream_version): tmp_config_filepath = get_tmp_schain_config_filepath(schain_name) with open(tmp_config_filepath, 'w') as outfile: json.dump(schain_config, outfile, indent=4) - config_filepath = 
new_schain_config_filepath(schain_name, rotation_id) + config_filepath = new_schain_config_filepath(schain_name, rotation_id, stream_version) shutil.move(tmp_config_filepath, config_filepath) @@ -146,7 +148,6 @@ def get_upstream_config_filepath(schain_name) -> Optional[str]: ] dir_files = sorted( configs, - key=lambda path: os.stat(path, follow_symlinks=False).st_mtime ) if not dir_files: return None @@ -155,8 +156,8 @@ def get_upstream_config_filepath(schain_name) -> Optional[str]: def get_node_groups_from_config(config_path: str) -> Dict: with open(config_path) as upstream_file: - upstream_config = json.load(upstream_file) - return upstream_config['skaleConfig']['sChain']['nodeGroups'] + config = json.load(upstream_file) + return config['skaleConfig']['sChain']['nodeGroups'] def get_finish_ts(config_path: str) -> Optional[int]: @@ -168,6 +169,8 @@ def get_finish_ts(config_path: str) -> Optional[int]: def get_finish_ts_from_upstream_config(schain_name: str) -> Optional[int]: upstream_path = get_upstream_config_filepath(schain_name) + if upstream_path is None: + return None return get_finish_ts(upstream_path) diff --git a/core/schains/monitor/action.py b/core/schains/monitor/action.py index 35669c1ff..fb79d73dc 100644 --- a/core/schains/monitor/action.py +++ b/core/schains/monitor/action.py @@ -88,7 +88,6 @@ class BaseActionManager: def __init__(self, name: str): self.name = name self.executed_blocks = {} - self.p = f'[{self.name}:{type(self).__name__}]' @staticmethod def monitor_block(f): @@ -119,7 +118,6 @@ def _upd_schain_record(self) -> None: set_first_run(self.name, False) self.schain_record.set_new_schain(False) logger.info( - f'{self.p}: ' f'restart_count - {self.schain_record.restart_count}, ' f'failed_rpc_count - {self.schain_record.failed_rpc_count}' ) @@ -136,6 +134,7 @@ def __init__( schain: dict, node_config: NodeConfig, rotation_data: dict, + stream_version: str, checks: IChecks ): self.skale = skale @@ -143,6 +142,7 @@ def __init__( self.generation = 
schain['generation'] self.node_config = node_config self.checks = checks + self.stream_version = stream_version self.rotation_data = rotation_data self.rotation_id = rotation_data['rotation_id'] @@ -154,7 +154,7 @@ def config_dir(self) -> bool: if not initial_status: init_schain_config_dir(self.name) else: - logger.info(f'{self.p} config_dir - ok') + logger.info('config_dir - ok') return initial_status @BaseActionManager.monitor_block @@ -175,9 +175,9 @@ def dkg(self) -> bool: ) self.schain_record.set_dkg_status(dkg_result.status) if not dkg_result.status.is_done(): - raise DkgError(f'{self.p} DKG failed') + raise DkgError('DKG failed') else: - logger.info(f'{self.p} dkg - ok') + logger.info('dkg - ok') return initial_status @BaseActionManager.monitor_block @@ -191,10 +191,11 @@ def upstream_config(self) -> bool: generation=self.generation, ecdsa_sgx_key_name=self.node_config.sgx_key_name, rotation_data=self.rotation_data, + stream_version=self.stream_version, schain_record=self.schain_record ) else: - logger.info(f'{self.p} config - ok') + logger.info('config - ok') return initial_status @@ -230,7 +231,7 @@ def volume(self) -> bool: if not initial_status: init_data_volume(self.schain, dutils=self.dutils) else: - logger.info(f'{self.p} volume - ok') + logger.info('Volume - ok') return initial_status @BaseActionManager.monitor_block @@ -275,7 +276,7 @@ def skaled_container( time.sleep(CONTAINER_POST_RUN_DELAY) else: self.schain_record.set_restart_count(0) - logger.info(f'{self.p} skaled_container - ok') + logger.info('skaled_container - ok') return initial_status @BaseActionManager.monitor_block @@ -290,12 +291,12 @@ def restart_skaled_container(self) -> bool: @BaseActionManager.monitor_block def reloaded_skaled_container(self) -> bool: - logger.info('%s Starting skaled with reloaded configuration', self.p) + logger.info('starting skaled with reloaded configuration') initial_status = True if is_container_exists(self.name, dutils=self.dutils): 
remove_schain_container(self.name, dutils=self.dutils) else: - logger.warning('%s: container doesn\'t exists', self.p) + logger.warning('container doesn\'t exists') self.schain_record.set_restart_count(0) self.schain_record.set_failed_rpc_count(0) self.schain_record.set_needs_reload(False) @@ -315,26 +316,26 @@ def skaled_rpc(self) -> bool: ) else: self.schain_record.set_failed_rpc_count(0) - logger.info('%s rpc - ok', self.p) + logger.info('rpc - ok') return initial_status @BaseActionManager.monitor_block def ima_container(self) -> bool: initial_status = self.checks.ima_container if not initial_status: - logger.info('%s trying to run IMA container', self.p) + logger.info('trying to run IMA container') monitor_ima_container( self.schain, self.ima_data, dutils=self.dutils ) else: - logger.info('%s ima_container - ok', self.p) + logger.info('ima_container - ok') return initial_status @BaseActionManager.monitor_block def cleanup_schain_docker_entity(self) -> bool: - logger.info('%s removing docker artifacts', self.p) + logger.info('removing docker artifacts') remove_schain_container(self.name, dutils=self.dutils) time.sleep(SCHAIN_CLEANUP_TIMEOUT) remove_schain_volume(self.name, dutils=self.dutils) @@ -344,9 +345,9 @@ def cleanup_schain_docker_entity(self) -> bool: def update_config(self) -> bool: upstream_path = get_upstream_config_filepath(self.name) if upstream_path: - logger.info('%s syncing with upstream %s', self.p, upstream_path) + logger.info('syncing with upstream %s', upstream_path) sync_config_with_file(self.name, upstream_path) - logger.info('%s no upstream config yet', self.p) + logger.info('no upstream config yet') return upstream_path is not None @BaseActionManager.monitor_block diff --git a/core/schains/monitor/main.py b/core/schains/monitor/main.py index 99cf096cf..db2267eb8 100644 --- a/core/schains/monitor/main.py +++ b/core/schains/monitor/main.py @@ -27,6 +27,7 @@ from skale import Skale, SkaleIma from web3._utils import request as web3_request 
+from core.node import get_skale_node_version from core.node_config import NodeConfig from core.schains.checks import ConfigChecks, SkaledChecks, SChainChecks from core.schains.firewall import get_default_rule_controller @@ -122,7 +123,12 @@ def get_monitor_type( return RegularMonitor -def run_config_pipeline(skale: Skale, schain: Dict, node_config: NodeConfig) -> None: +def run_config_pipeline( + skale: Skale, + schain: Dict, + node_config: NodeConfig, + stream_version: str +) -> None: name = schain['name'] schain_record = upsert_schain_record(name) rotation_data = skale.node_rotation.get_rotation(name) @@ -130,6 +136,7 @@ def run_config_pipeline(skale: Skale, schain: Dict, node_config: NodeConfig) -> schain_name=name, node_id=node_config.id, schain_record=schain_record, + stream_version=stream_version, rotation_id=rotation_data['rotation_id'] ) @@ -138,6 +145,7 @@ def run_config_pipeline(skale: Skale, schain: Dict, node_config: NodeConfig) -> schain=schain, node_config=node_config, rotation_data=rotation_data, + stream_version=stream_version, checks=config_checks ) @@ -149,6 +157,7 @@ def run_skaled_pipeline( skale: Skale, skale_ima: SkaleIma, schain: Dict, + node_config: NodeConfig, dutils: DockerUtils ) -> None: name = schain['name'] @@ -156,7 +165,6 @@ def run_skaled_pipeline( dutils = dutils or DockerUtils() - rotation_data = skale.node_rotation.get_rotation(name) ima_linked = not DISABLE_IMA and skale_ima.linker.has_schain(name) sync_agent_ranges = get_sync_agent_ranges(skale) @@ -188,6 +196,7 @@ def run_skaled_pipeline( rule_controller=rc, ima_data=ima_data, checks=skaled_checks, + node_config=node_config, public_key=public_key, dutils=dutils ) @@ -210,6 +219,7 @@ def run_monitor_for_schain( once=False ): p = get_log_prefix(schain["name"]) + stream_version = get_skale_node_version() def post_monitor_sleep(): schain_monitor_sleep = random.randint( @@ -223,6 +233,7 @@ def post_monitor_sleep(): try: reload(web3_request) name = schain['name'] + tasks = [ Task( 
f'{name}-config', @@ -230,7 +241,8 @@ def post_monitor_sleep(): run_config_pipeline, skale=skale, schain=schain, - node_config=node_config + node_config=node_config, + stream_version=stream_version ) ), Task( @@ -240,6 +252,7 @@ def post_monitor_sleep(): skale=skale, skale_ima=skale_ima, schain=schain, + node_config=node_config, dutils=dutils ), ) From 56b31b84d05d297c3afb0bae8aa74f2456f1805c Mon Sep 17 00:00:00 2001 From: badrogger Date: Fri, 16 Jun 2023 09:27:31 +0000 Subject: [PATCH 55/84] Handle rotation new node --- core/schains/monitor/skaled_monitor.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/core/schains/monitor/skaled_monitor.py b/core/schains/monitor/skaled_monitor.py index 13ed5525f..9bfdf9cfc 100644 --- a/core/schains/monitor/skaled_monitor.py +++ b/core/schains/monitor/skaled_monitor.py @@ -59,11 +59,13 @@ class RegularSkaledMonitor(BaseSkaledMonitor): def execute(self) -> None: if not self.checks.firewall_rules: self.am.firewall_rules() + download_snapshot = False if not self.checks.volume: self.am.volume() - self.am.skaled_container(download_snapshot=True) - elif not self.checks.skaled_container: - self.am.skaled_container() + if not self.checks.new_schain: + download_snapshot = True + if not self.checks.skaled_container: + self.am.skaled_container(download_snapshot=download_snapshot) if not self.checks.ima_container: self.am.ima_container() @@ -75,8 +77,8 @@ def execute(self) -> None: self.checks.schain_record.repair_mode, self.checks.exit_code_ok.status ) - self.notify_repair_mode() - self.cleanup_schain_docker_entity() + self.am.notify_repair_mode() + self.am.cleanup_schain_docker_entity() if not self.checks.firewall_rules: self.am.firewall_rules() if not self.checks.volume: @@ -95,7 +97,7 @@ def execute(self) -> None: self.am.skaled_container(download_snapshot=True) if not self.checks.rpc: self.am.skaled_rpc() - if not self.ima_container: + if not self.checks.ima_container: self.am.ima_container() From 
31b89dbdf94cb3bb4e5579755fb4ce58b9ef6069 Mon Sep 17 00:00:00 2001 From: badrogger Date: Fri, 16 Jun 2023 09:28:01 +0000 Subject: [PATCH 56/84] Fix cleaner --- core/schains/cleaner.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/core/schains/cleaner.py b/core/schains/cleaner.py index 30746f3a3..5c5a449e8 100644 --- a/core/schains/cleaner.py +++ b/core/schains/cleaner.py @@ -24,6 +24,7 @@ from sgx import SgxClient +from core.node import get_skale_node_version from core.schains.checks import SChainChecks from core.schains.config.directory import schain_config_dir from core.schains.dkg.utils import get_secret_key_share_filepath @@ -202,10 +203,18 @@ def remove_schain(skale, node_id, schain_name, msg, dutils=None) -> None: terminate_schain_process(schain_record) delete_bls_keys(skale, schain_name) sync_agent_ranges = get_sync_agent_ranges(skale) - cleanup_schain(node_id, schain_name, sync_agent_ranges, dutils=dutils) + rotation_data = skale.node_rotation.get_rotation(schain_name) + rotation_id = rotation_data['rotation_id'] + cleanup_schain( + node_id, + schain_name, + sync_agent_ranges, + rotation_id=rotation_id, + dutils=dutils + ) -def cleanup_schain(node_id, schain_name, sync_agent_ranges, dutils=None) -> None: +def cleanup_schain(node_id, schain_name, sync_agent_ranges, rotation_id, dutils=None) -> None: dutils = dutils or DockerUtils() schain_record = upsert_schain_record(schain_name) @@ -213,11 +222,14 @@ def cleanup_schain(node_id, schain_name, sync_agent_ranges, dutils=None) -> None name=schain_name, sync_agent_ranges=sync_agent_ranges ) + stream_version = get_skale_node_version() checks = SChainChecks( schain_name, node_id, rule_controller=rc, - schain_record=schain_record + stream_version=stream_version, + schain_record=schain_record, + rotation_id=rotation_id ) if checks.skaled_container.status or is_exited( schain_name, From 2691ee9fdbd921b475e71a91f5d43130574dced9 Mon Sep 17 00:00:00 2001 From: badrogger Date: Fri, 
16 Jun 2023 09:28:17 +0000 Subject: [PATCH 57/84] Fix DKG --- core/schains/dkg/broadcast_filter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/schains/dkg/broadcast_filter.py b/core/schains/dkg/broadcast_filter.py index 023a49e25..eb3e69bba 100644 --- a/core/schains/dkg/broadcast_filter.py +++ b/core/schains/dkg/broadcast_filter.py @@ -75,7 +75,7 @@ def check_event(self, receipt): return True def parse_event(self, receipt): - event_data = receipt['logs'][0]['data'][2:] + event_data = receipt['logs'][0]['data'].hex()[2:] node_index = int(receipt['logs'][0]['topics'][2].hex()[2:], 16) vv = event_data[192: 192 + self.t * 256] skc = event_data[192 + 64 + self.t * 256: 192 + 64 + self.t * 256 + 192 * self.n] From ef8ad1027b4cac9abbc70721c8b23835f6eed67b Mon Sep 17 00:00:00 2001 From: badrogger Date: Fri, 16 Jun 2023 09:29:25 +0000 Subject: [PATCH 58/84] Fix and improve tests --- tests/conftest.py | 36 +++++++++ tests/logger_test.py | 2 +- tests/schains/checks_test.py | 76 +++++++++++++------ tests/schains/cleaner_test.py | 28 ++++--- .../monitor/action/skaled_action_test.py | 11 --- 5 files changed, 109 insertions(+), 44 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index eb72ab27e..c5234b1a8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -263,6 +263,41 @@ def generate_schain_config(schain_name): "schainID": 1, "schainName": schain_name, "schainOwner": "0x3483A10F7d6fDeE0b0C1E9ad39cbCE13BD094b12", + + "nodeGroups": { + "0": { + "rotation": None, + "nodes": { + "0": [ + 0, + 40, + "0xc67d1931b00f2b203907fed1ef81cf29aab65d707eb65fbfed9f6d8e74c1d7129bb0e94403e8c315b1048a4077c473cebc59e74612616af4d7804e19731eab04" # noqa + ], + "1": [ + 1, + 38, + "0x4523552de788999746ab13a0972021f5bf76ac38ca22f5310a5f921b7d28d89e576f5d71f8bcf047b371a999c5ce265012cd0c290931f9bc9d29146069ce79f1" # noqa + ], + "2": [ + 2, + 39, + 
"0x12ec7d4531d7953c388ea3544a5e2273e3d9ec6924489ac5aa91c2e4990c586ce0d63f6c99ec7b4e7f404c7f6eb2c968fbda1eb6583e6af3c4eb8f64cfb031c9" # noqa + ], + "3": [ + 3, + 37, + "0xcfbda7c9bbbfa26002c569ee92a07a306205da60af428666cd06ebefc6785df842284abd55a16b2635f895a6e5c5f5f523ab0a44b76e6bf93cf34d4e996cbd0b" # noqa + ] + }, + "finish_ts": None, + "bls_public_key": { + "blsPublicKey0": "21092886060389550499034480408505112402900737789452520523953046451048727082686", # noqa + "blsPublicKey1": "4152187587365395389364717716976849075850656705989482065258061487623185446470", # noqa + "blsPublicKey2": "16705078395405524997550329250978551573025551514774956523868577739340207584290", # noqa + "blsPublicKey3": "10123946908466647712215451689564014152451116972533816450611813231481921711132" # noqa + } + } + }, "nodes": [ { "nodeID": 0, @@ -556,6 +591,7 @@ def schain_checks(schain_config, schain_db, rule_controller, dutils): node_id, schain_record=schain_record, rule_controller=rule_controller, + stream_version=CONFIG_STREAM, dutils=dutils ) diff --git a/tests/logger_test.py b/tests/logger_test.py index 603e281b2..daa26c9fd 100644 --- a/tests/logger_test.py +++ b/tests/logger_test.py @@ -23,4 +23,4 @@ def test_custom_formatter(): ADMIN_LOG_FORMAT, compose_hiding_patterns() ).format(record) - assert 'MainThread - None:0 - [SGX_KEY], http://54.545.454.12:1231, [ETH_IP] http://[ETH_IP]:8080, [ETH_IP][ETH_IP]loc https://testnet.com, wss://127.0.0.1.com, ttt://127.0.0.1.com, foo://127.0.0.1.com, NEK//127.0.0.1.com, ' in formatted_text # noqa + assert '[MainProcess][MainThread] - None:0 - [SGX_KEY], http://54.545.454.12:1231, [ETH_IP] http://[ETH_IP]:8080, [ETH_IP][ETH_IP]loc https://testnet.com, wss://127.0.0.1.com, ttt://127.0.0.1.com, foo://127.0.0.1.com, NEK//127.0.0.1.com, ' in formatted_text # noqa diff --git a/tests/schains/checks_test.py b/tests/schains/checks_test.py index 16e228740..a83c3b725 100644 --- a/tests/schains/checks_test.py +++ b/tests/schains/checks_test.py @@ -1,6 +1,6 @@ 
import json import os -from time import sleep +import time from http import HTTPStatus from collections import namedtuple @@ -24,7 +24,7 @@ from web.models.schain import upsert_schain_record, SChainRecord -from tests.utils import response_mock, request_mock +from tests.utils import CONFIG_STREAM, response_mock, request_mock NOT_EXISTS_SCHAIN_NAME = 'qwerty123' @@ -38,6 +38,7 @@ TEST_TIMESTAMP_HEX = '0x55ba467c' TEST_TIMESTAMP = int(TEST_TIMESTAMP_HEX, 16) + ETH_GET_BLOCK_RESULT = { "jsonrpc": "2.0", "id": 1, @@ -83,6 +84,7 @@ def sample_false_checks(schain_config, schain_db, rule_controller, dutils): TEST_NODE_ID, schain_record=schain_record, rule_controller=rule_controller, + stream_version=CONFIG_STREAM, dutils=dutils ) @@ -101,6 +103,7 @@ def rules_unsynced_checks( TEST_NODE_ID, schain_record=schain_record, rule_controller=uninited_rule_controller, + stream_version=CONFIG_STREAM, dutils=dutils ) @@ -115,15 +118,33 @@ def test_dkg_check(schain_checks, sample_false_checks): assert not sample_false_checks.dkg.status -def test_config_check(schain_checks, sample_false_checks): - with mock.patch('core.schains.checks.schain_config_version_match', return_value=True): - assert schain_checks.config - assert not sample_false_checks.config.status +def test_upstream_config_check(schain_checks): + assert not schain_checks.upstream_config + ts = int(time.time()) + name, rotation_id = schain_checks.name, schain_checks.rotation_id + + upstream_path_wrong_version = os.path.join( + schain_config_dir(name), + f'schain_{name}_{ts}_{rotation_id}_2.2.2.json' + ) + with open(upstream_path_wrong_version, 'w') as upstream_file: + json.dump({'config': 'wrong_upstream'}, upstream_file) + assert not schain_checks.upstream_config + + formatter_version = CONFIG_STREAM.replace('.', '_') + upstream_path = os.path.join( + schain_config_dir(name), + f'schain_{name}_{ts}_{rotation_id}_{formatter_version}.json' + ) + + with open(upstream_path, 'w') as upstream_file: + json.dump({'config': 
'upstream'}, upstream_file) + assert schain_checks.upstream_config -def test_config_check_wrong_version(schain_checks): - schain_checks._subjects[0].schain_record = SchainRecordMock('9.8.7') - assert not schain_checks.config.status +def test_config_check(schain_checks, sample_false_checks): + assert schain_checks.config + assert not sample_false_checks.config def test_volume_check(schain_checks, sample_false_checks, dutils): @@ -137,10 +158,8 @@ def test_volume_check(schain_checks, sample_false_checks, dutils): def test_firewall_rules_check(schain_checks, rules_unsynced_checks): schain_checks.rc.sync() - with mock.patch('core.schains.checks.schain_config_version_match', return_value=True): - assert schain_checks.firewall_rules.status - with mock.patch('core.schains.checks.schain_config_version_match', return_value=True): - assert not rules_unsynced_checks.firewall_rules.status + assert schain_checks.firewall_rules + assert not rules_unsynced_checks.firewall_rules.status def test_container_check(schain_checks, sample_false_checks): @@ -202,12 +221,11 @@ def test_rpc_check(schain_checks, schain_db): def test_blocks_check(schain_checks): res_mock = response_mock(HTTPStatus.OK, ETH_GET_BLOCK_RESULT) - with mock.patch('core.schains.checks.schain_config_version_match', return_value=True): - with mock.patch('requests.post', return_value=res_mock), \ - mock.patch('time.time', return_value=TEST_TIMESTAMP): - assert schain_checks.blocks - with mock.patch('requests.post', return_value=res_mock): - assert not schain_checks.blocks + with mock.patch('requests.post', return_value=res_mock), \ + mock.patch('time.time', return_value=TEST_TIMESTAMP): + assert schain_checks.blocks + with mock.patch('requests.post', return_value=res_mock): + assert not schain_checks.blocks def test_init_checks(skale, schain_db, uninited_rule_controller, dutils): @@ -218,6 +236,7 @@ def test_init_checks(skale, schain_db, uninited_rule_controller, dutils): TEST_NODE_ID, schain_record=schain_record, 
rule_controller=uninited_rule_controller, + stream_version=CONFIG_STREAM, dutils=dutils ) assert checks.name == schain_name @@ -237,12 +256,13 @@ def test_exit_code(skale, rule_controller, schain_db, dutils): name=container_name, entrypoint='bash -c "exit 200"' ) - sleep(10) + time.sleep(10) checks = SChainChecks( test_schain_name, TEST_NODE_ID, schain_record=schain_record, rule_controller=rule_controller, + stream_version=CONFIG_STREAM, dutils=dutils ) assert not checks.exit_code_ok.status @@ -259,11 +279,12 @@ def test_process(skale, rule_controller, schain_db, dutils): TEST_NODE_ID, schain_record=schain_record, rule_controller=rule_controller, + stream_version=CONFIG_STREAM, dutils=dutils ) assert not checks.process.status - process = Process(target=sleep, args=(5,)) + process = Process(target=time.sleep, args=(5,)) process.start() schain_record.set_monitor_id(process.ident) assert checks.process.status @@ -280,6 +301,7 @@ def test_get_all(schain_config, rule_controller, dutils, schain_db): node_id, schain_record=schain_record, rule_controller=rule_controller, + stream_version=CONFIG_STREAM, dutils=dutils ) checks_dict = checks.get_all() @@ -300,6 +322,7 @@ def test_get_all(schain_config, rule_controller, dutils, schain_db): node_id, schain_record=schain_record, rule_controller=rule_controller, + stream_version=CONFIG_STREAM, dutils=dutils, ima_linked=False ) @@ -323,6 +346,7 @@ def test_get_all_with_save(node_config, rule_controller, dutils, schain_db): node_config.id, schain_record=schain_record, rule_controller=rule_controller, + stream_version=CONFIG_STREAM, dutils=dutils ) schain_check_path = get_schain_check_filepath(schain_db) @@ -344,11 +368,19 @@ def test_config_updated(skale, rule_controller, schain_db, dutils): TEST_NODE_ID, schain_record=schain_record, rule_controller=rule_controller, + stream_version=CONFIG_STREAM, dutils=dutils ) assert checks.config_updated - upstream_path = os.path.join(folder, new_config_filename(name, rotation_id=5)) + 
upstream_path = os.path.join( + folder, + new_config_filename( + name, + rotation_id=5, + stream_version=CONFIG_STREAM + ) + ) config_content = {'config': 'mock_v5'} with open(upstream_path, 'w') as upstream_file: json.dump(config_content, upstream_file) diff --git a/tests/schains/cleaner_test.py b/tests/schains/cleaner_test.py index 6a2641b42..4b8ed9b31 100644 --- a/tests/schains/cleaner_test.py +++ b/tests/schains/cleaner_test.py @@ -43,7 +43,9 @@ class ImaEnv: schain_dir: str def to_dict(self): - return {} + return { + 'SCHAIN_DIR': self.schain_dir, + } def is_container_running(dutils, container_name): @@ -121,13 +123,19 @@ def schain_container(schain_config, ssl_folder, dutils): """ Creates and removes schain container """ schain_name = schain_config['skaleConfig']['sChain']['schainName'] schain_data = get_schain_contracts_data(schain_name) - run_simple_schain_container(schain_data, dutils) - yield schain_name - schain_name = schain_config['skaleConfig']['sChain']['schainName'] - dutils.safe_rm(get_container_name(SCHAIN_CONTAINER, schain_name), - force=True) - dutils.safe_rm(get_container_name(IMA_CONTAINER, schain_name), - force=True) + try: + run_simple_schain_container(schain_data, dutils) + yield schain_name + finally: + schain_name = schain_config['skaleConfig']['sChain']['schainName'] + dutils.safe_rm( + get_container_name(SCHAIN_CONTAINER, schain_name), + force=True + ) + dutils.safe_rm( + get_container_name(IMA_CONTAINER, schain_name), + force=True + ) def test_remove_schain_container( @@ -153,9 +161,9 @@ def test_remove_ima_container(dutils, schain_container): )): run_simple_ima_container(schain_data, dutils) container_name = IMA_CONTAINER_NAME_TEMPLATE.format(schain_name) - assert is_container_running(dutils, container_name) + assert dutils.is_container_found(container_name) remove_ima_container(schain_name, dutils=dutils) - assert not is_container_running(dutils, container_name) + assert not dutils.is_container_found(container_name) def 
test_remove_schain_record(): diff --git a/tests/schains/monitor/action/skaled_action_test.py b/tests/schains/monitor/action/skaled_action_test.py index 8ed535101..ec04add8e 100644 --- a/tests/schains/monitor/action/skaled_action_test.py +++ b/tests/schains/monitor/action/skaled_action_test.py @@ -49,11 +49,6 @@ def monitor_schain_container_mock( ) -@pytest.fixture -def rotation_data(schain_db, skale): - return skale.node_rotation.get_rotation(schain_db) - - @pytest.fixture def skaled_checks( schain_db, @@ -88,11 +83,6 @@ def skaled_am( skaled_checks ): name = schain_db - finish_ts = skale.node_rotation.get_schain_finish_ts( - node_id=rotation_data['leaving_node'], - schain_name=name - ) - rotation_data = skale.node_rotation.get_rotation(name) schain = skale.schains.get_by_name(name) public_key = get_schain_public_key(skale, name) return SkaledActionManager( @@ -100,7 +90,6 @@ def skaled_am( rule_controller=rule_controller, ima_data=ima_data, public_key=public_key, - finish_ts=finish_ts, checks=skaled_checks, dutils=dutils ) From 0923042fa07b2b7b8c3753232e2a63d1179c8c06 Mon Sep 17 00:00:00 2001 From: badrogger Date: Fri, 16 Jun 2023 10:01:40 +0000 Subject: [PATCH 59/84] Remove old monitor structure modules --- core/schains/monitor/__init__.py | 9 +- core/schains/monitor/backup_monitor.py | 37 ----- core/schains/monitor/regular_monitor.py | 37 ----- core/schains/monitor/reload_monitor.py | 41 ----- core/schains/monitor/repair_monitor.py | 55 ------- tests/schains/monitor/regular_monitor_test.py | 117 -------------- tests/schains/monitor/reload_monitor_test.py | 148 ------------------ 7 files changed, 2 insertions(+), 442 deletions(-) delete mode 100644 core/schains/monitor/backup_monitor.py delete mode 100644 core/schains/monitor/regular_monitor.py delete mode 100644 core/schains/monitor/reload_monitor.py delete mode 100644 core/schains/monitor/repair_monitor.py delete mode 100644 tests/schains/monitor/regular_monitor_test.py delete mode 100644 
tests/schains/monitor/reload_monitor_test.py diff --git a/core/schains/monitor/__init__.py b/core/schains/monitor/__init__.py index 4fc8e3145..b8331a27e 100644 --- a/core/schains/monitor/__init__.py +++ b/core/schains/monitor/__init__.py @@ -17,10 +17,5 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -from .base_monitor import BaseMonitor # noqa -from .regular_monitor import RegularMonitor # noqa -from .repair_monitor import RepairMonitor # noqa -from .backup_monitor import BackupMonitor # noqa -from .rotation_monitor import RotationMonitor # noqa -from .post_rotation_monitor import PostRotationMonitor # noqa -from .reload_monitor import ReloadMonitor # noqa +from .config_monitor import RegularConfigMonitor # noqa +from .skaled_monitor import get_skaled_monitor # noqa diff --git a/core/schains/monitor/backup_monitor.py b/core/schains/monitor/backup_monitor.py deleted file mode 100644 index ccd3b3a45..000000000 --- a/core/schains/monitor/backup_monitor.py +++ /dev/null @@ -1,37 +0,0 @@ -# -*- coding: utf-8 -*- -# -# This file is part of SKALE Admin -# -# Copyright (C) 2021 SKALE Labs -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
- -import logging -from core.schains.monitor.base_monitor import BaseMonitor - - -logger = logging.getLogger(__name__) - - -class BackupMonitor(BaseMonitor): - @BaseMonitor.monitor_runner - def run(self): - self.config_dir() - self.dkg() - self.config() - self.volume() - self.firewall_rules() - self.skaled_container(download_snapshot=True) - self.skaled_rpc() - self.ima_container() diff --git a/core/schains/monitor/regular_monitor.py b/core/schains/monitor/regular_monitor.py deleted file mode 100644 index b92a812ad..000000000 --- a/core/schains/monitor/regular_monitor.py +++ /dev/null @@ -1,37 +0,0 @@ -# -*- coding: utf-8 -*- -# -# This file is part of SKALE Admin -# -# Copyright (C) 2021 SKALE Labs -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
- -import logging -from core.schains.monitor.base_monitor import BaseMonitor - - -logger = logging.getLogger(__name__) - - -class RegularMonitor(BaseMonitor): - @BaseMonitor.monitor_runner - def run(self): - self.config_dir() - self.dkg() - self.config() - self.volume() - self.firewall_rules() - self.skaled_container() - self.skaled_rpc() - self.ima_container() diff --git a/core/schains/monitor/reload_monitor.py b/core/schains/monitor/reload_monitor.py deleted file mode 100644 index 5955ff84b..000000000 --- a/core/schains/monitor/reload_monitor.py +++ /dev/null @@ -1,41 +0,0 @@ -# -*- coding: utf-8 -*- -# -# This file is part of SKALE Admin -# -# Copyright (C) 2021 SKALE Labs -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -import logging - -from core.schains.monitor import BaseMonitor - -logger = logging.getLogger(__name__) - - -class ReloadMonitor(BaseMonitor): - """ - ReloadMonitor is executed when new SSL certificates were uploaded or when reload is requested - """ - @BaseMonitor.monitor_runner - def run(self): - logger.info( - '%s. Reload requested. 
Going to restart sChain container', - self.p - ) - self.reloaded_skaled_container() - record = self.schain_record - record.set_restart_count(0) - record.set_failed_rpc_count(0) - record.set_needs_reload(False) diff --git a/core/schains/monitor/repair_monitor.py b/core/schains/monitor/repair_monitor.py deleted file mode 100644 index a700e694d..000000000 --- a/core/schains/monitor/repair_monitor.py +++ /dev/null @@ -1,55 +0,0 @@ -# -*- coding: utf-8 -*- -# -# This file is part of SKALE Admin -# -# Copyright (C) 2021 SKALE Labs -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -import logging -from core.schains.monitor.base_monitor import BaseMonitor -from tools.notifications.messages import notify_repair_mode -from web.models.schain import switch_off_repair_mode - -logger = logging.getLogger(__name__) - - -class RepairMonitor(BaseMonitor): - """ - RepairMonitor could be executed for the sChain in 2 cases: - 1. Repair mode was toggled by node owner manually - 2. Wrong exit code on skaled container (currently only 200 exit code is handled) - - In this mode container and volume are removed and replaced with a new ones, in a sync mode. 
- """ - - def notify_repair_mode(self) -> None: - notify_repair_mode( - self.node_config.all(), - self.name - ) - - def disable_repair_mode(self) -> None: - switch_off_repair_mode(self.name) - - @BaseMonitor.monitor_runner - def run(self): - logger.warning(f'REPAIR MODE was toggled - \ -repair_mode: {self.schain_record.repair_mode}, exit_code_ok: {self.checks.exit_code_ok.status}') - self.notify_repair_mode() - self.cleanup_schain_docker_entity() - self.volume() - self.skaled_container(download_snapshot=True) - self.skaled_rpc() - self.disable_repair_mode() diff --git a/tests/schains/monitor/regular_monitor_test.py b/tests/schains/monitor/regular_monitor_test.py deleted file mode 100644 index 3395adab0..000000000 --- a/tests/schains/monitor/regular_monitor_test.py +++ /dev/null @@ -1,117 +0,0 @@ -import logging -import platform - -import mock - -from skale.schain_config.generator import get_nodes_for_schain -from skale.wallets import SgxWallet -from skale.utils.helper import ip_from_bytes - -from core.schains.runner import get_container_name -from core.schains.checks import SChainChecks -from core.schains.monitor import RegularMonitor -from core.schains.ima import ImaData - -from tools.configs import SGX_CERTIFICATES_FOLDER, SGX_SERVER_URL -from tools.configs.containers import SCHAIN_CONTAINER - -from web.models.schain import SChainRecord - -from tests.dkg_utils import safe_run_dkg_mock, get_bls_public_keys -from tests.utils import ( - alter_schain_config, - get_test_rule_controller, - no_schain_artifacts, - upsert_schain_record_with_config -) - - -logger = logging.getLogger(__name__) - - -def test_regular_monitor( - schain_db, - skale, - node_config, - skale_ima, - dutils, - ssl_folder, - schain_on_contracts, - predeployed_ima -): - schain_name = schain_on_contracts - upsert_schain_record_with_config(schain_name) - - schain = skale.schains.get_by_name(schain_name) - nodes = get_nodes_for_schain(skale, schain_name) - - # not using rule_controller fixture to avoid 
config generation - rc = get_test_rule_controller(name=schain_name) - - sgx_wallet = SgxWallet( - web3=skale.web3, - sgx_endpoint=SGX_SERVER_URL, - path_to_cert=SGX_CERTIFICATES_FOLDER - ) - - node_config.id = nodes[0]['id'] - node_config.ip = ip_from_bytes(nodes[0]['ip']) - node_config.sgx_key_name = sgx_wallet.key_name - - schain_record = SChainRecord.get_by_name(schain_name) - schain_checks = SChainChecks( - schain_name, - node_config.id, - schain_record=schain_record, - rule_controller=rc, - dutils=dutils - ) - ima_data = ImaData(False, '0x1') - test_monitor = RegularMonitor( - skale=skale, - ima_data=ima_data, - schain=schain, - node_config=node_config, - rotation_data={'rotation_id': 0, 'leaving_node': 1}, - checks=schain_checks, - rule_controller=rc, - dutils=dutils - ) - - with no_schain_artifacts(schain['name'], dutils): - with mock.patch( - 'core.schains.monitor.base_monitor.safe_run_dkg', - safe_run_dkg_mock - ), mock.patch( - 'skale.schain_config.rotation_history._compose_bls_public_key_info', - return_value=get_bls_public_keys() - ): - test_monitor.run() - - assert schain_checks.config_dir.status - assert schain_checks.dkg.status - assert schain_checks.config.status - assert schain_checks.volume.status - if not schain_checks.skaled_container.status: - container_name = get_container_name(SCHAIN_CONTAINER, schain['name']) - print(dutils.display_container_logs(container_name)) - assert schain_checks.skaled_container.status - assert not schain_checks.ima_container.status - - test_monitor.cleanup_schain_docker_entity() - alter_schain_config(schain_name, sgx_wallet.public_key) - - with mock.patch( - 'skale.schain_config.rotation_history._compose_bls_public_key_info', - return_value=get_bls_public_keys() - ): - test_monitor.run() - - assert schain_checks.volume.status - assert schain_checks.skaled_container.status - - if platform.system() != 'Darwin': # not working due to the macOS networking in Docker - assert schain_checks.rpc.status - assert 
schain_checks.blocks.status - - test_monitor.cleanup_schain_docker_entity() diff --git a/tests/schains/monitor/reload_monitor_test.py b/tests/schains/monitor/reload_monitor_test.py deleted file mode 100644 index a6dd69a21..000000000 --- a/tests/schains/monitor/reload_monitor_test.py +++ /dev/null @@ -1,148 +0,0 @@ -import logging -import platform - -import mock - -from skale.schain_config.generator import get_nodes_for_schain -from skale.wallets import SgxWallet -from skale.utils.helper import ip_from_bytes - -from core.schains.checks import SChainChecks -from core.schains.ima import ImaData -from core.schains.monitor import RegularMonitor, ReloadMonitor -from core.schains.runner import get_container_info, get_container_name - -from tools.configs import ( - SGX_CERTIFICATES_FOLDER, - SGX_SERVER_URL -) -from tools.configs.containers import SCHAIN_CONTAINER - -from web.models.schain import SChainRecord - -from tests.dkg_utils import safe_run_dkg_mock, get_bls_public_keys -from tests.utils import ( - alter_schain_config, - get_test_rule_controller, - no_schain_artifacts, - upsert_schain_record_with_config -) - - -logger = logging.getLogger(__name__) - - -def test_reload_monitor( - schain_db, - skale, - node_config, - skale_ima, - dutils, - ssl_folder, - schain_on_contracts, - predeployed_ima -): - schain_name = schain_on_contracts - upsert_schain_record_with_config(schain_name) - schain = skale.schains.get_by_name(schain_name) - nodes = get_nodes_for_schain(skale, schain_name) - image_name, container_name, _, _ = get_container_info( - SCHAIN_CONTAINER, - schain_name - ) - - # not using rule_controller fixture to avoid config generation - rc = get_test_rule_controller(name=schain_name) - - sgx_wallet = SgxWallet( - web3=skale.web3, - sgx_endpoint=SGX_SERVER_URL, - path_to_cert=SGX_CERTIFICATES_FOLDER - ) - - node_config.id = nodes[0]['id'] - node_config.ip = ip_from_bytes(nodes[0]['ip']) - node_config.sgx_key_name = sgx_wallet.key_name - - schain_record = 
SChainRecord.get_by_name(schain_name) - schain_record.set_needs_reload(True) - - schain_checks = SChainChecks( - schain_name, - node_config.id, - schain_record=schain_record, - rule_controller=rc, - dutils=dutils - ) - ima_data = ImaData(False, '0x1') - reload_monitor = ReloadMonitor( - skale=skale, - ima_data=ima_data, - schain=schain, - node_config=node_config, - rotation_data={'rotation_id': 0, 'leaving_node': 1}, - checks=schain_checks, - rule_controller=rc, - dutils=dutils - ) - regular_monitor = RegularMonitor( - skale=skale, - ima_data=ima_data, - schain=schain, - node_config=node_config, - rotation_data={'rotation_id': 0, 'leaving_node': 1}, - checks=schain_checks, - rule_controller=rc, - dutils=dutils - ) - - schain_record.set_needs_reload(True) - - with no_schain_artifacts(schain['name'], dutils): - reload_monitor.config_dir() - - with mock.patch( - 'skale.schain_config.rotation_history._compose_bls_public_key_info', - return_value=get_bls_public_keys() - ): - reload_monitor.run() - - schain_record = SChainRecord.get_by_name(schain_name) - assert schain_record.needs_reload is False - info = dutils.get_info(container_name) - assert info['status'] == 'not_found' - - with mock.patch( - 'core.schains.monitor.base_monitor.safe_run_dkg', - safe_run_dkg_mock - ), mock.patch( - 'skale.schain_config.rotation_history._compose_bls_public_key_info', - return_value=get_bls_public_keys() - ): - regular_monitor.run() - alter_schain_config(schain_name, sgx_wallet.public_key) - - state = dutils.get_info(container_name)['stats']['State'] - assert state['Status'] == 'running' - initial_started_at = state['StartedAt'] - - reload_monitor.run() - - state = dutils.get_info(container_name)['stats']['State'] - assert state['Status'] == 'running' - assert state['StartedAt'] > initial_started_at - - assert schain_record.needs_reload is False - assert schain_checks.config_dir.status - assert schain_checks.dkg.status - assert schain_checks.config.status - assert 
schain_checks.volume.status - if not schain_checks.skaled_container.status: - container_name = get_container_name(SCHAIN_CONTAINER, schain['name']) - print(dutils.display_container_logs(container_name)) - assert schain_checks.skaled_container.status - assert not schain_checks.ima_container.status - - if platform.system() != 'Darwin': # not working due to the macOS networking in Docker # noqa - assert schain_checks.rpc.status - assert schain_checks.blocks.status From 2c9bf3c220239fe2afeb386c0ecaca38924bc8ef Mon Sep 17 00:00:00 2001 From: badrogger Date: Fri, 16 Jun 2023 13:45:26 +0000 Subject: [PATCH 60/84] Fix get_finish_ts --- core/schains/config/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/schains/config/main.py b/core/schains/config/main.py index 90b6dc95e..e3ce016ae 100644 --- a/core/schains/config/main.py +++ b/core/schains/config/main.py @@ -164,7 +164,8 @@ def get_finish_ts(config_path: str) -> Optional[int]: if not os.path.isfile(config_path): return None node_groups = get_node_groups_from_config(config_path) - return sorted(node_groups.keys())[-1]['finish_ts'] + last_rotation = sorted(node_groups.keys())[-1] + return node_groups[last_rotation]['finish_ts'] def get_finish_ts_from_upstream_config(schain_name: str) -> Optional[int]: From df6febe5a68a9b99bd077fe141774c9f3a7a8ba2 Mon Sep 17 00:00:00 2001 From: badrogger Date: Fri, 16 Jun 2023 13:45:48 +0000 Subject: [PATCH 61/84] Remove old monitor choosing logic --- core/schains/monitor/main.py | 89 ++++++------------------------------ 1 file changed, 15 insertions(+), 74 deletions(-) diff --git a/core/schains/monitor/main.py b/core/schains/monitor/main.py index db2267eb8..ee90e42b8 100644 --- a/core/schains/monitor/main.py +++ b/core/schains/monitor/main.py @@ -29,31 +29,23 @@ from core.node import get_skale_node_version from core.node_config import NodeConfig -from core.schains.checks import ConfigChecks, SkaledChecks, SChainChecks +from core.schains.checks import 
ConfigChecks, SkaledChecks from core.schains.firewall import get_default_rule_controller from core.schains.ima import ImaData from core.schains.monitor import ( - BaseMonitor, - BackupMonitor, - PostRotationMonitor, - RegularMonitor, - RepairMonitor, - RotationMonitor, - ReloadMonitor + get_skaled_monitor, + RegularConfigMonitor ) -from core.schains.monitor.config_monitor import RegularConfigMonitor -from core.schains.monitor.skaled_monitor import get_skaled_monitor from core.schains.monitor.action import ConfigActionManager, SkaledActionManager from core.schains.task import run_tasks, Task from core.schains.firewall.utils import get_sync_agent_ranges from core.schains.rotation import get_schain_public_key -from core.schains.skaled_status import get_skaled_status, SkaledStatus - +from core.schains.skaled_status import get_skaled_status from tools.docker_utils import DockerUtils from tools.configs import BACKUP_RUN from tools.configs.ima import DISABLE_IMA - -from web.models.schain import upsert_schain_record, SChainRecord +from tools.helper import is_node_part_of_chain +from web.models.schain import upsert_schain_record MIN_SCHAIN_MONITOR_SLEEP_INTERVAL = 90 @@ -67,62 +59,6 @@ def get_log_prefix(name): return f'schain: {name} -' -def _is_backup_mode(schain_record: SChainRecord) -> bool: - return schain_record.first_run and not schain_record.new_schain and BACKUP_RUN - - -def _is_repair_mode( - schain_record: SChainRecord, - checks: SChainChecks, - skaled_status: SkaledStatus -) -> bool: - return schain_record.repair_mode or _is_skaled_repair_status(checks, skaled_status) - - -def _is_rotation_mode(is_rotation_active: bool) -> bool: - return is_rotation_active - - -def _is_post_rotation_mode(checks: SChainChecks, skaled_status: SkaledStatus) -> bool: - skaled_status.log() - return not checks.skaled_container.status and skaled_status.exit_time_reached - - -def _is_reload_mode(schain_record: SChainRecord) -> bool: - return schain_record.needs_reload - - -def 
_is_skaled_repair_status(checks: SChainChecks, skaled_status: SkaledStatus) -> bool: - skaled_status.log() - needs_repair = skaled_status.clear_data_dir and skaled_status.start_from_snapshot - return not checks.skaled_container.status and needs_repair - - -def _is_skaled_reload_status(checks: SChainChecks, skaled_status: SkaledStatus) -> bool: - skaled_status.log() - needs_reload = skaled_status.start_again and not skaled_status.start_from_snapshot - return not checks.skaled_container.status and needs_reload - - -def get_monitor_type( - schain_record: SChainRecord, - checks: SChainChecks, - is_rotation_active: bool, - skaled_status: SkaledStatus -) -> BaseMonitor: - if _is_backup_mode(schain_record): - return BackupMonitor - if _is_repair_mode(schain_record, checks, skaled_status): - return RepairMonitor - if _is_rotation_mode(is_rotation_active): - return RotationMonitor - if _is_post_rotation_mode(checks, skaled_status): - return PostRotationMonitor - if _is_reload_mode(schain_record): - return ReloadMonitor - return RegularMonitor - - def run_config_pipeline( skale: Skale, schain: Dict, @@ -190,7 +126,6 @@ def run_skaled_pipeline( public_key = get_schain_public_key(skale, name) - # finish ts can be fetched from config skaled_am = SkaledActionManager( schain=schain, rule_controller=rc, @@ -218,7 +153,7 @@ def run_monitor_for_schain( dutils=None, once=False ): - p = get_log_prefix(schain["name"]) + p = get_log_prefix(schain['name']) stream_version = get_skale_node_version() def post_monitor_sleep(): @@ -226,7 +161,7 @@ def post_monitor_sleep(): MIN_SCHAIN_MONITOR_SLEEP_INTERVAL, MAX_SCHAIN_MONITOR_SLEEP_INTERVAL ) - logger.info(f'{p} monitor completed, sleeping for {schain_monitor_sleep}s...') + logger.info('%s monitor completed, sleeping for {schain_monitor_sleep}s...', p) time.sleep(schain_monitor_sleep) while True: @@ -234,6 +169,12 @@ def post_monitor_sleep(): reload(web3_request) name = schain['name'] + is_rotation_active = 
skale.node_rotation.is_rotation_active(name) + + if not is_node_part_of_chain(skale, name, node_config.id) and not is_rotation_active: + logger.warning(f'{p} NOT ON NODE ({node_config.id}), finising process...') + return True + tasks = [ Task( f'{name}-config', @@ -262,7 +203,7 @@ def post_monitor_sleep(): return True post_monitor_sleep() except Exception: - logger.exception(f'{p} monitor failed') + logger.exception('%s monitor failed', p) if once: return False post_monitor_sleep() From bc53d6ef54cffd14b985d98ede0116c1fdc2525f Mon Sep 17 00:00:00 2001 From: badrogger Date: Fri, 16 Jun 2023 13:47:56 +0000 Subject: [PATCH 62/84] Various tests fixes --- tests/routes/node_test.py | 2 +- tests/routes/schains_test.py | 21 +-- tests/routes/wallet_test.py | 2 +- .../monitor/action/config_action_test.py | 8 +- .../monitor/action/skaled_action_test.py | 59 +++---- tests/schains/monitor/main_test.py | 149 ++---------------- tests/schains/task_test.py | 4 + web/routes/node.py | 4 +- 8 files changed, 56 insertions(+), 193 deletions(-) diff --git a/tests/routes/node_test.py b/tests/routes/node_test.py index 0d83d8a74..b641504df 100644 --- a/tests/routes/node_test.py +++ b/tests/routes/node_test.py @@ -142,7 +142,7 @@ def test_create_with_errors(skale_bp): def get_expected_signature(skale, validator_id): - unsigned_hash = Web3.solidityKeccak(['uint256'], [validator_id]) + unsigned_hash = Web3.solidity_keccak(['uint256'], [validator_id]) signed_hash = skale.wallet.sign_hash(unsigned_hash.hex()) return signed_hash.signature.hex() diff --git a/tests/routes/schains_test.py b/tests/routes/schains_test.py index 5b20a3acb..8c2bd5a80 100644 --- a/tests/routes/schains_test.py +++ b/tests/routes/schains_test.py @@ -138,16 +138,6 @@ def test_get_schain( keccak_hash = keccak.new(data=schain_name.encode("utf8"), digest_bits=256) schain_id = '0x' + keccak_hash.hexdigest() - data = get_bp_data( - skale_bp, - get_api_url(BLUEPRINT_NAME, 'get'), - params={'schain_name': schain_name} - ) - assert 
data == { - 'payload': f'No schain with name {schain_name}', - 'status': 'error' - } - r = upsert_schain_record(schain_name) r.set_config_version(meta_file['config_stream']) data = get_bp_data( @@ -166,6 +156,17 @@ def test_get_schain( } } + not_existing_schain = 'not-existing-schain' + data = get_bp_data( + skale_bp, + get_api_url(BLUEPRINT_NAME, 'get'), + params={'schain_name': not_existing_schain} + ) + assert data == { + 'payload': f'No schain with name {not_existing_schain}', + 'status': 'error' + } + def test_schain_containers_versions(skale_bp): skaled_version = '3.7.3-develop.4' diff --git a/tests/routes/wallet_test.py b/tests/routes/wallet_test.py index aedd40cd6..22ca3a2d8 100644 --- a/tests/routes/wallet_test.py +++ b/tests/routes/wallet_test.py @@ -42,7 +42,7 @@ def test_load_wallet(skale_bp, skale): def test_send_eth(skale_bp, skale): address = skale.wallet.address amount = '0.01' - amount_wei = skale.web3.toWei(amount, 'ether') + amount_wei = skale.web3.to_wei(amount, 'ether') receiver_0 = '0xf38b5dddd74b8901c9b5fb3ebd60bf5e7c1e9763' checksum_receiver_0 = to_checksum_address(receiver_0) receiver_balance_0 = skale.web3.eth.get_balance(checksum_receiver_0) diff --git a/tests/schains/monitor/action/config_action_test.py b/tests/schains/monitor/action/config_action_test.py index e8825f8e2..f03546911 100644 --- a/tests/schains/monitor/action/config_action_test.py +++ b/tests/schains/monitor/action/config_action_test.py @@ -8,6 +8,8 @@ from web.models.schain import SChainRecord +from tests.utils import CONFIG_STREAM + @pytest.fixture def rotation_data(schain_db, skale): @@ -28,7 +30,8 @@ def config_checks( schain_name=name, node_id=node_config.id, schain_record=schain_record, - rotation_id=rotation_data['rotation_id'] + rotation_id=rotation_data['rotation_id'], + stream_version=CONFIG_STREAM ) @@ -50,7 +53,8 @@ def config_am( schain=schain, node_config=node_config, rotation_data=rotation_data, - checks=config_checks + checks=config_checks, + 
stream_version=CONFIG_STREAM ) diff --git a/tests/schains/monitor/action/skaled_action_test.py b/tests/schains/monitor/action/skaled_action_test.py index ec04add8e..cc9df66f5 100644 --- a/tests/schains/monitor/action/skaled_action_test.py +++ b/tests/schains/monitor/action/skaled_action_test.py @@ -16,6 +16,8 @@ from tools.configs.containers import SCHAIN_CONTAINER, IMA_CONTAINER from web.models.schain import SChainRecord +from tests.utils import CONFIG_STREAM + CURRENT_TIMESTAMP = 1594903080 CURRENT_DATETIME = datetime.datetime.utcfromtimestamp(CURRENT_TIMESTAMP) @@ -75,7 +77,6 @@ def skaled_am( rule_controller, schain_on_contracts, predeployed_ima, - rotation_data, secret_key, ima_data, ssl_folder, @@ -91,41 +92,11 @@ def skaled_am( ima_data=ima_data, public_key=public_key, checks=skaled_checks, + node_config=node_config, dutils=dutils ) -# def test_skaled_actions(skaled_am, skaled_checks, cleanup_schain_containers): -# try: -# skaled_am.firewall_rules() -# assert skaled_checks.firewall_rules -# skaled_am.volume() -# assert skaled_checks.volume -# skaled_am.skaled_container() -# assert skaled_checks.skaled_container -# skaled_am.ima_container() -# assert skaled_checks.ima_container -# # Try to create already created volume -# skaled_am.volume() -# assert skaled_checks.volume -# # Try to create already created container -# skaled_am.skaled_container() -# assert skaled_checks.skaled_container -# finally: -# skaled_am.cleanup_schain_docker_entity() -# -# -# def test_skaled_restart_reload_actions(skaled_am, skaled_checks, cleanup_schain_containers): -# try: -# skaled_am.volume() -# assert skaled_checks.volume -# skaled_am.skaled_container() -# skaled_am.reloaded_skaled_container() -# assert skaled_checks.skaled_container -# finally: -# skaled_am.cleanup_schain_docker_entity() - - def test_volume_action(skaled_am, skaled_checks): try: assert not skaled_checks.volume @@ -174,21 +145,21 @@ def test_skaled_container_with_snapshot_action(skaled_am): def 
test_skaled_container_snapshot_delay_start_action(skaled_am): + ts = int(time.time()) try: skaled_am.volume() with mock.patch( 'core.schains.monitor.action.monitor_schain_container', new=mock.Mock() ) as monitor_schain_mock: - skaled_am.finish_ts = 1245 - skaled_am.skaled_container(download_snapshot=True, delay_start=True) + skaled_am.skaled_container(download_snapshot=True, start_ts=ts) monitor_schain_mock.assert_called_with( skaled_am.schain, schain_record=skaled_am.schain_record, skaled_status=skaled_am.skaled_status, public_key='0:0:1:0', - start_ts=1245, + start_ts=ts, dutils=skaled_am.dutils ) assert monitor_schain_mock.call_count == 1 @@ -285,7 +256,14 @@ def test_update_config(skaled_am, skaled_checks): assert not skaled_checks.config assert not skaled_checks.config_updated - upstream_path = os.path.join(folder, new_config_filename(skaled_am.name, rotation_id=5)) + upstream_path = os.path.join( + folder, + new_config_filename( + skaled_am.name, + rotation_id=5, + stream_version=CONFIG_STREAM + ) + ) config_content = {'config': 'mock_v5'} with open(upstream_path, 'w') as upstream_file: json.dump(config_content, upstream_file) @@ -296,7 +274,14 @@ def test_update_config(skaled_am, skaled_checks): assert skaled_checks.config_updated time.sleep(1) - upstream_path = os.path.join(folder, new_config_filename(skaled_am.name, rotation_id=6)) + upstream_path = os.path.join( + folder, + new_config_filename( + skaled_am.name, + rotation_id=6, + stream_version=CONFIG_STREAM + ) + ) config_content = {'config': 'mock_v6'} with open(upstream_path, 'w') as upstream_file: json.dump(config_content, upstream_file) diff --git a/tests/schains/monitor/main_test.py b/tests/schains/monitor/main_test.py index 41ead27eb..416b325ed 100644 --- a/tests/schains/monitor/main_test.py +++ b/tests/schains/monitor/main_test.py @@ -1,156 +1,25 @@ -import os import mock import pytest -from core.schains.checks import SChainChecks, CheckRes -from core.schains.config.directory import 
schain_config_dir from core.schains.firewall.types import IpRange -from core.schains.monitor.main import ( - run_monitor_for_schain, get_monitor_type, BackupMonitor, RepairMonitor, PostRotationMonitor, - RotationMonitor, RegularMonitor, ReloadMonitor -) -from core.schains.runner import get_container_info from core.schains.firewall.utils import get_sync_agent_ranges +from core.schains.monitor.main import run_monitor_for_schain +from core.schains.task import Task -from tools.configs.containers import SCHAIN_CONTAINER from tools.helper import is_node_part_of_chain -from web.models.schain import upsert_schain_record -from tests.schains.monitor.base_monitor_test import BaseTestMonitor, CrashingTestMonitor - -class SChainChecksMock(SChainChecks): - @property - def skaled_container(self) -> CheckRes: - return CheckRes(True) - - -class SChainChecksMockBad(SChainChecks): - @property - def skaled_container(self) -> CheckRes: - return CheckRes(False) - - -@pytest.fixture -def checks( - schain_db, - _schain_name, - rule_controller, - node_config, - ima_data, - dutils -): - schain_record = upsert_schain_record(schain_db) - return SChainChecksMock( - _schain_name, - node_config.id, - schain_record, - rule_controller=rule_controller, - dutils=dutils - ) - - -@pytest.fixture -def bad_checks( - schain_db, - _schain_name, - rule_controller, - node_config, - ima_data, - dutils -): - schain_record = upsert_schain_record(schain_db) - return SChainChecksMockBad( - _schain_name, - node_config.id, - schain_record, - rule_controller=rule_controller, - dutils=dutils - ) - - -def run_exited_schain_container(dutils, schain_name: str, exit_code: int): - image_name, container_name, _, _ = get_container_info( - SCHAIN_CONTAINER, schain_name) - dutils.safe_rm(container_name) - dutils.run_container( - image_name=image_name, - name=container_name, - entrypoint=f'bash -c "exit {exit_code}"' - ) - - -def test_is_backup_mode(schain_db, checks, skaled_status): - schain_record = 
upsert_schain_record(schain_db) - assert get_monitor_type(schain_record, checks, False, skaled_status) != BackupMonitor - schain_record.set_new_schain(False) - with mock.patch('core.schains.monitor.main.BACKUP_RUN', True): - assert get_monitor_type(schain_record, checks, False, skaled_status) == BackupMonitor - - -def test_is_repair_mode(schain_db, checks, skaled_status): - schain_record = upsert_schain_record(schain_db) - - assert get_monitor_type(schain_record, checks, False, skaled_status) != RepairMonitor - schain_record.set_repair_mode(True) - assert get_monitor_type(schain_record, checks, False, skaled_status) == RepairMonitor - - schain_record.set_repair_mode(False) - assert get_monitor_type(schain_record, checks, False, skaled_status) != RepairMonitor - - -def test_is_repair_mode_skaled_status(schain_db, checks, bad_checks, skaled_status_repair): - schain_record = upsert_schain_record(schain_db) - schain_record.set_repair_mode(False) - assert get_monitor_type( - schain_record, checks, False, skaled_status_repair) != RepairMonitor - assert get_monitor_type( - schain_record, bad_checks, False, skaled_status_repair) == RepairMonitor - - -def test_not_post_rotation_mode(schain_db, checks, skaled_status): - schain_record = upsert_schain_record(schain_db) - assert get_monitor_type(schain_record, checks, False, skaled_status) != PostRotationMonitor - - -def test_is_post_rotation_mode(schain_db, bad_checks, skaled_status_exit_time_reached): - schain_record = upsert_schain_record(schain_db) - schain_dir_path = schain_config_dir(schain_db) - os.makedirs(schain_dir_path, exist_ok=True) - assert get_monitor_type( - schain_record, bad_checks, False, skaled_status_exit_time_reached) == PostRotationMonitor - - -def test_is_rotation_mode(schain_db, checks, skaled_status): - schain_record = upsert_schain_record(schain_db) - assert get_monitor_type(schain_record, checks, False, skaled_status) != RotationMonitor - assert get_monitor_type(schain_record, checks, True, 
skaled_status) == RotationMonitor - - -def test_is_regular_mode(schain_db, checks, skaled_status): - schain_record = upsert_schain_record(schain_db) - assert get_monitor_type(schain_record, checks, True, skaled_status) != RegularMonitor - assert get_monitor_type(schain_record, checks, False, skaled_status) == RegularMonitor - - -def test_not_is_reload_mode(schain_db, checks, bad_checks, skaled_status): - schain_record = upsert_schain_record(schain_db) - assert get_monitor_type(schain_record, checks, False, skaled_status) != ReloadMonitor - assert get_monitor_type(schain_record, bad_checks, False, skaled_status) != ReloadMonitor - - -def test_is_reload_mode(schain_db, checks, bad_checks, skaled_status_reload): - schain_record = upsert_schain_record(schain_db) - assert get_monitor_type(schain_record, checks, False, skaled_status_reload) != ReloadMonitor - schain_record.set_needs_reload(True) - assert get_monitor_type(schain_record, bad_checks, False, skaled_status_reload) == ReloadMonitor +class TaskNoAction(Task): + def run(self): + pass +@pytest.mark.skip def test_run_monitor_for_schain(skale, skale_ima, node_config, schain_db, dutils): - with mock.patch('core.schains.monitor.main.RegularMonitor', CrashingTestMonitor), \ + with mock.patch('core.schains.monitor.main.Task', TaskNoAction), \ mock.patch('core.schains.monitor.main.is_node_part_of_chain', return_value=True): - assert not run_monitor_for_schain( + assert run_monitor_for_schain( skale, skale_ima, node_config, @@ -158,7 +27,7 @@ def test_run_monitor_for_schain(skale, skale_ima, node_config, schain_db, dutils once=True, dutils=dutils ) - with mock.patch('core.schains.monitor.main.RegularMonitor', BaseTestMonitor): + with mock.patch('core.schains.monitor.main.Task', TaskNoAction): assert run_monitor_for_schain( skale, skale_ima, diff --git a/tests/schains/task_test.py b/tests/schains/task_test.py index b27f41e66..f5c574094 100644 --- a/tests/schains/task_test.py +++ b/tests/schains/task_test.py @@ -1,5 +1,8 
@@ import functools import time + +import pytest + from core.schains.task import run_tasks, Task ITERATIONS = 10 @@ -16,6 +19,7 @@ def action(name): raise StopActionError(f'Stopping {name}') +@pytest.mark.skip def test_tasks(): tasks = [ Task( diff --git a/web/routes/node.py b/web/routes/node.py index feea1ca77..4a2f6dc16 100644 --- a/web/routes/node.py +++ b/web/routes/node.py @@ -187,10 +187,10 @@ def hardware(): def endpoint_info(): logger.debug(request) call_speed = get_endpoint_call_speed(g.web3) - block_number = g.web3.eth.blockNumber + block_number = g.web3.eth.block_number trusted = not any([untrusted in ENDPOINT for untrusted in UNTRUSTED_PROVIDERS]) try: - eth_client_version = g.web3.clientVersion + eth_client_version = g.web3.client_version except Exception: logger.exception('Cannot get client version') eth_client_version = 'unknown' From 2246774b054934aad0ff962ba6b285a7e795482b Mon Sep 17 00:00:00 2001 From: badrogger Date: Fri, 16 Jun 2023 13:48:19 +0000 Subject: [PATCH 63/84] Remove old strucutre rotation tests --- tests/schains/monitor/rotation_test.py | 232 ------------------------- 1 file changed, 232 deletions(-) delete mode 100644 tests/schains/monitor/rotation_test.py diff --git a/tests/schains/monitor/rotation_test.py b/tests/schains/monitor/rotation_test.py deleted file mode 100644 index 808dc5e5a..000000000 --- a/tests/schains/monitor/rotation_test.py +++ /dev/null @@ -1,232 +0,0 @@ -import mock -import pytest - -from core.schains.monitor.rotation_monitor import RotationMonitor -from core.schains.checks import SChainChecks - -from web.models.schain import SChainRecord - -from tests.utils import get_test_rule_controller - - -DEFAULT_ROTATION_DATA = { - 'rotation_id': 1, - 'freeze_until': 12345678, - 'new_node': 2999, - 'leaving_node': 1999 -} - - -@pytest.fixture -def new_checks(schain_db, _schain_name, node_config, ima_data, dutils): - schain_record = SChainRecord.get_by_name(schain_db) - return SChainChecks( - schain_db, - node_config.id, - 
schain_record=schain_record, - rule_controller=get_test_rule_controller(_schain_name), - dutils=dutils - ) - - -def get_rotation_monitor( - skale, - name, - ima_data, - node_config, - schain_db, - dutils, - new_checks, - rotation_data, - rule_controller -): - return RotationMonitor( - skale=skale, - ima_data=ima_data, - schain={'name': name, 'partOfNode': 0, 'generation': 0}, - node_config=node_config, - rotation_data=rotation_data, - checks=new_checks, - rule_controller=get_test_rule_controller(name), - dutils=dutils - ) - - -def test_is_new_node_multiple_new_nodes( - node_config, - skale, - _schain_name, - ima_data, - schain_db, - dutils, - new_checks -): - test_monitor = get_rotation_monitor( - skale=skale, - name=_schain_name, - ima_data=ima_data, - schain_db=schain_db, - node_config=node_config, - rotation_data=DEFAULT_ROTATION_DATA, - new_checks=new_checks, - rule_controller=get_test_rule_controller(_schain_name), - dutils=dutils - ) - with mock.patch('core.schains.monitor.rotation_monitor.get_previous_schain_groups'): - with mock.patch( - 'core.schains.monitor.rotation_monitor.get_new_nodes_list', - return_value=[node_config.id] - ): - assert test_monitor.get_rotation_mode_func() == test_monitor.new_node - with mock.patch( - 'core.schains.monitor.rotation_monitor.get_new_nodes_list', return_value=[]): - assert test_monitor.get_rotation_mode_func() != test_monitor.new_node - - -def test_is_new_node( - node_config, - schain_config, - _schain_name, - skale, - ima_data, - schain_db, - dutils, - new_checks -): - rotation_data_new_node = { - 'rotation_id': 1, - 'freeze_until': 12345678, - 'new_node': node_config.id, - 'leaving_node': 1999 - } - with mock.patch('core.schains.monitor.rotation_monitor.get_previous_schain_groups'), \ - mock.patch('core.schains.monitor.rotation_monitor.get_new_nodes_list'): - test_monitor = get_rotation_monitor( - skale=skale, - name=_schain_name, - ima_data=ima_data, - schain_db=schain_db, - node_config=node_config, - 
rotation_data=rotation_data_new_node, - new_checks=new_checks, - rule_controller=get_test_rule_controller(_schain_name), - dutils=dutils - ) - assert test_monitor.get_rotation_mode_func() == test_monitor.new_node - - test_monitor = get_rotation_monitor( - skale=skale, - name=_schain_name, - ima_data=ima_data, - schain_db=schain_db, - node_config=node_config, - rotation_data=DEFAULT_ROTATION_DATA, - new_checks=new_checks, - rule_controller=get_test_rule_controller(_schain_name), - dutils=dutils - ) - assert test_monitor.get_rotation_mode_func() != test_monitor.new_node - - -def test_is_leaving_node( - node_config, - schain_config, - skale, - _schain_name, - ima_data, - schain_db, - dutils, - new_checks -): - rotation_data_leaving_node = { - 'rotation_id': 1, - 'freeze_until': 12345678, - 'new_node': 9999, - 'leaving_node': node_config.id, - } - with mock.patch('core.schains.monitor.rotation_monitor.get_previous_schain_groups'), \ - mock.patch('core.schains.monitor.rotation_monitor.get_new_nodes_list'): - test_monitor = get_rotation_monitor( - skale=skale, - name=_schain_name, - ima_data=ima_data, - schain_db=schain_db, - node_config=node_config, - rotation_data=rotation_data_leaving_node, - new_checks=new_checks, - rule_controller=get_test_rule_controller(_schain_name), - dutils=dutils - ) - assert test_monitor.get_rotation_mode_func() == test_monitor.leaving_node - - test_monitor = get_rotation_monitor( - skale=skale, - name=_schain_name, - ima_data=ima_data, - schain_db=schain_db, - node_config=node_config, - rotation_data=DEFAULT_ROTATION_DATA, - new_checks=new_checks, - rule_controller=get_test_rule_controller(_schain_name), - dutils=dutils - ) - assert test_monitor.get_rotation_mode_func() != test_monitor.leaving_node - - -def test_is_staying_node( - node_config, - skale, - _schain_name, - schain_config, - ima_data, - schain_db, - rule_controller, - dutils, - new_checks -): - test_monitor = get_rotation_monitor( - skale=skale, - name=_schain_name, - 
ima_data=ima_data, - schain_db=schain_db, - node_config=node_config, - rotation_data=DEFAULT_ROTATION_DATA, - new_checks=new_checks, - rule_controller=rule_controller, - dutils=dutils - ) - with mock.patch('core.schains.monitor.rotation_monitor.get_previous_schain_groups'), \ - mock.patch('core.schains.monitor.rotation_monitor.get_new_nodes_list'): - assert test_monitor.get_rotation_mode_func() == test_monitor.staying_node - - -@pytest.mark.skip(reason="test should be improved") -def test_rotation_request( - node_config, - skale, - _schain_name, - schain_config, - ima_data, - schain_db, - rule_controller, - dutils, - new_checks -): - rotation_data_leaving_node = { - 'rotation_id': 1, - 'freeze_until': 12345678, - 'new_node': 9999, - 'leaving_node': node_config.id, - } - test_monitor = get_rotation_monitor( - skale=skale, - name=_schain_name, - ima_data=ima_data, - schain_db=schain_db, - node_config=node_config, - rotation_data=rotation_data_leaving_node, - new_checks=new_checks, - rule_controller=rule_controller, - dutils=dutils - ) - test_monitor.rotation_request() From 7b2eb0efb5b66805eb6bd826b7c9781902b867f8 Mon Sep 17 00:00:00 2001 From: badrogger Date: Fri, 16 Jun 2023 13:48:51 +0000 Subject: [PATCH 64/84] Enable terminate_stuck_schain_processes --- core/schains/process_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/schains/process_manager.py b/core/schains/process_manager.py index b7790413a..2b95d4250 100644 --- a/core/schains/process_manager.py +++ b/core/schains/process_manager.py @@ -65,7 +65,7 @@ def run_process_manager(skale, skale_ima, node_config): schain_record = upsert_schain_record(schain['name']) log_prefix = f'sChain {schain["name"]} -' # todo - move to logger formatter - # terminate_stuck_schain_process(skale, schain_record, schain) + terminate_stuck_schain_process(skale, schain_record, schain) monitor_process_alive = is_monitor_process_alive(schain_record.monitor_id) if not monitor_process_alive: From 
fd762eb59286f7ae2778a39910df214c404e17a2 Mon Sep 17 00:00:00 2001 From: badrogger Date: Fri, 16 Jun 2023 13:49:31 +0000 Subject: [PATCH 65/84] Add missing monitor tests --- tests/schains/monitor/config_monitor_test.py | 78 +++++ tests/schains/monitor/skaled_monitor_test.py | 336 +++++++++++++++++++ 2 files changed, 414 insertions(+) create mode 100644 tests/schains/monitor/config_monitor_test.py create mode 100644 tests/schains/monitor/skaled_monitor_test.py diff --git a/tests/schains/monitor/config_monitor_test.py b/tests/schains/monitor/config_monitor_test.py new file mode 100644 index 000000000..26b63a301 --- /dev/null +++ b/tests/schains/monitor/config_monitor_test.py @@ -0,0 +1,78 @@ +import os + +import pytest + +from core.schains.checks import ConfigChecks +from core.schains.config.directory import new_schain_config_filepath + +from core.schains.monitor.action import ConfigActionManager +from core.schains.monitor.config_monitor import RegularConfigMonitor + +from web.models.schain import SChainRecord + +from tests.utils import CONFIG_STREAM + + +@pytest.fixture +def rotation_data(schain_db, skale): + return skale.node_rotation.get_rotation(schain_db) + + +@pytest.fixture +def config_checks( + schain_db, + skale, + node_config, + schain_on_contracts, + rotation_data +): + name = schain_db + schain_record = SChainRecord.get_by_name(name) + return ConfigChecks( + schain_name=name, + node_id=node_config.id, + schain_record=schain_record, + rotation_id=rotation_data['rotation_id'], + stream_version=CONFIG_STREAM + ) + + +@pytest.fixture +def config_am( + schain_db, + skale, + node_config, + schain_on_contracts, + predeployed_ima, + secret_key, + config_checks +): + name = schain_db + rotation_data = skale.node_rotation.get_rotation(name) + schain = skale.schains.get_by_name(name) + + am = ConfigActionManager( + skale=skale, + schain=schain, + node_config=node_config, + rotation_data=rotation_data, + stream_version=CONFIG_STREAM, + checks=config_checks + ) + 
am.dkg = lambda s: True + return am + + +@pytest.fixture +def regular_config_monitor(config_am, config_checks): + return RegularConfigMonitor( + action_manager=config_am, + checks=config_checks + ) + + +def test_regular_config_monitor(schain_db, regular_config_monitor, rotation_data): + name = schain_db + rotation_id = rotation_data['rotation_id'] + regular_config_monitor.run() + assert os.path.isfile(new_schain_config_filepath(name, rotation_id, CONFIG_STREAM)) diff --git a/tests/schains/monitor/skaled_monitor_test.py b/tests/schains/monitor/skaled_monitor_test.py new file mode 100644 index 000000000..535e5c7d6 --- /dev/null +++ b/tests/schains/monitor/skaled_monitor_test.py @@ -0,0 +1,336 @@ +import datetime + +import pytest + +from core.schains.checks import CheckRes, SkaledChecks +from core.schains.monitor.action import SkaledActionManager +from core.schains.monitor.skaled_monitor import ( + AfterExitTimeSkaledMonitor, + BackupSkaledMonitor, + get_skaled_monitor, + NewConfigSkaledMonitor, + NoConfigMonitor, + RecreateSkaledMonitor, + RegularSkaledMonitor, + RepairSkaledMonitor +) +from core.schains.rotation import get_schain_public_key +from core.schains.runner import get_container_info +from tools.configs.containers import SCHAIN_CONTAINER, IMA_CONTAINER +from web.models.schain import SChainRecord + +CURRENT_TIMESTAMP = 1594903080 +CURRENT_DATETIME = datetime.datetime.utcfromtimestamp(CURRENT_TIMESTAMP) + + +def run_ima_container_mock(schain: dict, mainnet_chain_id: int, dutils=None): + image_name, container_name, _, _ = get_container_info( + IMA_CONTAINER, schain['name']) + dutils.safe_rm(container_name) + dutils.run_container( + image_name=image_name, + name=container_name, + entrypoint='bash -c "while true; do foo; sleep 2; done"' + ) + + +def monitor_schain_container_mock( + schain, + schain_record, + skaled_status, + public_key=None, + start_ts=None, + dutils=None +): + image_name, container_name, _, _ = get_container_info( + SCHAIN_CONTAINER, 
schain['name']) + dutils.safe_rm(container_name) + dutils.run_container( + image_name=image_name, + name=container_name, + entrypoint='bash -c "while true; do foo; sleep 2; done"' + ) + + +@pytest.fixture +def rotation_data(schain_db, skale): + return skale.node_rotation.get_rotation(schain_db) + + +@pytest.fixture +def skaled_checks( + schain_db, + skale, + rule_controller, + dutils +): + name = schain_db + schain_record = SChainRecord.get_by_name(name) + return SkaledChecks( + schain_name=name, + schain_record=schain_record, + rule_controller=rule_controller, + ima_linked=True, + dutils=dutils + ) + + +@pytest.fixture +def skaled_am( + schain_db, + skale, + node_config, + rule_controller, + schain_on_contracts, + predeployed_ima, + rotation_data, + secret_key, + ima_data, + ssl_folder, + dutils, + skaled_checks +): + name = schain_db + schain = skale.schains.get_by_name(name) + public_key = get_schain_public_key(skale, name) + return SkaledActionManager( + schain=schain, + rule_controller=rule_controller, + ima_data=ima_data, + node_config=node_config, + public_key=public_key, + checks=skaled_checks, + dutils=dutils + ) + + +class SkaledChecksNoConfig(SkaledChecks): + @property + def config(self) -> CheckRes: + return CheckRes(False) + + +@pytest.fixture +def skaled_checks_no_config( + schain_db, + skale, + rule_controller, + dutils +): + name = schain_db + schain_record = SChainRecord.get_by_name(name) + return SkaledChecksNoConfig( + schain_name=name, + schain_record=schain_record, + rule_controller=rule_controller, + ima_linked=True, + dutils=dutils + ) + + +def test_get_skaled_monitor_no_config(skaled_am, skaled_checks_no_config, skaled_status, schain_db): + name = schain_db + schain_record = SChainRecord.get_by_name(name) + mon = get_skaled_monitor( + skaled_am, + skaled_checks_no_config, + schain_record, + skaled_status + ) + assert isinstance(mon, NoConfigMonitor) + + +def test_get_skaled_monitor_regular_and_backup(skaled_am, skaled_checks, skaled_status, 
schain_db): + name = schain_db + schain_record = SChainRecord.get_by_name(name) + mon = get_skaled_monitor( + skaled_am, + skaled_checks, + schain_record, + skaled_status + ) + assert isinstance(mon, RegularSkaledMonitor) + + mon = get_skaled_monitor( + skaled_am, + skaled_checks, + schain_record, + skaled_status, + backup_run=True + ) + assert isinstance(mon, RegularSkaledMonitor) + + schain_record.set_new_schain(False) + mon = get_skaled_monitor( + skaled_am, + skaled_checks, + schain_record, + skaled_status, + backup_run=True + ) + assert isinstance(mon, BackupSkaledMonitor) + + schain_record.set_new_schain(False) + schain_record.set_first_run(False) + mon = get_skaled_monitor( + skaled_am, + skaled_checks, + schain_record, + skaled_status, + backup_run=True + ) + assert isinstance(mon, RegularSkaledMonitor) + + +def test_get_skaled_monitor_repair(skaled_am, skaled_checks, skaled_status, schain_db): + name = schain_db + schain_record = SChainRecord.get_by_name(name) + schain_record.set_repair_mode(True) + + mon = get_skaled_monitor( + skaled_am, + skaled_checks, + schain_record, + skaled_status + ) + assert isinstance(mon, RepairSkaledMonitor) + + +def test_get_skaled_monitor_repair_skaled_status( + skaled_am, + skaled_checks, + schain_db, + skaled_status_repair +): + name = schain_db + schain_record = SChainRecord.get_by_name(name) + + mon = get_skaled_monitor( + skaled_am, + skaled_checks, + schain_record, + skaled_status_repair + ) + assert isinstance(mon, RepairSkaledMonitor) + + +class SkaledChecksWithConfig(SkaledChecks): + @property + def config_updated(self) -> CheckRes: + return CheckRes(False) + + @property + def config(self) -> CheckRes: + return CheckRes(True) + + @property + def container(self) -> CheckRes: + return CheckRes(True) + + +@pytest.fixture +def skaled_checks_new_config( + schain_db, + skale, + rule_controller, + dutils +): + name = schain_db + schain_record = SChainRecord.get_by_name(name) + return SkaledChecksWithConfig( + 
schain_name=name, + schain_record=schain_record, + rule_controller=rule_controller, + ima_linked=True, + dutils=dutils + ) + + +def test_get_skaled_monitor_new_config( + skaled_am, + skaled_checks_new_config, + schain_db, + skaled_status +): + name = schain_db + schain_record = SChainRecord.get_by_name(name) + + mon = get_skaled_monitor( + skaled_am, + skaled_checks_new_config, + schain_record, + skaled_status + ) + assert isinstance(mon, NewConfigSkaledMonitor) + + +def test_get_skaled_monitor_after_exit( + skaled_am, + skaled_checks, + schain_db, + skaled_status_exit_time_reached +): + name = schain_db + schain_record = SChainRecord.get_by_name(name) + + mon = get_skaled_monitor( + skaled_am, + skaled_checks, + schain_record, + skaled_status_exit_time_reached + ) + assert isinstance(mon, AfterExitTimeSkaledMonitor) + + +def test_get_skaled_monitor_recreate( + skaled_am, + skaled_checks, + schain_db, + skaled_status +): + name = schain_db + schain_record = SChainRecord.get_by_name(name) + + schain_record.set_needs_reload(True) + mon = get_skaled_monitor( + skaled_am, + skaled_checks, + schain_record, + skaled_status + ) + assert isinstance(mon, RecreateSkaledMonitor) + + +def test_regular_skaled_monitor(skaled_am, skaled_checks): + mon = RegularSkaledMonitor(skaled_am, skaled_checks) + mon.run() + + +def test_backup_skaled_monitor(skaled_am, skaled_checks): + mon = BackupSkaledMonitor(skaled_am, skaled_checks) + mon.run() + + +def test_repair_skaled_monitor(skaled_am, skaled_checks): + mon = RepairSkaledMonitor(skaled_am, skaled_checks) + mon.run() + + +def test_new_config_skaled_monitor(skaled_am, skaled_checks): + mon = NewConfigSkaledMonitor(skaled_am, skaled_checks) + mon.run() + + +def test_recreate_skaled_monitor(skaled_am, skaled_checks): + mon = RecreateSkaledMonitor(skaled_am, skaled_checks) + mon.run() + + +def test_after_exit_skaled_monitor(skaled_am, skaled_checks): + mon = AfterExitTimeSkaledMonitor(skaled_am, skaled_checks) + mon.run() + + +def 
test_no_config_monitor(skaled_am, skaled_checks): + mon = NoConfigMonitor(skaled_am, skaled_checks) + mon.run() From dd2af013bcf762eb7641c464b25abd3c83364a86 Mon Sep 17 00:00:00 2001 From: badrogger Date: Fri, 16 Jun 2023 14:49:26 +0000 Subject: [PATCH 66/84] Fix health routes --- tests/routes/health_test.py | 11 +++-------- web/routes/health.py | 6 ++++-- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/tests/routes/health_test.py b/tests/routes/health_test.py index 0d42ea7d2..08cfb2c4e 100644 --- a/tests/routes/health_test.py +++ b/tests/routes/health_test.py @@ -6,7 +6,7 @@ from sgx import SgxClient from core.node_config import NodeConfig -from core.schains.checks import SChainChecks, CheckRes +from core.schains.checks import SChainChecks from tools.configs import SGX_SERVER_URL, SGX_CERTIFICATES_FOLDER @@ -94,10 +94,6 @@ class SChainChecksMock(SChainChecks): def __init__(self, *args, **kwargs): super(SChainChecksMock, self).__init__(*args, dutils=dutils, **kwargs) - @property - def firewall_rules(self) -> CheckRes: - return CheckRes(True) - def get_schains_for_node_mock(self, node_id): return [ {'name': schain_name}, @@ -105,8 +101,7 @@ def get_schains_for_node_mock(self, node_id): {'name': ''} ] - with mock.patch('web.routes.health.SChainChecks', SChainChecksMock), \ - mock.patch('web.routes.health.SChainChecks', SChainChecksMock): + with mock.patch('web.routes.health.SChainChecks', SChainChecksMock): with mock.patch( 'skale.contracts.manager.schains.SChains.get_schains_for_node', get_schains_for_node_mock @@ -121,7 +116,7 @@ def get_schains_for_node_mock(self, node_id): 'dkg': False, 'config': False, 'volume': False, - 'firewall_rules': True, + 'firewall_rules': False, 'skaled_container': False, 'exit_code_ok': True, 'rpc': False, diff --git a/web/routes/health.py b/web/routes/health.py index 54ae1f894..227431ede 100644 --- a/web/routes/health.py +++ b/web/routes/health.py @@ -28,7 +28,7 @@ from urllib.parse import urlparse -from core.node 
import get_check_report +from core.node import get_check_report, get_skale_node_version from core.schains.checks import SChainChecks from core.schains.firewall.utils import ( get_default_rule_controller, @@ -84,6 +84,7 @@ def schains_checks(): schains = g.skale.schains.get_schains_for_node(node_id) sync_agent_ranges = get_sync_agent_ranges(g.skale) + stream_version = get_skale_node_version() checks = [] for schain in schains: if schain.get('name') != '': @@ -100,7 +101,8 @@ def schains_checks(): node_id, schain_record=schain_record, rule_controller=rc, - rotation_id=rotation_id + rotation_id=rotation_id, + stream_version=stream_version ).get_all(checks_filter=checks_filter) checks.append({ 'name': schain['name'], From 4da1afaeef6fceaca9c7019f658cc5521b152e95 Mon Sep 17 00:00:00 2001 From: badrogger Date: Fri, 16 Jun 2023 15:44:32 +0000 Subject: [PATCH 67/84] Remove old base_monitor_test --- tests/schains/monitor/base_monitor_test.py | 303 --------------------- 1 file changed, 303 deletions(-) delete mode 100644 tests/schains/monitor/base_monitor_test.py diff --git a/tests/schains/monitor/base_monitor_test.py b/tests/schains/monitor/base_monitor_test.py deleted file mode 100644 index 6577a02cf..000000000 --- a/tests/schains/monitor/base_monitor_test.py +++ /dev/null @@ -1,303 +0,0 @@ -import mock -import pytest - -from core.schains.checks import SChainChecks -from core.schains.cleaner import remove_ima_container -from core.schains.config.main import save_schain_config -from core.schains.ima import ImaData -from core.schains.monitor import BaseMonitor -from core.schains.runner import get_container_info -from tools.configs.containers import SCHAIN_CONTAINER, IMA_CONTAINER -from web.models.schain import SChainRecord - -from tests.dkg_utils import safe_run_dkg_mock -from tests.utils import get_test_rule_controller - - -class BaseTestMonitor(BaseMonitor): - @BaseMonitor.monitor_runner - def run(self): - return 1234 - - def _run_all_checks(self): - pass - - -class 
CrashingTestMonitor(BaseMonitor): - @BaseMonitor.monitor_runner - def run(self): - raise Exception('Something went wrong') - - def _run_all_checks(self): - pass - - -def init_schain_config_mock( - skale, - node_id, - schain_name, - generation, - ecdsa_sgx_key_name, - rotation_data, - schain_record -): - save_schain_config({}, schain_name) - - -def monitor_schain_container_mock( - schain, - schain_record, - skaled_status, - public_key=None, - start_ts=None, - dutils=None -): - image_name, container_name, _, _ = get_container_info( - SCHAIN_CONTAINER, schain['name']) - dutils.safe_rm(container_name) - dutils.run_container( - image_name=image_name, - name=container_name, - entrypoint='bash -c "while true; do foo; sleep 2; done"' - ) - - -def run_ima_container_mock(schain: dict, mainnet_chain_id: int, dutils=None): - image_name, container_name, _, _ = get_container_info( - IMA_CONTAINER, schain['name']) - dutils.safe_rm(container_name) - dutils.run_container( - image_name=image_name, - name=container_name, - entrypoint='bash -c "while true; do foo; sleep 2; done"' - ) - - -@pytest.fixture -def test_monitor( - schain_db, - _schain_name, - node_config, - uninited_rule_controller, - skale, - ima_data, - dutils -): - schain_record = SChainRecord.get_by_name(_schain_name) - schain_checks = SChainChecks( - _schain_name, - node_config.id, - schain_record=schain_record, - rule_controller=uninited_rule_controller, - dutils=dutils - ) - return BaseTestMonitor( - skale=skale, - ima_data=ima_data, - schain={'name': schain_db, 'partOfNode': 0, 'generation': 0}, - node_config=node_config, - rotation_data={'rotation_id': 0, 'finish_ts': 0, 'leaving_node': 1}, - checks=schain_checks, - rule_controller=uninited_rule_controller, - dutils=dutils - ) - - -def test_crashing_monitor( - schain_db, - _schain_name, - skale, - node_config, - rule_controller, - ima_data, - schain_struct, - dutils -): - schain_record = SChainRecord.get_by_name(_schain_name) - schain_checks = SChainChecks( - 
_schain_name, - node_config.id, - schain_record=schain_record, - rule_controller=rule_controller, - dutils=dutils - ) - test_monitor = CrashingTestMonitor( - skale=skale, - ima_data=ima_data, - schain=schain_struct, - node_config=node_config, - rotation_data={'rotation_id': 1, 'leaving_node': 1}, - checks=schain_checks, - rule_controller=rule_controller, - dutils=dutils - ) - with pytest.raises(Exception): - test_monitor.run() - - -def test_base_monitor(test_monitor): - assert test_monitor.run() == 1234 - - -def test_base_monitor_config_dir(test_monitor): - assert not test_monitor.config_dir() - assert test_monitor.config_dir() - - -def test_base_monitor_dkg(test_monitor): - test_monitor.config_dir() - with mock.patch('core.schains.monitor.base_monitor.safe_run_dkg', safe_run_dkg_mock): - assert not test_monitor.dkg() - assert test_monitor.dkg() - - -def test_base_monitor_config(test_monitor): - test_monitor.config_dir() - with mock.patch( - 'core.schains.monitor.base_monitor.init_schain_config', init_schain_config_mock): - assert not test_monitor.config() - assert test_monitor.config() - - -def test_base_monitor_volume(test_monitor): - test_monitor.config_dir() - assert not test_monitor.volume() - assert test_monitor.volume() - test_monitor.cleanup_schain_docker_entity() - - -def test_base_monitor_skaled_container(test_monitor): - test_monitor.volume() - with mock.patch( - 'core.schains.monitor.base_monitor.monitor_schain_container', - monitor_schain_container_mock - ): - assert not test_monitor.skaled_container() - assert test_monitor.skaled_container() - test_monitor.cleanup_schain_docker_entity() - - -def test_base_monitor_skaled_container_sync(test_monitor): - test_monitor.volume() - with mock.patch( - 'core.schains.monitor.base_monitor.monitor_schain_container', - new=mock.Mock() - ) as monitor_schain_mock: - test_monitor.skaled_container(download_snapshot=True) - - monitor_schain_mock.assert_called_with( - test_monitor.schain, - 
schain_record=test_monitor.schain_record, - skaled_status=test_monitor.skaled_status, - public_key='0:0:1:0', - start_ts=None, - dutils=test_monitor.dutils - ) - assert monitor_schain_mock.call_count == 1 - - -def test_base_monitor_skaled_container_sync_delay_start(test_monitor): - test_monitor.volume() - with mock.patch( - 'core.schains.monitor.base_monitor.monitor_schain_container', - new=mock.Mock() - ) as monitor_schain_mock: - test_monitor.finish_ts = 1245 - test_monitor.skaled_container(download_snapshot=True, delay_start=True) - - monitor_schain_mock.assert_called_with( - test_monitor.schain, - schain_record=test_monitor.schain_record, - skaled_status=test_monitor.skaled_status, - public_key='0:0:1:0', - start_ts=1245, - dutils=test_monitor.dutils - ) - assert monitor_schain_mock.call_count == 1 - - -def test_base_monitor_restart_skaled_container(test_monitor): - test_monitor.volume() - with mock.patch( - 'core.schains.monitor.base_monitor.monitor_schain_container', - monitor_schain_container_mock - ): - assert not test_monitor.restart_skaled_container() - assert test_monitor.restart_skaled_container() - test_monitor.cleanup_schain_docker_entity() - - -def test_base_monitor_ima_container(test_monitor, schain_config, predeployed_ima): - test_monitor.config_dir() - test_monitor.ima_data.linked = True - with mock.patch( - 'core.schains.monitor.containers.run_ima_container', - run_ima_container_mock - ): - assert not test_monitor.ima_container() - assert test_monitor.ima_container() - remove_ima_container(test_monitor.name, dutils=test_monitor.dutils) - - -def test_base_monitor_ima_container_not_linked( - schain_db, - _schain_name, - node_config, - skale, - dutils -): - schain_record = SChainRecord.get_by_name(_schain_name) - schain_checks = SChainChecks( - _schain_name, - node_config.id, - schain_record=schain_record, - rule_controller=get_test_rule_controller(_schain_name), - dutils=dutils - ) - ima_data = ImaData(False, '0x1') - test_monitor = 
BaseTestMonitor( - skale=skale, - ima_data=ima_data, - schain={'name': schain_db, 'partOfNode': 0, 'generation': 0}, - node_config=node_config, - rotation_data={'rotation_id': 0, 'leaving_node': 1}, - checks=schain_checks, - rule_controller=get_test_rule_controller(_schain_name), - dutils=dutils - ) - - test_monitor.config_dir() - assert not test_monitor.ima_container() - assert not test_monitor.ima_container() - remove_ima_container(test_monitor.name, dutils=test_monitor.dutils) - - -def test_base_monitor_cleanup(test_monitor): - test_monitor.volume() - with mock.patch( - 'core.schains.monitor.base_monitor.monitor_schain_container', - monitor_schain_container_mock - ): - test_monitor.skaled_container() - - assert test_monitor.checks.volume.status - assert test_monitor.checks.skaled_container.status - test_monitor.cleanup_schain_docker_entity() - assert not test_monitor.checks.volume.status - assert not test_monitor.checks.skaled_container.status - - -def test_schain_finish_ts(skale, schain_on_contracts): - name = schain_on_contracts - max_node_id = skale.nodes.get_nodes_number() - 1 - assert skale.node_rotation.get_schain_finish_ts(max_node_id, name) is None - - -def test_display_skaled_logs(skale, test_monitor, _schain_name): - test_monitor.volume() - with mock.patch( - 'core.schains.monitor.base_monitor.monitor_schain_container', - monitor_schain_container_mock - ): - test_monitor.skaled_container() - test_monitor.display_skaled_logs() From 361638f20b30db325c0092604cf2453e7c8d573c Mon Sep 17 00:00:00 2001 From: badrogger Date: Sat, 17 Jun 2023 12:32:33 +0000 Subject: [PATCH 68/84] Update skale.py to 6.0dev1 with fixed SkaledPorts --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 1eb7921a2..aa2506529 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,7 @@ simple-crypt==4.1.7 pycryptodome==3.12.0 python-iptables==1.0.0 -skale.py==6.0dev0 +skale.py==6.0dev1 
ima-predeployed==2.0.0b0 etherbase-predeployed==1.1.0b3 From 9dc3eb42fc1cf3d2ba496cc9ac93a8adb497b2f8 Mon Sep 17 00:00:00 2001 From: badrogger Date: Sat, 17 Jun 2023 20:38:12 +0000 Subject: [PATCH 69/84] Add new node monitor --- core/schains/config/main.py | 25 +++++++++++++------- core/schains/monitor/action.py | 2 -- core/schains/monitor/skaled_monitor.py | 32 ++++++++++++++++++++++---- 3 files changed, 45 insertions(+), 14 deletions(-) diff --git a/core/schains/config/main.py b/core/schains/config/main.py index e3ce016ae..f2406b759 100644 --- a/core/schains/config/main.py +++ b/core/schains/config/main.py @@ -21,7 +21,7 @@ import os import shutil import logging -from typing import Dict, Optional +from typing import Dict, List, Optional from skale import Skale @@ -136,19 +136,22 @@ def schain_config_version_match(schain_name, schain_record=None): return schain_record.config_version == skale_node_version -def get_upstream_config_filepath(schain_name) -> Optional[str]: - config_dir = schain_config_dir(schain_name) - prefix = new_config_prefix(schain_name) - dir_files = None +def get_files_with_prefix(config_dir: str, prefix: str) -> List[str]: + prefix_files = [] if os.path.isdir(config_dir): configs = [ os.path.join(config_dir, fname) for fname in os.listdir(config_dir) if fname.startswith(prefix) ] - dir_files = sorted( - configs, - ) + prefix_files = sorted(configs) + return prefix_files + + +def get_upstream_config_filepath(schain_name) -> Optional[str]: + config_dir = schain_config_dir(schain_name) + prefix = new_config_prefix(schain_name) + dir_files = get_files_with_prefix(config_dir, prefix) if not dir_files: return None return os.path.join(config_dir, dir_files[-1]) @@ -178,3 +181,9 @@ def get_finish_ts_from_upstream_config(schain_name: str) -> Optional[int]: def get_finish_ts_from_config(schain_name: str) -> Optional[int]: upstream_path = schain_config_filepath(schain_name) return get_finish_ts(upstream_path) + + +def 
get_number_of_secret_shares(schain_name: str) -> Optional[int]: + config_dir = schain_config_dir(schain_name) + prefix = 'secret_key_' + return get_files_with_prefix(config_dir, prefix) diff --git a/core/schains/monitor/action.py b/core/schains/monitor/action.py index fb79d73dc..499c4cd68 100644 --- a/core/schains/monitor/action.py +++ b/core/schains/monitor/action.py @@ -262,8 +262,6 @@ def skaled_container( public_key = None if download_snapshot: public_key = self.public_key - if start_ts is None: - start_ts = self.finish_ts monitor_schain_container( self.schain, diff --git a/core/schains/monitor/skaled_monitor.py b/core/schains/monitor/skaled_monitor.py index 9bfdf9cfc..095f1202a 100644 --- a/core/schains/monitor/skaled_monitor.py +++ b/core/schains/monitor/skaled_monitor.py @@ -18,12 +18,14 @@ # along with this program. If not, see . import logging +import time from abc import abstractmethod from typing import Optional from core.schains.monitor.base_monitor import IMonitor from core.schains.checks import SkaledChecks from core.schains.monitor.action import SkaledActionManager +from core.schains.config import get_number_of_secret_shares from core.schains.skaled_status import SkaledStatus from web.models.schain import SChainRecord @@ -59,13 +61,10 @@ class RegularSkaledMonitor(BaseSkaledMonitor): def execute(self) -> None: if not self.checks.firewall_rules: self.am.firewall_rules() - download_snapshot = False if not self.checks.volume: self.am.volume() - if not self.checks.new_schain: - download_snapshot = True if not self.checks.skaled_container: - self.am.skaled_container(download_snapshot=download_snapshot) + self.am.skaled_container() if not self.checks.ima_container: self.am.ima_container() @@ -136,6 +135,23 @@ def execute(self): logger.info('Waiting for upstream config') +class NewNodeMonitor(BaseSkaledMonitor): + def execute(self): + if not self.checks.config_updated: + self.am.update_config() + if not self.checks.volume: + self.am.volume() + if not 
self.checks.firewall_rules: + self.am.firewall_rules() + if not self.am.skaled_container: + self.am.skaled_container( + download_snapshot=True, + start_ts=self.am.finish_ts + ) + if not self.checks.ima_container: + self.am.ima_container() + + def is_backup_mode(schain_record: SChainRecord, backup_run: bool) -> bool: return schain_record.first_run and not schain_record.new_schain and backup_run @@ -163,6 +179,12 @@ def is_reload_mode(schain_record: SChainRecord) -> bool: return schain_record.needs_reload +def is_new_node_mode(schain_record: SChainRecord, finish_ts: int) -> bool: + ts = int(time.time()) + secret_shares = get_number_of_secret_shares(schain_record.name) + return finish_ts > ts and secret_shares == 1 + + def is_skaled_repair_status(checks: SkaledChecks, skaled_status: Optional[SkaledStatus]) -> bool: if skaled_status is None: return False @@ -193,6 +215,8 @@ def get_skaled_monitor( mon_type = BackupSkaledMonitor elif is_repair_mode(schain_record, checks, skaled_status): mon_type = RepairSkaledMonitor + elif is_new_node_mode(schain_record, action_manager.upstream_finish_ts): + mon_type = NewNodeMonitor elif is_exit_time_reached(checks, skaled_status): mon_type = AfterExitTimeSkaledMonitor elif is_new_config(checks): From 4dee255b9157eec504c5a44a414cabadc8ce03e9 Mon Sep 17 00:00:00 2001 From: badrogger Date: Sun, 18 Jun 2023 14:19:36 +0000 Subject: [PATCH 70/84] Fix is_new_node_monitor --- core/schains/monitor/skaled_monitor.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/core/schains/monitor/skaled_monitor.py b/core/schains/monitor/skaled_monitor.py index 095f1202a..6e30a954b 100644 --- a/core/schains/monitor/skaled_monitor.py +++ b/core/schains/monitor/skaled_monitor.py @@ -25,7 +25,7 @@ from core.schains.monitor.base_monitor import IMonitor from core.schains.checks import SkaledChecks from core.schains.monitor.action import SkaledActionManager -from core.schains.config import get_number_of_secret_shares +from 
core.schains.config.main import get_number_of_secret_shares from core.schains.skaled_status import SkaledStatus from web.models.schain import SChainRecord @@ -103,6 +103,8 @@ def execute(self) -> None: class RecreateSkaledMonitor(BaseSkaledMonitor): def execute(self) -> None: logger.info('Reload requested. Recreating sChain container') + if self.checks.volume: + self.am.volume() self.am.reloaded_skaled_container() @@ -112,6 +114,8 @@ def execute(self) -> None: self.am.update_config() if self.checks.config and not self.checks.firewall_rules: self.am.firewall_rules() + if self.checks.volume: + self.am.volume() self.am.reloaded_skaled_container() @@ -179,9 +183,11 @@ def is_reload_mode(schain_record: SChainRecord) -> bool: return schain_record.needs_reload -def is_new_node_mode(schain_record: SChainRecord, finish_ts: int) -> bool: +def is_new_node_mode(schain_record: SChainRecord, finish_ts: Optional[int]) -> bool: ts = int(time.time()) secret_shares = get_number_of_secret_shares(schain_record.name) + if finish_ts is None: + return False return finish_ts > ts and secret_shares == 1 From 5b61501d3a45866d964146bdb908248b2d5b08ec Mon Sep 17 00:00:00 2001 From: badrogger Date: Mon, 19 Jun 2023 09:37:44 +0000 Subject: [PATCH 71/84] Minor logging improvements --- core/schains/checks.py | 3 + core/schains/monitor/base_monitor.py | 314 ------------------ core/schains/monitor/config_monitor.py | 2 +- core/schains/monitor/post_rotation_monitor.py | 38 --- core/schains/monitor/rotation_monitor.py | 112 ------- core/schains/monitor/skaled_monitor.py | 2 +- tools/configs/logs.py | 4 +- tools/docker_utils.py | 8 +- 8 files changed, 11 insertions(+), 472 deletions(-) delete mode 100644 core/schains/monitor/post_rotation_monitor.py delete mode 100644 core/schains/monitor/rotation_monitor.py diff --git a/core/schains/checks.py b/core/schains/checks.py index a0afcea63..fecc59dab 100644 --- a/core/schains/checks.py +++ b/core/schains/checks.py @@ -83,6 +83,9 @@ def __init__(self, 
status: bool, data: dict = None): def __bool__(self) -> bool: return self.status + def __str__(self) -> str: + return f'CheckRes<{self.status}>' + class IChecks(ABC): @abstractmethod diff --git a/core/schains/monitor/base_monitor.py b/core/schains/monitor/base_monitor.py index 94952e1d5..7d61205f0 100644 --- a/core/schains/monitor/base_monitor.py +++ b/core/schains/monitor/base_monitor.py @@ -17,50 +17,8 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -import time import logging from abc import ABC, abstractmethod -from datetime import datetime -from functools import wraps - -from skale import Skale - -from core.node_config import NodeConfig -from core.schains.checks import SChainChecks -from core.schains.dkg import safe_run_dkg, save_dkg_results, DkgError -from core.schains.dkg.utils import get_secret_key_share_filepath -from core.schains.cleaner import ( - remove_schain_container, - remove_schain_volume -) -from core.schains.firewall.types import IRuleController - -from core.schains.volume import init_data_volume -from core.schains.rotation import get_schain_public_key - -from core.schains.limits import get_schain_type - -from core.schains.monitor.containers import monitor_schain_container, monitor_ima_container -from core.schains.monitor.rpc import handle_failed_schain_rpc -from core.schains.runner import ( - restart_container, is_container_exists, get_container_name -) -from core.schains.config import init_schain_config, init_schain_config_dir -from core.schains.config.directory import get_schain_config -from core.schains.config.helper import ( - get_base_port_from_config, - get_node_ips_from_config, - get_own_ip_from_config -) -from core.schains.ima import ImaData -from core.schains.skaled_status import init_skaled_status - -from tools.docker_utils import DockerUtils -from tools.notifications.messages import notify_checks, is_checks_passed -from tools.str_formatters import 
arguments_list_string -from tools.configs.containers import SCHAIN_CONTAINER - -from web.models.schain import upsert_schain_record, set_first_run, SChainRecord logger = logging.getLogger(__name__) @@ -74,275 +32,3 @@ class IMonitor(ABC): @abstractmethod def run(self): pass - - -class BaseMonitor(ABC): - def __init__( - self, - skale: Skale, - ima_data: ImaData, - schain: dict, - node_config: NodeConfig, - rotation_data: dict, - checks: SChainChecks, - rule_controller: IRuleController, - dutils: DockerUtils = None - ): - self.skale = skale - self.ima_data = ima_data - self.schain = schain - self.name = schain['name'] - self.generation = schain['generation'] - self.node_config = node_config - self.checks = checks - self.executed_blocks = {} - - self.rotation_data = rotation_data - self.rotation_id = rotation_data['rotation_id'] - self.rc = rule_controller - - self.finish_ts = skale.node_rotation.get_schain_finish_ts( - node_id=rotation_data['leaving_node'], - schain_name=self.name - ) - logger.info(f'sChain finish_ts calculated: {self.finish_ts}') - - self.skaled_status = init_skaled_status(self.name) - - self.schain_type = get_schain_type(schain['partOfNode']) - - self.dutils = dutils or DockerUtils() - self.p = f'{type(self).__name__} - schain: {self.name} -' - - @property - def schain_record(self): - return upsert_schain_record(self.name) - - def _upd_last_seen(self) -> None: - self.schain_record.set_monitor_last_seen(datetime.now()) - - def _upd_schain_record(self) -> None: - if self.schain_record.first_run: - self.schain_record.set_restart_count(0) - self.schain_record.set_failed_rpc_count(0) - set_first_run(self.name, False) - self.schain_record.set_new_schain(False) - logger.info( - f'sChain {self.name}: ' - f'restart_count - {self.schain_record.restart_count}, ' - f'failed_rpc_count - {self.schain_record.failed_rpc_count}' - ) - - def _run_all_checks(self, save_checks=True) -> None: - checks_dict = self.checks.get_all(save=save_checks) - if not 
is_checks_passed(checks_dict): - notify_checks(self.name, self.node_config.all(), checks_dict) - - def monitor_block(f): - @wraps(f) - def _monitor_block(self, *args, **kwargs): - ts = time.time() - initial_status = f(self, *args, **kwargs) - te = time.time() - self.executed_blocks[f.__name__] = { - 'ts': ts, - 'te': te, - 'initial_status': initial_status - } - return initial_status - return _monitor_block - - def monitor_runner(f): - @wraps(f) - def _monitor_runner(self): - logger.info(arguments_list_string({ - 'Monitor type': type(self).__name__, - 'Rotation data': self.rotation_data, - 'sChain record': SChainRecord.to_dict(self.schain_record) - }, f'Starting monitor runner - {self.name}')) - - self._upd_last_seen() - if not self.schain_record.first_run: - self._run_all_checks() - self._upd_schain_record() - res = f(self) - self._upd_last_seen() - self.log_executed_blocks() - logger.info(f'{self.p} finished monitor runner') - return res - return _monitor_runner - - @abstractmethod - def run(self): - pass - - @monitor_block - def config_dir(self) -> bool: - initial_status = self.checks.config_dir.status - if not initial_status: - init_schain_config_dir(self.name) - else: - logger.info(f'{self.p} config_dir - ok') - return initial_status - - @monitor_block - def dkg(self) -> bool: - initial_status = self.checks.dkg.status - if not initial_status: - dkg_result = safe_run_dkg( - skale=self.skale, - schain_name=self.name, - node_id=self.node_config.id, - sgx_key_name=self.node_config.sgx_key_name, - rotation_id=self.rotation_id - ) - if dkg_result.status.is_done(): - save_dkg_results( - dkg_result.keys_data, - get_secret_key_share_filepath(self.name, self.rotation_id) - ) - self.schain_record.set_dkg_status(dkg_result.status) - if not dkg_result.status.is_done(): - raise DkgError(f'{self.p} DKG failed') - else: - logger.info(f'{self.p} dkg - ok') - return initial_status - - @monitor_block - def config(self, overwrite=False) -> bool: - initial_status = 
self.checks.config.status - if not initial_status or overwrite: - init_schain_config( - skale=self.skale, - node_id=self.node_config.id, - schain_name=self.name, - generation=self.generation, - ecdsa_sgx_key_name=self.node_config.sgx_key_name, - rotation_data=self.rotation_data, - schain_record=self.schain_record - ) - else: - logger.info(f'{self.p} config - ok') - return initial_status - - @monitor_block - def volume(self) -> bool: - initial_status = self.checks.volume.status - if not initial_status: - init_data_volume(self.schain, dutils=self.dutils) - else: - logger.info(f'{self.p} volume - ok') - return initial_status - - @monitor_block - def firewall_rules(self, overwrite=False) -> bool: - initial_status = self.checks.firewall_rules.status - if not initial_status: - logger.info('Configuring firewall rules') - conf = get_schain_config(self.name) - base_port = get_base_port_from_config(conf) - node_ips = get_node_ips_from_config(conf) - own_ip = get_own_ip_from_config(conf) - self.rc.configure( - base_port=base_port, - own_ip=own_ip, - node_ips=node_ips - ) - self.rc.sync() - return initial_status - - @monitor_block - def skaled_container(self, download_snapshot: bool = False, delay_start: bool = False) -> bool: - initial_status = self.checks.skaled_container.status - if not initial_status: - public_key, start_ts = None, None - - if download_snapshot: - public_key = get_schain_public_key(self.skale, self.name) - if delay_start: - start_ts = self.finish_ts - - monitor_schain_container( - self.schain, - schain_record=self.schain_record, - skaled_status=self.skaled_status, - public_key=public_key, - start_ts=start_ts, - dutils=self.dutils - ) - time.sleep(CONTAINER_POST_RUN_DELAY) - else: - self.schain_record.set_restart_count(0) - logger.info(f'{self.p} skaled_container - ok') - return initial_status - - @monitor_block - def restart_skaled_container(self) -> bool: - initial_status = True - if not is_container_exists(self.name, dutils=self.dutils): - 
logger.info(f'sChain {self.name}: container doesn\'t exits, running container...') - initial_status = self.skaled_container() - else: - restart_container(SCHAIN_CONTAINER, self.schain, dutils=self.dutils) - return initial_status - - @monitor_block - def reloaded_skaled_container(self) -> bool: - logger.info('Starting skaled with reloaded configuration') - initial_status = True - if is_container_exists(self.name, dutils=self.dutils): - remove_schain_container(self.name, dutils=self.dutils) - else: - logger.warning(f'sChain {self.name}: container doesn\'t exists') - initial_status = self.skaled_container() - return initial_status - - @monitor_block - def skaled_rpc(self) -> bool: - initial_status = self.checks.rpc.status - if not initial_status: - self.display_skaled_logs() - handle_failed_schain_rpc( - self.schain, - schain_record=self.schain_record, - skaled_status=self.skaled_status, - dutils=self.dutils - ) - else: - self.schain_record.set_failed_rpc_count(0) - logger.info(f'{self.p} rpc - ok') - return initial_status - - @monitor_block - def ima_container(self) -> bool: - initial_status = self.checks.ima_container.status - if not initial_status: - monitor_ima_container( - self.schain, - self.ima_data, - dutils=self.dutils - ) - else: - logger.info(f'{self.p} ima_container - ok') - return initial_status - - @monitor_block - def cleanup_schain_docker_entity(self) -> bool: - remove_schain_container(self.name, dutils=self.dutils) - time.sleep(SCHAIN_CLEANUP_TIMEOUT) - remove_schain_volume(self.name, dutils=self.dutils) - return True - - def log_executed_blocks(self) -> None: - logger.info(arguments_list_string( - self.executed_blocks, f'Finished monitor runner - {self.name}')) - - def display_skaled_logs(self) -> None: - if is_container_exists(self.name, dutils=self.dutils): - container_name = get_container_name(SCHAIN_CONTAINER, self.name) - self.dutils.display_container_logs(container_name) - else: - logger.warning(f'sChain {self.name}: container doesn\'t exists, 
could not show logs') - - monitor_runner = staticmethod(monitor_runner) - monitor_block = staticmethod(monitor_block) diff --git a/core/schains/monitor/config_monitor.py b/core/schains/monitor/config_monitor.py index eb3b2b3ed..4e6916c9d 100644 --- a/core/schains/monitor/config_monitor.py +++ b/core/schains/monitor/config_monitor.py @@ -43,7 +43,7 @@ def execute(self) -> None: def run(self): typename = type(self).__name__ - logger.info('Monitor type %s:', typename) + logger.info('Monitor type %s', typename) self.am._upd_last_seen() self.am._upd_schain_record() self.execute() diff --git a/core/schains/monitor/post_rotation_monitor.py b/core/schains/monitor/post_rotation_monitor.py deleted file mode 100644 index 1cc8cbf97..000000000 --- a/core/schains/monitor/post_rotation_monitor.py +++ /dev/null @@ -1,38 +0,0 @@ -# -*- coding: utf-8 -*- -# -# This file is part of SKALE Admin -# -# Copyright (C) 2021 SKALE Labs -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -import logging - -from core.schains.monitor.base_monitor import BaseMonitor - - -logger = logging.getLogger(__name__) - - -class PostRotationMonitor(BaseMonitor): - """ - PostRotationMonitor be executed for the sChain on the staying node when rotation is complete. - This type of monitor reloads skaled container. - """ - @BaseMonitor.monitor_runner - def run(self): - logger.info(f'{self.p} was stopped after rotation. 
Going to restart') - self.config(overwrite=True) - self.firewall_rules() - self.reloaded_skaled_container() diff --git a/core/schains/monitor/rotation_monitor.py b/core/schains/monitor/rotation_monitor.py deleted file mode 100644 index 3ee5edc39..000000000 --- a/core/schains/monitor/rotation_monitor.py +++ /dev/null @@ -1,112 +0,0 @@ -# -*- coding: utf-8 -*- -# -# This file is part of SKALE Admin -# -# Copyright (C) 2021 SKALE Labs -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -import logging - -from core.schains.monitor.base_monitor import BaseMonitor -from core.schains.rotation import set_rotation_for_schain -from skale.schain_config.rotation_history import get_previous_schain_groups, get_new_nodes_list - -logger = logging.getLogger(__name__) - - -class RotationMonitor(BaseMonitor): - """ - RotationMonitor could be executed for the sChain when rotation is in progress for this chain. - In this monitor mode there are 3 possible sub-modes: - - 1. New node - when current node was added to the existing group - 2. Leaving node - when current node was removed from the existing group - 3. Staying node - when current node staying in the group - """ - - def _is_new_rotation_node(self): - return self.rotation_data['new_node'] == self.node_config.id - - def _is_new_node(self) -> bool: - """ - New node monitor runs in 2 cases during rotation: - 1. 
When the current node is marked as a new node - 2. When the current node doesn't have SKALE chain config file created - """ - if self._is_new_rotation_node(): - logger.info(f'{self.p} current node is the new node in this rotation') - return True - node_groups = get_previous_schain_groups( - skale=self.skale, - schain_name=self.name, - leaving_node_id=self.rotation_data['leaving_node'], - include_keys=False - ) - new_nodes = get_new_nodes_list( - skale=self.skale, - name=self.name, - node_groups=node_groups - ) - logger.info(f'{self.p} new nodes: {new_nodes}, current node: {self.node_config.id}') - if self.node_config.id in new_nodes: - logger.info(f'{self.p} current node is one of the new nodes in this rotation') - return True - return False - - def _is_leaving_node(self) -> bool: - return self.rotation_data['leaving_node'] == self.node_config.id - - def rotation_request(self) -> None: - set_rotation_for_schain(self.name, self.finish_ts) - - def new_node(self) -> None: - self.config_dir() - self.dkg() - self.config() - self.volume() - self.firewall_rules() - self.skaled_container(download_snapshot=True, delay_start=True) - self.ima_container() - - def leaving_node(self) -> None: - self.firewall_rules() - self.skaled_container() - self.skaled_rpc() - self.ima_container() - self.rotation_request() - - def staying_node(self) -> None: - self.firewall_rules() - self.skaled_container() - self.skaled_rpc() - self.ima_container() - self.dkg() - self.rotation_request() - - def get_rotation_mode_func(self): - if self._is_leaving_node(): - return self.leaving_node - if self._is_new_node(): - return self.new_node - return self.staying_node - - @BaseMonitor.monitor_runner - def run(self): - rotation_mode_func = self.get_rotation_mode_func() - logger.info( - f'sChain: {self.name} running {type(self).__name__} ' - f'type: {rotation_mode_func}' - ) - return rotation_mode_func() diff --git a/core/schains/monitor/skaled_monitor.py b/core/schains/monitor/skaled_monitor.py index 
6e30a954b..c9ccb4b11 100644 --- a/core/schains/monitor/skaled_monitor.py +++ b/core/schains/monitor/skaled_monitor.py @@ -48,7 +48,7 @@ def execute(self) -> None: def run(self): typename = type(self).__name__ - logger.info('Monitor type %s:', typename) + logger.info('Monitor type %s', typename) self.am._upd_last_seen() self.am._upd_schain_record() self.execute() diff --git a/tools/configs/logs.py b/tools/configs/logs.py index 2a0c89496..8b00b373d 100644 --- a/tools/configs/logs.py +++ b/tools/configs/logs.py @@ -38,10 +38,10 @@ REMOVED_CONTAINERS_FOLDER_NAME ) -LOG_FILE_SIZE_MB = 100 +LOG_FILE_SIZE_MB = 20 LOG_FILE_SIZE_BYTES = LOG_FILE_SIZE_MB * 1000000 -LOG_BACKUP_COUNT = 3 +LOG_BACKUP_COUNT = 5 ADMIN_LOG_FORMAT = '[%(asctime)s %(levelname)s][%(process)d][%(processName)s][%(threadName)s] - %(name)s:%(lineno)d - %(message)s' # noqa API_LOG_FORMAT = '[%(asctime)s] %(process)d %(levelname)s %(url)s %(module)s: %(message)s' # noqa diff --git a/tools/docker_utils.py b/tools/docker_utils.py index cea0d0351..8602fdf53 100644 --- a/tools/docker_utils.py +++ b/tools/docker_utils.py @@ -147,7 +147,7 @@ def get_info(self, container_id: str) -> dict: container_info['stats'] = self.cli.inspect_container(container.id) container_info['status'] = container.status except docker.errors.NotFound: - logger.warning( + logger.debug( f'Can not get info - no such container: {container_id}') container_info['status'] = CONTAINER_NOT_FOUND return container_info @@ -179,7 +179,7 @@ def get_vol(self, name: str) -> Volume: try: return self.client.volumes.get(name) except docker.errors.NotFound: - logger.warning(f'Volume {name} is not exist') + logger.debug(f'Volume {name} is not exist') return None def rm_vol(self, name: str, retry_lvmpy_error: bool = True) -> None: @@ -214,8 +214,8 @@ def safe_get_container(self, container_name: str): try: return self.client.containers.get(container_name) except docker.errors.APIError as e: - logger.warning(e) - logger.warning(f'No such container: 
{container_name}') + logger.debug(e) + logger.debug(f'No such container: {container_name}') def safe_rm(self, container_name: str, timeout=DOCKER_DEFAULT_STOP_TIMEOUT, **kwargs): """ From 21acb9552afcb763124e00447dabd0cb6b9bccd3 Mon Sep 17 00:00:00 2001 From: badrogger Date: Mon, 19 Jun 2023 10:59:01 +0000 Subject: [PATCH 72/84] Fix no config monitor condition --- core/schains/checks.py | 5 +++++ core/schains/monitor/action.py | 4 ++++ core/schains/monitor/config_monitor.py | 4 ++-- core/schains/monitor/skaled_monitor.py | 13 +++++++++---- 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/core/schains/checks.py b/core/schains/checks.py index fecc59dab..4acc05db5 100644 --- a/core/schains/checks.py +++ b/core/schains/checks.py @@ -204,6 +204,11 @@ def is_healthy(self) -> bool: def new_schain(self) -> CheckRes: return CheckRes(self._new_schain) + @property + def upstream_exists(self) -> CheckRes: + upstream_path = get_upstream_config_filepath(self.name) + return CheckRes(upstream_path is not None) + @property def config_updated(self) -> CheckRes: if not self.config: diff --git a/core/schains/monitor/action.py b/core/schains/monitor/action.py index 499c4cd68..fc7a77696 100644 --- a/core/schains/monitor/action.py +++ b/core/schains/monitor/action.py @@ -354,6 +354,10 @@ def send_exit_request(self) -> None: if finish_ts is not None: set_rotation_for_schain(self.name, finish_ts) + @property + def upstream_config_path(self) -> Optional[str]: + return get_upstream_config_filepath(self.name) + @property def upstream_finish_ts(self) -> Optional[int]: return get_finish_ts_from_upstream_config(self.name) diff --git a/core/schains/monitor/config_monitor.py b/core/schains/monitor/config_monitor.py index 4e6916c9d..1e4ff5a60 100644 --- a/core/schains/monitor/config_monitor.py +++ b/core/schains/monitor/config_monitor.py @@ -43,13 +43,13 @@ def execute(self) -> None: def run(self): typename = type(self).__name__ - logger.info('Monitor type %s', typename) + 
logger.info('Config monitor type %s', typename) self.am._upd_last_seen() self.am._upd_schain_record() self.execute() self.am.log_executed_blocks() self.am._upd_last_seen() - logger.info('Finished %s monitor runner', typename) + logger.info('Finished %s config monitor runner', typename) class RegularConfigMonitor(BaseConfigMonitor): diff --git a/core/schains/monitor/skaled_monitor.py b/core/schains/monitor/skaled_monitor.py index c9ccb4b11..34d2f57ab 100644 --- a/core/schains/monitor/skaled_monitor.py +++ b/core/schains/monitor/skaled_monitor.py @@ -48,13 +48,13 @@ def execute(self) -> None: def run(self): typename = type(self).__name__ - logger.info('Monitor type %s', typename) + logger.info('Skaled monitor type %s', typename) self.am._upd_last_seen() self.am._upd_schain_record() self.execute() self.am.log_executed_blocks() self.am._upd_last_seen() - logger.info('Finished %s monitor runner', typename) + logger.info('Finished %s skaled monitor runner', typename) class RegularSkaledMonitor(BaseSkaledMonitor): @@ -207,6 +207,10 @@ def is_skaled_reload_status(checks: SkaledChecks, skaled_status: Optional[Skaled return not checks.skaled_container.status and needs_reload +def no_upstream(checks: SkaledChecks) -> bool: + return not checks.upstream_exists + + def get_skaled_monitor( action_manager: SkaledActionManager, checks: SkaledChecks, @@ -215,9 +219,10 @@ def get_skaled_monitor( backup_run: bool = False ) -> BaseSkaledMonitor: mon_type = RegularSkaledMonitor - if not checks.config: + logger.info('Chosing skaled monitor. 
Upstream config %s', action_manager.upstream_config_path) + if no_upstream(checks): mon_type = NoConfigMonitor - if is_backup_mode(schain_record, backup_run): + elif is_backup_mode(schain_record, backup_run): mon_type = BackupSkaledMonitor elif is_repair_mode(schain_record, checks, skaled_status): mon_type = RepairSkaledMonitor From dd2094df8537cdd633c465e5dd0e2533e84b7826 Mon Sep 17 00:00:00 2001 From: badrogger Date: Mon, 19 Jun 2023 16:45:36 +0000 Subject: [PATCH 73/84] Fix retrieving finish ts. NewNode monitor condition --- core/schains/checks.py | 6 ++- core/schains/config/directory.py | 34 ++++++------- core/schains/config/main.py | 39 +++++++++------ core/schains/monitor/action.py | 1 + core/schains/monitor/main.py | 25 +++++----- core/schains/monitor/skaled_monitor.py | 13 ++--- tests/conftest.py | 68 ++++++++++++++++++++------ tests/schains/checks_test.py | 4 +- tests/schains/config/config_test.py | 44 +++++++++++++++++ 9 files changed, 161 insertions(+), 73 deletions(-) diff --git a/core/schains/checks.py b/core/schains/checks.py index 4acc05db5..a7358c26e 100644 --- a/core/schains/checks.py +++ b/core/schains/checks.py @@ -25,7 +25,7 @@ from typing import Any, Dict from core.schains.config.directory import ( - config_exists_for_rotation_id_and_stream_version, + upstreams_for_rotation_id_version, get_schain_check_filepath, get_schain_config, schain_config_dir, @@ -130,11 +130,13 @@ def dkg(self) -> CheckRes: @property def upstream_config(self) -> CheckRes: """Checks that config exists for rotation id and stream""" - return config_exists_for_rotation_id_and_stream_version( + upstreams = upstreams_for_rotation_id_version( self.name, self.rotation_id, self.stream_version ) + logger.debug('Upstream configs for %s: %s', self.name, upstreams) + return len(upstreams) > 0 def new_schain(self) -> CheckRes: return CheckRes(self.schain_record.new_schain) diff --git a/core/schains/config/directory.py b/core/schains/config/directory.py index 471e67d89..2ade69828 
100644 --- a/core/schains/config/directory.py +++ b/core/schains/config/directory.py @@ -23,6 +23,7 @@ import os import time from pathlib import Path +from typing import List from tools.configs import SCHAIN_CONFIG_DIR_SKALED from tools.configs.schains import ( @@ -38,10 +39,14 @@ def config_filename(name: str) -> str: return f'schain_{name}.json' -def new_config_prefix(name: str) -> str: +def upstream_prefix(name: str) -> str: return f'schain_{name}_' +def upstream_rotation_version_prefix(name: str, rotation_id: int, version: str) -> str: + return f'schain_{name}_{rotation_id}_{version}_' + + def formatted_stream_version(stream_version: str) -> str: return stream_version.replace('.', '_') @@ -49,7 +54,7 @@ def formatted_stream_version(stream_version: str) -> str: def new_config_filename(name: str, rotation_id: int, stream_version: str) -> str: ts = int(time.time()) formatted_version = formatted_stream_version(stream_version) - return f'schain_{name}_{ts}_{rotation_id}_{formatted_version}.json' + return f'schain_{name}_{rotation_id}_{formatted_version}_{ts}.json' def schain_config_dir(name: str) -> str: @@ -85,27 +90,16 @@ def new_schain_config_filepath( return os.path.join(schain_dir_path, new_config_filename(name, rotation_id, stream_version)) -def config_exists_for_rotation_id_and_stream_version( +def upstreams_for_rotation_id_version( name: str, rotation_id: int, - stream_version: str, - in_schain_container: bool = False -) -> str: - schain_dir_path = SCHAIN_CONFIG_DIR_SKALED if in_schain_container else schain_config_dir(name) + stream_version: str +) -> List[str]: + schain_dir_path = schain_config_dir(name) version = formatted_stream_version(stream_version) - pattern = f'{schain_dir_path}/schain_{name}_*_{rotation_id}_{version}.json' - done = glob.glob(pattern) - return len(done) > 0 - - -def upstream_path_for_rotation_id_stream( - name: str, - rotation_id: int, - stream_version: str, - in_schain_container: bool = False -): - schain_dir_path = 
SCHAIN_CONFIG_DIR_SKALED if in_schain_container else schain_config_dir(name) - return os.path.join(schain_dir_path) + prefix = upstream_rotation_version_prefix(name, rotation_id, version) + pattern = os.path.join(schain_dir_path, prefix + '*.json') + return glob.glob(pattern) def skaled_status_filepath(name: str) -> str: diff --git a/core/schains/config/main.py b/core/schains/config/main.py index f2406b759..8f1eeb881 100644 --- a/core/schains/config/main.py +++ b/core/schains/config/main.py @@ -28,10 +28,10 @@ from core.node import get_skale_node_version from core.schains.config.directory import ( get_tmp_schain_config_filepath, - new_config_prefix, schain_config_dir, schain_config_filepath, - new_schain_config_filepath + new_schain_config_filepath, + upstream_prefix ) from core.schains.config.generator import generate_schain_config_with_skale from tools.str_formatters import arguments_list_string @@ -150,37 +150,44 @@ def get_files_with_prefix(config_dir: str, prefix: str) -> List[str]: def get_upstream_config_filepath(schain_name) -> Optional[str]: config_dir = schain_config_dir(schain_name) - prefix = new_config_prefix(schain_name) + prefix = upstream_prefix(schain_name) dir_files = get_files_with_prefix(config_dir, prefix) if not dir_files: return None return os.path.join(config_dir, dir_files[-1]) -def get_node_groups_from_config(config_path: str) -> Dict: - with open(config_path) as upstream_file: - config = json.load(upstream_file) - return config['skaleConfig']['sChain']['nodeGroups'] +def get_node_groups_from_config(config: Dict) -> Dict: + return config['skaleConfig']['sChain']['nodeGroups'] -def get_finish_ts(config_path: str) -> Optional[int]: - if not os.path.isfile(config_path): +def get_finish_ts(config: str) -> Optional[int]: + node_groups = get_node_groups_from_config(config) + rotation_ids = list(sorted(map(int, node_groups.keys()))) + if len(rotation_ids) < 2: return None - node_groups = get_node_groups_from_config(config_path) - last_rotation = 
sorted(node_groups.keys())[-1] - return node_groups[last_rotation]['finish_ts'] + prev_rotation = len(rotation_ids) - 2 + return node_groups[str(prev_rotation)]['finish_ts'] def get_finish_ts_from_upstream_config(schain_name: str) -> Optional[int]: upstream_path = get_upstream_config_filepath(schain_name) - if upstream_path is None: + logger.info('Retrieving finish_ts from %s', upstream_path) + if not os.path.isfile(upstream_path): return None - return get_finish_ts(upstream_path) + with open(upstream_path) as upstream_file: + config = json.load(upstream_file) + return get_finish_ts(config) def get_finish_ts_from_config(schain_name: str) -> Optional[int]: - upstream_path = schain_config_filepath(schain_name) - return get_finish_ts(upstream_path) + config_path = schain_config_filepath(schain_name) + logger.info('Retrieving finish_ts from %s', config_path) + if not os.path.isfile(config_path): + return None + with open(config_path) as config_file: + config = json.load(config_file) + return get_finish_ts(config) def get_number_of_secret_shares(schain_name: str) -> Optional[int]: diff --git a/core/schains/monitor/action.py b/core/schains/monitor/action.py index fc7a77696..99886b85b 100644 --- a/core/schains/monitor/action.py +++ b/core/schains/monitor/action.py @@ -351,6 +351,7 @@ def update_config(self) -> bool: @BaseActionManager.monitor_block def send_exit_request(self) -> None: finish_ts = self.upstream_finish_ts + logger.info('Skaled exit finish_ts %s', finish_ts) if finish_ts is not None: set_rotation_for_schain(self.name, finish_ts) diff --git a/core/schains/monitor/main.py b/core/schains/monitor/main.py index ee90e42b8..4e5f7f815 100644 --- a/core/schains/monitor/main.py +++ b/core/schains/monitor/main.py @@ -171,21 +171,12 @@ def post_monitor_sleep(): is_rotation_active = skale.node_rotation.is_rotation_active(name) - if not is_node_part_of_chain(skale, name, node_config.id) and not is_rotation_active: + leaving_chain = not is_node_part_of_chain(skale, name, 
node_config.id) + if leaving_chain and not is_rotation_active: logger.warning(f'{p} NOT ON NODE ({node_config.id}), finising process...') return True tasks = [ - Task( - f'{name}-config', - functools.partial( - run_config_pipeline, - skale=skale, - schain=schain, - node_config=node_config, - stream_version=stream_version - ) - ), Task( f'{name}-skaled', functools.partial( @@ -198,6 +189,18 @@ def post_monitor_sleep(): ), ) ] + if not leaving_chain: + tasks.append( + Task( + f'{name}-config', + functools.partial( + run_config_pipeline, + skale=skale, + schain=schain, + node_config=node_config, + stream_version=stream_version + ) + )) run_tasks(name=name, tasks=tasks) if once: return True diff --git a/core/schains/monitor/skaled_monitor.py b/core/schains/monitor/skaled_monitor.py index 34d2f57ab..0be692a8b 100644 --- a/core/schains/monitor/skaled_monitor.py +++ b/core/schains/monitor/skaled_monitor.py @@ -135,14 +135,15 @@ def execute(self): class NoConfigMonitor(BaseSkaledMonitor): def execute(self): - if not self.am.update_config(): + if not self.checks.upstream_exists: logger.info('Waiting for upstream config') + else: + logger.info('Creating skaled config') + self.am.update_config() class NewNodeMonitor(BaseSkaledMonitor): def execute(self): - if not self.checks.config_updated: - self.am.update_config() if not self.checks.volume: self.am.volume() if not self.checks.firewall_rules: @@ -207,8 +208,8 @@ def is_skaled_reload_status(checks: SkaledChecks, skaled_status: Optional[Skaled return not checks.skaled_container.status and needs_reload -def no_upstream(checks: SkaledChecks) -> bool: - return not checks.upstream_exists +def no_config(checks: SkaledChecks) -> bool: + return not checks.config def get_skaled_monitor( @@ -220,7 +221,7 @@ def get_skaled_monitor( ) -> BaseSkaledMonitor: mon_type = RegularSkaledMonitor logger.info('Chosing skaled monitor. 
Upstream config %s', action_manager.upstream_config_path) - if no_upstream(checks): + if no_config(checks): mon_type = NoConfigMonitor elif is_backup_mode(schain_record, backup_run): mon_type = BackupSkaledMonitor diff --git a/tests/conftest.py b/tests/conftest.py index c5234b1a8..af6c32f85 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -264,37 +264,73 @@ def generate_schain_config(schain_name): "schainName": schain_name, "schainOwner": "0x3483A10F7d6fDeE0b0C1E9ad39cbCE13BD094b12", + "nodeGroups": { - "0": { + "1": { "rotation": None, "nodes": { - "0": [ + "2": [ 0, - 40, - "0xc67d1931b00f2b203907fed1ef81cf29aab65d707eb65fbfed9f6d8e74c1d7129bb0e94403e8c315b1048a4077c473cebc59e74612616af4d7804e19731eab04" # noqa + 2, + "0xc21d242070e84fe5f8e80f14b8867856b714cf7d1984eaa9eb3f83c2a0a0e291b9b05754d071fbe89a91d4811b9b182d350f706dea6e91205905b86b4764ef9a" # noqa ], - "1": [ + "5": [ 1, - 38, - "0x4523552de788999746ab13a0972021f5bf76ac38ca22f5310a5f921b7d28d89e576f5d71f8bcf047b371a999c5ce265012cd0c290931f9bc9d29146069ce79f1" # noqa + 5, + "0xc37b6db727683379d305a4e38532ddeb58c014ebb151662635839edf3f20042bcdaa8e4b1938e8304512c730671aedf310da76315e329be0814709279a45222a" # noqa ], - "2": [ + "4": [ 2, - 39, - "0x12ec7d4531d7953c388ea3544a5e2273e3d9ec6924489ac5aa91c2e4990c586ce0d63f6c99ec7b4e7f404c7f6eb2c968fbda1eb6583e6af3c4eb8f64cfb031c9" # noqa + 4, + "0x8b335f65ecf0845d93bc65a340cc2f4b8c49896f5023ecdff7db6f04bc39f9044239f541702ca7ad98c97aa6a7807aa7c41e394262cca0a32847e3c7c187baf5" # noqa ], "3": [ 3, - 37, - "0xcfbda7c9bbbfa26002c569ee92a07a306205da60af428666cd06ebefc6785df842284abd55a16b2635f895a6e5c5f5f523ab0a44b76e6bf93cf34d4e996cbd0b" # noqa + 3, + "0xf3496966c7fd4a82967d32809267abec49bf5c4cc6d88737cee9b1a436366324d4847127a1220575f4ea6a7661723cd5861c9f8de221405b260511b998a0bbc8" # noqa ] }, "finish_ts": None, "bls_public_key": { - "blsPublicKey0": "21092886060389550499034480408505112402900737789452520523953046451048727082686", # noqa - "blsPublicKey1": 
"4152187587365395389364717716976849075850656705989482065258061487623185446470", # noqa - "blsPublicKey2": "16705078395405524997550329250978551573025551514774956523868577739340207584290", # noqa - "blsPublicKey3": "10123946908466647712215451689564014152451116972533816450611813231481921711132" # noqa + "blsPublicKey0": "8609115311055863404517113391175862520685049234001839865086978176708009850942", # noqa + "blsPublicKey1": "12596903066793884087763787291339131389612748572700005223043813683790087081", # noqa + "blsPublicKey2": "20949401227653007081557504259342598891084201308661070577835940778932311075846", # noqa + "blsPublicKey3": "5476329286206272760147989277520100256618500160343291262709092037265666120930" # noqa + } + }, + "0": { + "rotation": { + "leaving_node_id": 1, + "new_node_id": 5 + }, + "nodes": { + "2": [ + 0, + 2, + "0xc21d242070e84fe5f8e80f14b8867856b714cf7d1984eaa9eb3f83c2a0a0e291b9b05754d071fbe89a91d4811b9b182d350f706dea6e91205905b86b4764ef9a" # noqa + ], + "4": [ + 2, + 4, + "0x8b335f65ecf0845d93bc65a340cc2f4b8c49896f5023ecdff7db6f04bc39f9044239f541702ca7ad98c97aa6a7807aa7c41e394262cca0a32847e3c7c187baf5" # noqa + ], + "3": [ + 3, + 3, + "0xf3496966c7fd4a82967d32809267abec49bf5c4cc6d88737cee9b1a436366324d4847127a1220575f4ea6a7661723cd5861c9f8de221405b260511b998a0bbc8" # noqa + ], + "1": [ + 1, + 1, + "0x1a857aa4a982ba242c2386febf1eb72dcd1f9669b4237a17878eb836086618af6cda473afa2dfb37c0d2786887397d39bec9601234d933d4384fe38a39b399df" # noqa + ] + }, + "finish_ts": 1687180291, + "bls_public_key": { + "blsPublicKey0": "12452613198400495171048259986807077228209876295033433688114313813034253740478", # noqa + "blsPublicKey1": "10490413552821776191285904316985887024952448646239144269897585941191848882433", # noqa + "blsPublicKey2": "892041650350974543318836112385472656918171041007469041098688469382831828315", # noqa + "blsPublicKey3": "14699659615059580586774988732364564692366017113631037780839594032948908579205" # noqa } } }, diff --git 
a/tests/schains/checks_test.py b/tests/schains/checks_test.py index a83c3b725..89fa3f314 100644 --- a/tests/schains/checks_test.py +++ b/tests/schains/checks_test.py @@ -125,7 +125,7 @@ def test_upstream_config_check(schain_checks): upstream_path_wrong_version = os.path.join( schain_config_dir(name), - f'schain_{name}_{ts}_{rotation_id}_2.2.2.json' + f'schain_{name}_{rotation_id}_2.2.2_{ts}.json' ) with open(upstream_path_wrong_version, 'w') as upstream_file: json.dump({'config': 'wrong_upstream'}, upstream_file) @@ -134,7 +134,7 @@ def test_upstream_config_check(schain_checks): formatter_version = CONFIG_STREAM.replace('.', '_') upstream_path = os.path.join( schain_config_dir(name), - f'schain_{name}_{ts}_{rotation_id}_{formatter_version}.json' + f'schain_{name}_{rotation_id}_{formatter_version}_{ts}.json' ) with open(upstream_path, 'w') as upstream_file: diff --git a/tests/schains/config/config_test.py b/tests/schains/config/config_test.py index 681a751ef..c01c01d58 100644 --- a/tests/schains/config/config_test.py +++ b/tests/schains/config/config_test.py @@ -1,3 +1,7 @@ +import os +import shutil +from pathlib import Path + import pytest from core.schains.config.helper import ( @@ -6,6 +10,8 @@ get_own_ip_from_config, get_schain_env ) +from core.schains.config.directory import schain_config_dir +from core.schains.config.main import get_finish_ts, get_upstream_config_filepath from core.schains.volume import get_schain_volume_config from tools.configs.containers import SHARED_SPACE_CONTAINER_PATH, SHARED_SPACE_VOLUME_NAME @@ -43,3 +49,41 @@ def test_get_schain_volume_config(): 'test_name': {'bind': '/mnt/mount_path/', 'mode': 'Z'}, SHARED_SPACE_VOLUME_NAME: {'bind': SHARED_SPACE_CONTAINER_PATH, 'mode': 'Z'} } + + +@pytest.fixture +def upstreams(schain_db, schain_config): + name = schain_db + config_folder = schain_config_dir(name) + files = [ + f'schain_{name}_0_2_1_16_1687183338.json', + f'schain_{name}_1_2_1_16_1687183335.json', + 
f'schain_{name}_1_2_1_17_1687183336.json' + ] + try: + for fname in files: + Path(os.path.join(config_folder, fname)).touch() + yield files + finally: + shutil.rmtree(config_folder) + + +def test_get_schain_upstream_config(schain_db, upstreams): + name = schain_db + config_folder = schain_config_dir(name) + upstream_config = get_upstream_config_filepath(name) + expected = os.path.join(config_folder, f'schain_{name}_1_2_1_17_1687183336.json') + assert upstream_config == expected + + not_existing_chain = 'not-exist' + upstream_config = get_upstream_config_filepath(not_existing_chain) + assert upstream_config is None + + +def test_get_finish_ts(schain_config): + finish_ts = get_finish_ts(schain_config) + assert finish_ts == 1687180291 + + schain_config['skaleConfig']['sChain']['nodeGroups'].pop('0') + finish_ts = get_finish_ts(schain_config) + assert finish_ts is None From 2ba3f2a7145fec571c5caaa5917f6f45ede4af0f Mon Sep 17 00:00:00 2001 From: badrogger Date: Mon, 19 Jun 2023 19:51:38 +0000 Subject: [PATCH 74/84] Fix no upstream config file handling --- core/schains/config/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/schains/config/main.py b/core/schains/config/main.py index 8f1eeb881..5f59bae2b 100644 --- a/core/schains/config/main.py +++ b/core/schains/config/main.py @@ -173,7 +173,7 @@ def get_finish_ts(config: str) -> Optional[int]: def get_finish_ts_from_upstream_config(schain_name: str) -> Optional[int]: upstream_path = get_upstream_config_filepath(schain_name) logger.info('Retrieving finish_ts from %s', upstream_path) - if not os.path.isfile(upstream_path): + if upstream_path is None or not os.path.isfile(upstream_path): return None with open(upstream_path) as upstream_file: config = json.load(upstream_file) From 082c06018dabc6c4f79f176146b33321b7f6f685 Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 20 Jun 2023 08:39:56 +0000 Subject: [PATCH 75/84] Handle update properly --- core/schains/checks.py | 15 +++++- 
core/schains/config/directory.py | 2 +- core/schains/monitor/action.py | 11 ++++- core/schains/monitor/skaled_monitor.py | 25 ++++++---- tests/schains/monitor/skaled_monitor_test.py | 48 ++++++++++++++++++-- 5 files changed, 86 insertions(+), 15 deletions(-) diff --git a/core/schains/checks.py b/core/schains/checks.py index a7358c26e..c8c47c1c2 100644 --- a/core/schains/checks.py +++ b/core/schains/checks.py @@ -37,7 +37,10 @@ get_own_ip_from_config, get_local_schain_http_endpoint ) -from core.schains.config.main import get_upstream_config_filepath +from core.schains.config.main import ( + get_upstream_config_filepath, + get_rotation_ids_from_config_file +) from core.schains.dkg.utils import get_secret_key_share_filepath from core.schains.firewall.types import IRuleController from core.schains.process_manager_helper import is_monitor_process_alive @@ -211,6 +214,16 @@ def upstream_exists(self) -> CheckRes: upstream_path = get_upstream_config_filepath(self.name) return CheckRes(upstream_path is not None) + @property + def rotation_id_updated(self) -> int: + if not self.config: + return CheckRes(False) + upstream_path = get_upstream_config_filepath(self.name) + config_path = schain_config_filepath(self.name) + upstream_rotations = get_rotation_ids_from_config_file(upstream_path) + config_rotations = get_rotation_ids_from_config_file(config_path) + return CheckRes(upstream_rotations == config_rotations) + @property def config_updated(self) -> CheckRes: if not self.config: diff --git a/core/schains/config/directory.py b/core/schains/config/directory.py index 2ade69828..cdbe80c7d 100644 --- a/core/schains/config/directory.py +++ b/core/schains/config/directory.py @@ -97,7 +97,7 @@ def upstreams_for_rotation_id_version( ) -> List[str]: schain_dir_path = schain_config_dir(name) version = formatted_stream_version(stream_version) - prefix = upstream_rotation_version_prefix(name, rotation_id, version) + prefix = upstreams_for_rotation_id_version(name, rotation_id, version) 
pattern = os.path.join(schain_dir_path, prefix + '*.json') return glob.glob(pattern) diff --git a/core/schains/monitor/action.py b/core/schains/monitor/action.py index 99886b85b..8d435e8ed 100644 --- a/core/schains/monitor/action.py +++ b/core/schains/monitor/action.py @@ -66,7 +66,7 @@ from tools.docker_utils import DockerUtils from tools.str_formatters import arguments_list_string -from tools.configs.containers import SCHAIN_CONTAINER +from tools.configs.containers import IMA_CONTAINER, SCHAIN_CONTAINER from tools.notifications.messages import notify_repair_mode from web.models.schain import ( @@ -287,6 +287,15 @@ def restart_skaled_container(self) -> bool: restart_container(SCHAIN_CONTAINER, self.schain, dutils=self.dutils) return initial_status + @BaseActionManager.monitor_block + def restart_ima_container(self) -> bool: + initial_status = True + if not is_container_exists(self.name, container_type=IMA_CONTAINER, dutils=self.dutils): + initial_status = self.ima_container() + else: + restart_container(IMA_CONTAINER, self.schain, dutils=self.dutils) + return initial_status + @BaseActionManager.monitor_block def reloaded_skaled_container(self) -> bool: logger.info('starting skaled with reloaded configuration') diff --git a/core/schains/monitor/skaled_monitor.py b/core/schains/monitor/skaled_monitor.py index 0be692a8b..2342c3426 100644 --- a/core/schains/monitor/skaled_monitor.py +++ b/core/schains/monitor/skaled_monitor.py @@ -108,7 +108,7 @@ def execute(self) -> None: self.am.reloaded_skaled_container() -class AfterExitTimeSkaledMonitor(BaseSkaledMonitor): +class AfterExitSkaledMonitor(BaseSkaledMonitor): def execute(self) -> None: if not self.checks.config_updated: self.am.update_config() @@ -117,6 +117,8 @@ def execute(self) -> None: if self.checks.volume: self.am.volume() self.am.reloaded_skaled_container() + if not self.checks.ima_container: + self.am.restart_ima_container() class NewConfigSkaledMonitor(BaseSkaledMonitor): @@ -173,11 +175,16 @@ def 
is_new_config(checks: SkaledChecks) -> bool: return checks.config and not checks.config_updated -def is_exit_time_reached(checks: SkaledChecks, skaled_status: Optional[SkaledStatus]) -> bool: +def is_config_update_time( + checks: SkaledChecks, + skaled_status: Optional[SkaledStatus] +) -> bool: if not skaled_status: return False - skaled_status.log() - return not checks.skaled_container.status and skaled_status.exit_time_reached + if not checks.skaled_container: + if not checks.rotation_id_updated or skaled_status.exit_time_reached: + return True + return skaled_status.exit_time_reached def is_reload_mode(schain_record: SChainRecord) -> bool: @@ -205,7 +212,7 @@ def is_skaled_reload_status(checks: SkaledChecks, skaled_status: Optional[Skaled return False skaled_status.log() needs_reload = skaled_status.start_again and not skaled_status.start_from_snapshot - return not checks.skaled_container.status and needs_reload + return not checks.skaled_container and needs_reload def no_config(checks: SkaledChecks) -> bool: @@ -220,7 +227,9 @@ def get_skaled_monitor( backup_run: bool = False ) -> BaseSkaledMonitor: mon_type = RegularSkaledMonitor - logger.info('Chosing skaled monitor. 
Upstream config %s', action_manager.upstream_config_path) + logger.info('Chosing skaled monitor') + logger.info('Upstream config %s', action_manager.upstream_config_path) + skaled_status.log() if no_config(checks): mon_type = NoConfigMonitor elif is_backup_mode(schain_record, backup_run): @@ -229,8 +238,8 @@ def get_skaled_monitor( mon_type = RepairSkaledMonitor elif is_new_node_mode(schain_record, action_manager.upstream_finish_ts): mon_type = NewNodeMonitor - elif is_exit_time_reached(checks, skaled_status): - mon_type = AfterExitTimeSkaledMonitor + elif is_config_update_time(checks, skaled_status): + mon_type = AfterExitSkaledMonitor elif is_new_config(checks): mon_type = NewConfigSkaledMonitor elif is_reload_mode(schain_record): diff --git a/tests/schains/monitor/skaled_monitor_test.py b/tests/schains/monitor/skaled_monitor_test.py index 535e5c7d6..33eb7e7b9 100644 --- a/tests/schains/monitor/skaled_monitor_test.py +++ b/tests/schains/monitor/skaled_monitor_test.py @@ -1,14 +1,19 @@ import datetime +import os +import shutil +from pathlib import Path import pytest from core.schains.checks import CheckRes, SkaledChecks +from core.schains.config.directory import schain_config_dir from core.schains.monitor.action import SkaledActionManager from core.schains.monitor.skaled_monitor import ( - AfterExitTimeSkaledMonitor, + AfterExitSkaledMonitor, BackupSkaledMonitor, get_skaled_monitor, NewConfigSkaledMonitor, + NewNodeMonitor, NoConfigMonitor, RecreateSkaledMonitor, RegularSkaledMonitor, @@ -268,7 +273,7 @@ def test_get_skaled_monitor_after_exit( skaled_am, skaled_checks, schain_db, - skaled_status_exit_time_reached + skaled_status_exit_time_reached, ): name = schain_db schain_record = SChainRecord.get_by_name(name) @@ -279,7 +284,37 @@ def test_get_skaled_monitor_after_exit( schain_record, skaled_status_exit_time_reached ) - assert isinstance(mon, AfterExitTimeSkaledMonitor) + assert isinstance(mon, AfterExitSkaledMonitor) + + +@pytest.fixture +def 
new_upstream(schain_db): + name = schain_db + config_dir = schain_config_dir(name) + upath = os.path.join(f'schain_{name}_2_2_1_16_1687248983') + try: + Path(upath).touch() + yield upath + finally: + shutil.rmtree(config_dir) + + +def test_get_skaled_monitor_after_exit_no_rotation( + skaled_am, + skaled_checks, + schain_db, + skaled_status, + new_upstream +): + name = schain_db + schain_record = SChainRecord.get_by_name(name) + mon = get_skaled_monitor( + skaled_am, + skaled_checks, + schain_record, + skaled_status + ) + assert isinstance(mon, AfterExitSkaledMonitor) def test_get_skaled_monitor_recreate( @@ -327,10 +362,15 @@ def test_recreate_skaled_monitor(skaled_am, skaled_checks): def test_after_exit_skaled_monitor(skaled_am, skaled_checks): - mon = AfterExitTimeSkaledMonitor(skaled_am, skaled_checks) + mon = AfterExitSkaledMonitor(skaled_am, skaled_checks) mon.run() def test_no_config_monitor(skaled_am, skaled_checks): mon = NoConfigMonitor(skaled_am, skaled_checks) mon.run() + + +def test_new_node_monitor(skaled_am, skaled_checks): + mon = NewNodeMonitor(skaled_am, skaled_checks) + mon.run() From eeed2e4ce9c249349f1bce87c4c79ee10952cd4c Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 20 Jun 2023 08:55:54 +0000 Subject: [PATCH 76/84] Add missing changes --- core/schains/config/main.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/core/schains/config/main.py b/core/schains/config/main.py index 5f59bae2b..b14c169d9 100644 --- a/core/schains/config/main.py +++ b/core/schains/config/main.py @@ -161,6 +161,21 @@ def get_node_groups_from_config(config: Dict) -> Dict: return config['skaleConfig']['sChain']['nodeGroups'] +def get_rotation_ids_from_config(config: Dict) -> Dict: + node_groups = get_node_groups_from_config(config) + rotation_ids = list(sorted(map(int, node_groups.keys()))) + return rotation_ids + + +def get_rotation_ids_from_config_file(config_path: str) -> List[int]: + logger.info('Retrieving rotation_ids from %s', config_path) 
+ if config_path is None or not os.path.isfile(config_path): + return [] + with open(config_path) as config_file: + config = json.load(config_file) + return get_rotation_ids_from_config(config) + + def get_finish_ts(config: str) -> Optional[int]: node_groups = get_node_groups_from_config(config) rotation_ids = list(sorted(map(int, node_groups.keys()))) From b6463e9390beedb7651b950780b856b493cb6fa2 Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 20 Jun 2023 09:35:40 +0000 Subject: [PATCH 77/84] Fix update config monitor condition --- core/schains/config/directory.py | 2 +- core/schains/monitor/skaled_monitor.py | 15 +++++----- tests/conftest.py | 16 +++++++++- tests/schains/checks_test.py | 31 ++++++++++++++++++++ tests/schains/monitor/skaled_monitor_test.py | 30 +++++-------------- 5 files changed, 61 insertions(+), 33 deletions(-) diff --git a/core/schains/config/directory.py b/core/schains/config/directory.py index cdbe80c7d..2ade69828 100644 --- a/core/schains/config/directory.py +++ b/core/schains/config/directory.py @@ -97,7 +97,7 @@ def upstreams_for_rotation_id_version( ) -> List[str]: schain_dir_path = schain_config_dir(name) version = formatted_stream_version(stream_version) - prefix = upstreams_for_rotation_id_version(name, rotation_id, version) + prefix = upstream_rotation_version_prefix(name, rotation_id, version) pattern = os.path.join(schain_dir_path, prefix + '*.json') return glob.glob(pattern) diff --git a/core/schains/monitor/skaled_monitor.py b/core/schains/monitor/skaled_monitor.py index 2342c3426..cba237804 100644 --- a/core/schains/monitor/skaled_monitor.py +++ b/core/schains/monitor/skaled_monitor.py @@ -108,7 +108,7 @@ def execute(self) -> None: self.am.reloaded_skaled_container() -class AfterExitSkaledMonitor(BaseSkaledMonitor): +class UpdateConfigSkaledMonitor(BaseSkaledMonitor): def execute(self) -> None: if not self.checks.config_updated: self.am.update_config() @@ -131,7 +131,6 @@ def execute(self): self.am.skaled_rpc() if not 
self.checks.ima_container: self.am.ima_container() - # TODO Prevent exit requests from spamming self.am.send_exit_request() @@ -181,10 +180,10 @@ def is_config_update_time( ) -> bool: if not skaled_status: return False - if not checks.skaled_container: + if not checks.skaled_container and not checks.config_updated: if not checks.rotation_id_updated or skaled_status.exit_time_reached: return True - return skaled_status.exit_time_reached + return False def is_reload_mode(schain_record: SChainRecord) -> bool: @@ -193,10 +192,10 @@ def is_reload_mode(schain_record: SChainRecord) -> bool: def is_new_node_mode(schain_record: SChainRecord, finish_ts: Optional[int]) -> bool: ts = int(time.time()) - secret_shares = get_number_of_secret_shares(schain_record.name) + secret_shares_number = get_number_of_secret_shares(schain_record.name) if finish_ts is None: return False - return finish_ts > ts and secret_shares == 1 + return finish_ts > ts and secret_shares_number == 1 def is_skaled_repair_status(checks: SkaledChecks, skaled_status: Optional[SkaledStatus]) -> bool: @@ -227,7 +226,7 @@ def get_skaled_monitor( backup_run: bool = False ) -> BaseSkaledMonitor: mon_type = RegularSkaledMonitor - logger.info('Chosing skaled monitor') + logger.info('Choosing skaled monitor') logger.info('Upstream config %s', action_manager.upstream_config_path) skaled_status.log() if no_config(checks): @@ -239,7 +238,7 @@ def get_skaled_monitor( elif is_new_node_mode(schain_record, action_manager.upstream_finish_ts): mon_type = NewNodeMonitor elif is_config_update_time(checks, skaled_status): - mon_type = AfterExitSkaledMonitor + mon_type = UpdateConfigSkaledMonitor elif is_new_config(checks): mon_type = NewConfigSkaledMonitor elif is_reload_mode(schain_record): diff --git a/tests/conftest.py b/tests/conftest.py index af6c32f85..f04e979a9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,10 +5,12 @@ import shutil import string import subprocess +from pathlib import Path import docker 
import pytest + from skale import SkaleManager from skale.wallets import Web3Wallet from skale.utils.account_tools import generate_account, send_eth @@ -35,7 +37,7 @@ get_node_ips_from_config, get_own_ip_from_config ) -from core.schains.config.directory import skaled_status_filepath +from core.schains.config.directory import schain_config_dir, skaled_status_filepath from core.schains.cleaner import remove_schain_container, remove_schain_volume from core.schains.ima import ImaData from core.schains.skaled_status import init_skaled_status, SkaledStatus @@ -677,3 +679,15 @@ def skale_manager_opts(): schains_internal_address='0x1656', nodes_address='0x7742' ) + + +@pytest.fixture +def new_upstream(schain_db): + name = schain_db + config_dir = schain_config_dir(name) + upath = os.path.join(f'schain_{name}_2_2_1_16_1687248983') + try: + Path(upath).touch() + yield upath + finally: + shutil.rmtree(config_dir) diff --git a/tests/schains/checks_test.py b/tests/schains/checks_test.py index 89fa3f314..17f37d3b0 100644 --- a/tests/schains/checks_test.py +++ b/tests/schains/checks_test.py @@ -385,3 +385,34 @@ def test_config_updated(skale, rule_controller, schain_db, dutils): with open(upstream_path, 'w') as upstream_file: json.dump(config_content, upstream_file) assert not checks.config_updated + + +def test_upstream_config_1check( + skale, + schain_db, + uninited_rule_controller, + new_upstream, + dutils +): + schain_name = schain_db + schain_record = SChainRecord.get_by_name(schain_name) + checks = SChainChecks( + schain_name, + TEST_NODE_ID, + schain_record=schain_record, + rule_controller=uninited_rule_controller, + stream_version=CONFIG_STREAM, + dutils=dutils + ) + assert not checks.upstream_config + + checks = SChainChecks( + schain_name, + TEST_NODE_ID, + schain_record=schain_record, + rule_controller=uninited_rule_controller, + stream_version='2.1.16', + rotation_id=2, + dutils=dutils + ) + assert checks.upstream_config diff --git 
a/tests/schains/monitor/skaled_monitor_test.py b/tests/schains/monitor/skaled_monitor_test.py index 33eb7e7b9..f6db35988 100644 --- a/tests/schains/monitor/skaled_monitor_test.py +++ b/tests/schains/monitor/skaled_monitor_test.py @@ -1,15 +1,10 @@ import datetime -import os -import shutil -from pathlib import Path import pytest from core.schains.checks import CheckRes, SkaledChecks -from core.schains.config.directory import schain_config_dir from core.schains.monitor.action import SkaledActionManager from core.schains.monitor.skaled_monitor import ( - AfterExitSkaledMonitor, BackupSkaledMonitor, get_skaled_monitor, NewConfigSkaledMonitor, @@ -17,7 +12,8 @@ NoConfigMonitor, RecreateSkaledMonitor, RegularSkaledMonitor, - RepairSkaledMonitor + RepairSkaledMonitor, + UpdateConfigSkaledMonitor ) from core.schains.rotation import get_schain_public_key from core.schains.runner import get_container_info @@ -269,7 +265,7 @@ def test_get_skaled_monitor_new_config( assert isinstance(mon, NewConfigSkaledMonitor) -def test_get_skaled_monitor_after_exit( +def test_get_skaled_monitor_update_config( skaled_am, skaled_checks, schain_db, @@ -284,22 +280,10 @@ def test_get_skaled_monitor_after_exit( schain_record, skaled_status_exit_time_reached ) - assert isinstance(mon, AfterExitSkaledMonitor) + assert isinstance(mon, UpdateConfigSkaledMonitor) -@pytest.fixture -def new_upstream(schain_db): - name = schain_db - config_dir = schain_config_dir(name) - upath = os.path.join(f'schain_{name}_2_2_1_16_1687248983') - try: - Path(upath).touch() - yield upath - finally: - shutil.rmtree(config_dir) - - -def test_get_skaled_monitor_after_exit_no_rotation( +def test_get_skaled_monitor_update_config_no_rotation( skaled_am, skaled_checks, schain_db, @@ -314,7 +298,7 @@ def test_get_skaled_monitor_after_exit_no_rotation( schain_record, skaled_status ) - assert isinstance(mon, AfterExitSkaledMonitor) + assert isinstance(mon, UpdateConfigSkaledMonitor) def test_get_skaled_monitor_recreate( @@ -362,7 
+346,7 @@ def test_recreate_skaled_monitor(skaled_am, skaled_checks): def test_after_exit_skaled_monitor(skaled_am, skaled_checks): - mon = AfterExitSkaledMonitor(skaled_am, skaled_checks) + mon = UpdateConfigSkaledMonitor(skaled_am, skaled_checks) mon.run() From c6e49b6075a5b6c56d1a7a11291735c205b53d4c Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 20 Jun 2023 13:02:01 +0000 Subject: [PATCH 78/84] Fix new node monitor condition --- core/schains/config/main.py | 4 +- core/schains/monitor/skaled_monitor.py | 21 +++-- tests/schains/checks_test.py | 31 ------ tests/schains/monitor/skaled_monitor_test.py | 99 ++++++++++++++++++-- 4 files changed, 104 insertions(+), 51 deletions(-) diff --git a/core/schains/config/main.py b/core/schains/config/main.py index b14c169d9..9f7c8dec2 100644 --- a/core/schains/config/main.py +++ b/core/schains/config/main.py @@ -205,7 +205,7 @@ def get_finish_ts_from_config(schain_name: str) -> Optional[int]: return get_finish_ts(config) -def get_number_of_secret_shares(schain_name: str) -> Optional[int]: +def get_number_of_secret_shares(schain_name: str) -> int: config_dir = schain_config_dir(schain_name) prefix = 'secret_key_' - return get_files_with_prefix(config_dir, prefix) + return len(get_files_with_prefix(config_dir, prefix)) diff --git a/core/schains/monitor/skaled_monitor.py b/core/schains/monitor/skaled_monitor.py index cba237804..52b6c965b 100644 --- a/core/schains/monitor/skaled_monitor.py +++ b/core/schains/monitor/skaled_monitor.py @@ -134,7 +134,7 @@ def execute(self): self.am.send_exit_request() -class NoConfigMonitor(BaseSkaledMonitor): +class NoConfigSkaledMonitor(BaseSkaledMonitor): def execute(self): if not self.checks.upstream_exists: logger.info('Waiting for upstream config') @@ -143,7 +143,7 @@ def execute(self): self.am.update_config() -class NewNodeMonitor(BaseSkaledMonitor): +class NewNodeSkaledMonitor(BaseSkaledMonitor): def execute(self): if not self.checks.volume: self.am.volume() @@ -170,7 +170,7 @@ def 
is_repair_mode( return schain_record.repair_mode or is_skaled_repair_status(checks, skaled_status) -def is_new_config(checks: SkaledChecks) -> bool: +def is_new_config_mode(checks: SkaledChecks) -> bool: return checks.config and not checks.config_updated @@ -225,21 +225,24 @@ def get_skaled_monitor( skaled_status: Optional[SkaledStatus], backup_run: bool = False ) -> BaseSkaledMonitor: - mon_type = RegularSkaledMonitor + logger.info('Choosing skaled monitor') logger.info('Upstream config %s', action_manager.upstream_config_path) - skaled_status.log() + if skaled_status: + skaled_status.log() + + mon_type = RegularSkaledMonitor if no_config(checks): - mon_type = NoConfigMonitor + mon_type = NoConfigSkaledMonitor elif is_backup_mode(schain_record, backup_run): mon_type = BackupSkaledMonitor elif is_repair_mode(schain_record, checks, skaled_status): mon_type = RepairSkaledMonitor - elif is_new_node_mode(schain_record, action_manager.upstream_finish_ts): - mon_type = NewNodeMonitor + elif is_new_node_mode(schain_record, action_manager.finish_ts): + mon_type = NewNodeSkaledMonitor elif is_config_update_time(checks, skaled_status): mon_type = UpdateConfigSkaledMonitor - elif is_new_config(checks): + elif is_new_config_mode(checks): mon_type = NewConfigSkaledMonitor elif is_reload_mode(schain_record): mon_type = RecreateSkaledMonitor diff --git a/tests/schains/checks_test.py b/tests/schains/checks_test.py index 17f37d3b0..89fa3f314 100644 --- a/tests/schains/checks_test.py +++ b/tests/schains/checks_test.py @@ -385,34 +385,3 @@ def test_config_updated(skale, rule_controller, schain_db, dutils): with open(upstream_path, 'w') as upstream_file: json.dump(config_content, upstream_file) assert not checks.config_updated - - -def test_upstream_config_1check( - skale, - schain_db, - uninited_rule_controller, - new_upstream, - dutils -): - schain_name = schain_db - schain_record = SChainRecord.get_by_name(schain_name) - checks = SChainChecks( - schain_name, - TEST_NODE_ID, - 
schain_record=schain_record, - rule_controller=uninited_rule_controller, - stream_version=CONFIG_STREAM, - dutils=dutils - ) - assert not checks.upstream_config - - checks = SChainChecks( - schain_name, - TEST_NODE_ID, - schain_record=schain_record, - rule_controller=uninited_rule_controller, - stream_version='2.1.16', - rotation_id=2, - dutils=dutils - ) - assert checks.upstream_config diff --git a/tests/schains/monitor/skaled_monitor_test.py b/tests/schains/monitor/skaled_monitor_test.py index f6db35988..046119507 100644 --- a/tests/schains/monitor/skaled_monitor_test.py +++ b/tests/schains/monitor/skaled_monitor_test.py @@ -1,5 +1,7 @@ import datetime +from unittest import mock +import freezegun import pytest from core.schains.checks import CheckRes, SkaledChecks @@ -8,8 +10,8 @@ BackupSkaledMonitor, get_skaled_monitor, NewConfigSkaledMonitor, - NewNodeMonitor, - NoConfigMonitor, + NewNodeSkaledMonitor, + NoConfigSkaledMonitor, RecreateSkaledMonitor, RegularSkaledMonitor, RepairSkaledMonitor, @@ -20,6 +22,7 @@ from tools.configs.containers import SCHAIN_CONTAINER, IMA_CONTAINER from web.models.schain import SChainRecord + CURRENT_TIMESTAMP = 1594903080 CURRENT_DATETIME = datetime.datetime.utcfromtimestamp(CURRENT_TIMESTAMP) @@ -129,6 +132,34 @@ def skaled_checks_no_config( ) +class SkaledChecksConfigOutdated(SkaledChecks): + @property + def config_outdated(self) -> CheckRes: + return CheckRes(False) + + @property + def config_updated(self) -> CheckRes: + return CheckRes(False) + + +@pytest.fixture +def skaled_checks_outdated_config( + schain_db, + skale, + rule_controller, + dutils +): + name = schain_db + schain_record = SChainRecord.get_by_name(name) + return SkaledChecksConfigOutdated( + schain_name=name, + schain_record=schain_record, + rule_controller=rule_controller, + ima_linked=True, + dutils=dutils + ) + + def test_get_skaled_monitor_no_config(skaled_am, skaled_checks_no_config, skaled_status, schain_db): name = schain_db schain_record = 
SChainRecord.get_by_name(name) @@ -138,7 +169,7 @@ def test_get_skaled_monitor_no_config(skaled_am, skaled_checks_no_config, skaled schain_record, skaled_status ) - assert isinstance(mon, NoConfigMonitor) + assert isinstance(mon, NoConfigSkaledMonitor) def test_get_skaled_monitor_regular_and_backup(skaled_am, skaled_checks, skaled_status, schain_db): @@ -224,6 +255,10 @@ def config_updated(self) -> CheckRes: def config(self) -> CheckRes: return CheckRes(True) + @property + def skaled_container(self) -> CheckRes: + return CheckRes(True) + @property def container(self) -> CheckRes: return CheckRes(True) @@ -265,9 +300,55 @@ def test_get_skaled_monitor_new_config( assert isinstance(mon, NewConfigSkaledMonitor) +@freezegun.freeze_time(CURRENT_DATETIME) +def test_get_skaled_monitor_new_node( + schain_db, + skale, + node_config, + rule_controller, + schain_on_contracts, + predeployed_ima, + rotation_data, + secret_key, + ima_data, + ssl_folder, + skaled_status, + skaled_checks, + dutils +): + name = schain_db + schain_record = SChainRecord.get_by_name(name) + schain = skale.schains.get_by_name(name) + public_key = get_schain_public_key(skale, name) + + finish_ts = CURRENT_TIMESTAMP + 10 + with mock.patch( + f'{__name__}.SkaledActionManager.finish_ts', + new_callable=mock.PropertyMock + ) as finish_ts_mock: + skaled_am = SkaledActionManager( + schain=schain, + rule_controller=rule_controller, + ima_data=ima_data, + node_config=node_config, + public_key=public_key, + checks=skaled_checks, + dutils=dutils + ) + finish_ts_mock.return_value = finish_ts + + mon = get_skaled_monitor( + skaled_am, + skaled_checks, + schain_record, + skaled_status + ) + assert isinstance(mon, NewNodeSkaledMonitor) + + def test_get_skaled_monitor_update_config( skaled_am, - skaled_checks, + skaled_checks_outdated_config, schain_db, skaled_status_exit_time_reached, ): @@ -276,7 +357,7 @@ def test_get_skaled_monitor_update_config( mon = get_skaled_monitor( skaled_am, - skaled_checks, + 
skaled_checks_outdated_config, schain_record, skaled_status_exit_time_reached ) @@ -285,7 +366,7 @@ def test_get_skaled_monitor_update_config( def test_get_skaled_monitor_update_config_no_rotation( skaled_am, - skaled_checks, + skaled_checks_outdated_config, schain_db, skaled_status, new_upstream @@ -294,7 +375,7 @@ def test_get_skaled_monitor_update_config_no_rotation( schain_record = SChainRecord.get_by_name(name) mon = get_skaled_monitor( skaled_am, - skaled_checks, + skaled_checks_outdated_config, schain_record, skaled_status ) @@ -351,10 +432,10 @@ def test_after_exit_skaled_monitor(skaled_am, skaled_checks): def test_no_config_monitor(skaled_am, skaled_checks): - mon = NoConfigMonitor(skaled_am, skaled_checks) + mon = NoConfigSkaledMonitor(skaled_am, skaled_checks) mon.run() def test_new_node_monitor(skaled_am, skaled_checks): - mon = NewNodeMonitor(skaled_am, skaled_checks) + mon = NewNodeSkaledMonitor(skaled_am, skaled_checks) mon.run() From dd155d3c71e85f1829b71373da891b40d996a8f7 Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 20 Jun 2023 16:15:19 +0000 Subject: [PATCH 79/84] Fix NewNodeSkaledMonitor --- core/schains/monitor/skaled_monitor.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/schains/monitor/skaled_monitor.py b/core/schains/monitor/skaled_monitor.py index 52b6c965b..eb4205555 100644 --- a/core/schains/monitor/skaled_monitor.py +++ b/core/schains/monitor/skaled_monitor.py @@ -149,7 +149,7 @@ def execute(self): self.am.volume() if not self.checks.firewall_rules: self.am.firewall_rules() - if not self.am.skaled_container: + if not self.checks.skaled_container: self.am.skaled_container( download_snapshot=True, start_ts=self.am.finish_ts @@ -225,7 +225,6 @@ def get_skaled_monitor( skaled_status: Optional[SkaledStatus], backup_run: bool = False ) -> BaseSkaledMonitor: - logger.info('Choosing skaled monitor') logger.info('Upstream config %s', action_manager.upstream_config_path) if skaled_status: From 
15c78f20631d598adc0389fb675c01bdf310e43c Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 20 Jun 2023 16:15:38 +0000 Subject: [PATCH 80/84] Fix leaving node condition --- core/schains/monitor/main.py | 142 +++++++++++++++++------------ tests/schains/monitor/main_test.py | 71 +++++++++------ 2 files changed, 126 insertions(+), 87 deletions(-) diff --git a/core/schains/monitor/main.py b/core/schains/monitor/main.py index 4e5f7f815..cfcfe9c8b 100644 --- a/core/schains/monitor/main.py +++ b/core/schains/monitor/main.py @@ -22,7 +22,9 @@ import random import logging from typing import Dict +from concurrent.futures import Future, ThreadPoolExecutor from importlib import reload +from typing import List, Optional from skale import Skale, SkaleIma from web3._utils import request as web3_request @@ -37,7 +39,7 @@ RegularConfigMonitor ) from core.schains.monitor.action import ConfigActionManager, SkaledActionManager -from core.schains.task import run_tasks, Task +from core.schains.task import keep_tasks_running, Task from core.schains.firewall.utils import get_sync_agent_ranges from core.schains.rotation import get_schain_public_key from core.schains.skaled_status import get_skaled_status @@ -55,10 +57,6 @@ logger = logging.getLogger(__name__) -def get_log_prefix(name): - return f'schain: {name} -' - - def run_config_pipeline( skale: Skale, schain: Dict, @@ -145,6 +143,64 @@ def run_skaled_pipeline( mon.run() +def post_monitor_sleep(): + schain_monitor_sleep = random.randint( + MIN_SCHAIN_MONITOR_SLEEP_INTERVAL, + MAX_SCHAIN_MONITOR_SLEEP_INTERVAL + ) + logger.info('%s monitor completed, sleeping for {schain_monitor_sleep}s...') + time.sleep(schain_monitor_sleep) + + +def create_and_execute_tasks( + skale, + schain, + node_config: NodeConfig, + skale_ima: SkaleIma, + stream_version, + executor, + futures, + dutils +): + reload(web3_request) + name = schain['name'] + + is_rotation_active = skale.node_rotation.is_rotation_active(name) + + leaving_chain = not 
is_node_part_of_chain(skale, name, node_config.id) + if leaving_chain and not is_rotation_active: + logger.warning('NOT ON NODE ({node_config.id}), finising process...') + return True + + tasks = [ + Task( + f'{name}-skaled', + functools.partial( + run_skaled_pipeline, + skale=skale, + skale_ima=skale_ima, + schain=schain, + node_config=node_config, + dutils=dutils + ), + ) + ] + if not leaving_chain: + tasks.append( + Task( + f'{name}-config', + functools.partial( + run_config_pipeline, + skale=skale, + schain=schain, + node_config=node_config, + stream_version=stream_version + ) + )) + + keep_tasks_running(executor, tasks, futures) + + def run_monitor_for_schain( skale, skale_ima, @@ -153,60 +209,28 @@ def run_monitor_for_schain( dutils=None, once=False ): - p = get_log_prefix(schain['name']) stream_version = get_skale_node_version() - def post_monitor_sleep(): - schain_monitor_sleep = random.randint( - MIN_SCHAIN_MONITOR_SLEEP_INTERVAL, - MAX_SCHAIN_MONITOR_SLEEP_INTERVAL - ) - logger.info('%s monitor completed, sleeping for {schain_monitor_sleep}s...', p) - time.sleep(schain_monitor_sleep) - - while True: - try: - reload(web3_request) - name = schain['name'] - - is_rotation_active = skale.node_rotation.is_rotation_active(name) - - leaving_chain = not is_node_part_of_chain(skale, name, node_config.id) - if leaving_chain and not is_rotation_active: - logger.warning(f'{p} NOT ON NODE ({node_config.id}), finising process...') - return True - - tasks = [ - Task( - f'{name}-skaled', - functools.partial( - run_skaled_pipeline, - skale=skale, - skale_ima=skale_ima, - schain=schain, - node_config=node_config, - dutils=dutils - ), + tasks_number = 2 + with ThreadPoolExecutor(max_workers=tasks_number, thread_name_prefix='T') as executor: + futures: List[Optional[Future]] = [None for i in range(tasks_number)] + while True: + try: + create_and_execute_tasks( + skale, + schain, + node_config, + skale_ima, + stream_version, + executor, + futures, + dutils ) - ] - if not 
leaving_chain: - tasks.append( - Task( - f'{name}-config', - functools.partial( - run_config_pipeline, - skale=skale, - schain=schain, - node_config=node_config, - stream_version=stream_version - ) - )) - run_tasks(name=name, tasks=tasks) - if once: - return True - post_monitor_sleep() - except Exception: - logger.exception('%s monitor failed', p) - if once: - return False - post_monitor_sleep() + if once: + return True + post_monitor_sleep() + except Exception: + logger.exception('Monitor failed') + if once: + return False + post_monitor_sleep() diff --git a/tests/schains/monitor/main_test.py b/tests/schains/monitor/main_test.py index 416b325ed..77847b910 100644 --- a/tests/schains/monitor/main_test.py +++ b/tests/schains/monitor/main_test.py @@ -1,4 +1,5 @@ import mock +from concurrent.futures import ThreadPoolExecutor import pytest @@ -10,34 +11,6 @@ from tools.helper import is_node_part_of_chain -class TaskNoAction(Task): - def run(self): - pass - - -@pytest.mark.skip -def test_run_monitor_for_schain(skale, skale_ima, node_config, schain_db, dutils): - with mock.patch('core.schains.monitor.main.Task', TaskNoAction), \ - mock.patch('core.schains.monitor.main.is_node_part_of_chain', return_value=True): - assert run_monitor_for_schain( - skale, - skale_ima, - node_config, - {'name': schain_db, 'partOfNode': 0, 'generation': 0}, - once=True, - dutils=dutils - ) - with mock.patch('core.schains.monitor.main.Task', TaskNoAction): - assert run_monitor_for_schain( - skale, - skale_ima, - node_config, - {'name': schain_db, 'partOfNode': 0, 'generation': 0}, - once=True, - dutils=dutils - ) - - @pytest.fixture def sync_ranges(skale): skale.sync_manager.grant_sync_manager_role(skale.wallet.address) @@ -73,3 +46,45 @@ def test_is_node_part_of_chain(skale, schain_on_contracts, node_config): node_exist_node = 10000 chain_on_node = is_node_part_of_chain(skale, schain_on_contracts, node_exist_node) assert not chain_on_node + + +def test_run_monitor_for_schain( + skale, + 
skale_ima, + schain_on_contracts, + node_config, + schain_db, + dutils +): + with mock.patch('core.schains.monitor.main.keep_tasks_running') as keep_tasks_running_mock: + run_monitor_for_schain( + skale, + skale_ima, + node_config, + schain={'name': schain_db, 'partOfNode': 0, 'generation': 0}, + dutils=dutils, + once=True + ) + assert isinstance(keep_tasks_running_mock.call_args[0][0], ThreadPoolExecutor) + assert isinstance(keep_tasks_running_mock.call_args[0][1][0], Task) + assert isinstance(keep_tasks_running_mock.call_args[0][1][1], Task) + assert keep_tasks_running_mock.call_args[0][2] == [None, None] + + +def test_run_monitor_for_schain_left( + skale, + skale_ima, + node_config, + schain_db, + dutils +): + with mock.patch('core.schains.monitor.main.keep_tasks_running') as keep_tasks_running_mock: + run_monitor_for_schain( + skale, + skale_ima, + node_config, + schain={'name': 'not-on-node', 'partOfNode': 0, 'generation': 0}, + dutils=dutils, + once=True + ) + keep_tasks_running_mock.assert_not_called() From a5b4b9968c840a97ead227e1a06df5889ed01725 Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 20 Jun 2023 16:58:27 +0000 Subject: [PATCH 81/84] Remove unused structures --- core/schains/monitor/skaled_monitor.py | 8 -------- tools/wallet_utils.py | 23 ----------------------- 2 files changed, 31 deletions(-) diff --git a/core/schains/monitor/skaled_monitor.py b/core/schains/monitor/skaled_monitor.py index eb4205555..1bb6b5962 100644 --- a/core/schains/monitor/skaled_monitor.py +++ b/core/schains/monitor/skaled_monitor.py @@ -206,14 +206,6 @@ def is_skaled_repair_status(checks: SkaledChecks, skaled_status: Optional[Skaled return not checks.skaled_container.status and needs_repair -def is_skaled_reload_status(checks: SkaledChecks, skaled_status: Optional[SkaledStatus]) -> bool: - if skaled_status is None: - return False - skaled_status.log() - needs_reload = skaled_status.start_again and not skaled_status.start_from_snapshot - return not 
checks.skaled_container and needs_reload - - def no_config(checks: SkaledChecks) -> bool: return not checks.config diff --git a/tools/wallet_utils.py b/tools/wallet_utils.py index 255edc503..3faca9e6a 100644 --- a/tools/wallet_utils.py +++ b/tools/wallet_utils.py @@ -20,12 +20,10 @@ import logging -import requests from redis import Redis from skale.utils.web3_utils import init_web3 from skale.wallets import BaseWallet, RedisWalletAdapter, SgxWallet from skale.wallets.web3_wallet import to_checksum_address -from web3.providers.rpc import HTTPProvider from tools.configs import ( DEFAULT_POOL, @@ -72,24 +70,3 @@ def init_wallet( path_to_cert=SGX_CERTIFICATES_FOLDER ) return RedisWalletAdapter(rs, pool, sgx_wallet) - - -class HTTPProviderNoCache(HTTPProvider): - def __init__(self, *args, **kwargs) -> None: - super().__init__(*args, **kwargs, session=None) - - def make_request(self, method, params): - logger.debug('Making request HTTPCustom. URI: %s, Method: %s', - self.endpoint_uri, method) - request_data = self.encode_rpc_request(method, params) - raw_response = requests.post( - self.endpoint_uri, - request_data, - **self.get_request_kwargs() - ) - raw_response.raise_for_status() - response = self.decode_rpc_response(raw_response.content) - logger.debug('Getting response HTTP Custom. 
URI: %s, ' - 'Method: %s, Response: %s', - self.endpoint_uri, method, response) - return response From ac756e80e17939d5b76720fffc14ff135db9a109 Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 20 Jun 2023 16:59:00 +0000 Subject: [PATCH 82/84] Improve logging in actions --- core/schains/monitor/action.py | 48 +++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/core/schains/monitor/action.py b/core/schains/monitor/action.py index 8d435e8ed..50d924ec3 100644 --- a/core/schains/monitor/action.py +++ b/core/schains/monitor/action.py @@ -152,6 +152,7 @@ def __init__( def config_dir(self) -> bool: initial_status = self.checks.config_dir.status if not initial_status: + logger.info('Initializing config dir') init_schain_config_dir(self.name) else: logger.info('config_dir - ok') @@ -161,6 +162,7 @@ def config_dir(self) -> bool: def dkg(self) -> bool: initial_status = self.checks.dkg.status if not initial_status: + logger.info('Running safe_run_dkg') dkg_result = safe_run_dkg( skale=self.skale, schain_name=self.name, @@ -177,13 +179,17 @@ def dkg(self) -> bool: if not dkg_result.status.is_done(): raise DkgError('DKG failed') else: - logger.info('dkg - ok') + logger.info('Dkg - ok') return initial_status @BaseActionManager.monitor_block def upstream_config(self) -> bool: initial_status = self.checks.upstream_config if not initial_status: + logger.info( + 'Creating new upstream_config rotation_id: %s, stream: %s', + self.rotation_data.get('rotation_id'), self.stream_version + ) create_new_schain_config( skale=self.skale, node_id=self.node_config.id, @@ -229,6 +235,7 @@ def __init__( def volume(self) -> bool: initial_status = self.checks.volume.status if not initial_status: + logger.info('Creating volume') init_data_volume(self.schain, dutils=self.dutils) else: logger.info('Volume - ok') @@ -263,6 +270,11 @@ def skaled_container( if download_snapshot: public_key = self.public_key + logger.info( + 'Starting skaled container 
watchman snapshot: %s, start_ts: %s', + download_snapshot, + start_ts + ) monitor_schain_container( self.schain, schain_record=self.schain_record, @@ -280,30 +292,34 @@ def skaled_container( @BaseActionManager.monitor_block def restart_skaled_container(self) -> bool: initial_status = True - if not is_container_exists(self.name, dutils=self.dutils): - logger.info(f'sChain {self.name}: container doesn\'t exits, running container...') - initial_status = self.skaled_container() - else: + if is_container_exists(self.name, dutils=self.dutils): + logger.info('Skaled container exists, restarting') restart_container(SCHAIN_CONTAINER, self.schain, dutils=self.dutils) + else: + logger.info('Skaled container doesn\'t exists, running skaled watchman') + initial_status = self.skaled_container() return initial_status @BaseActionManager.monitor_block def restart_ima_container(self) -> bool: initial_status = True - if not is_container_exists(self.name, container_type=IMA_CONTAINER, dutils=self.dutils): - initial_status = self.ima_container() - else: + if is_container_exists(self.name, container_type=IMA_CONTAINER, dutils=self.dutils): + logger.info('IMA container exists, restarting') restart_container(IMA_CONTAINER, self.schain, dutils=self.dutils) + else: + logger.info('IMA container doesn\'t exists, running skaled watchman') + initial_status = self.ima_container() return initial_status @BaseActionManager.monitor_block def reloaded_skaled_container(self) -> bool: - logger.info('starting skaled with reloaded configuration') + logger.info('Starting skaled from scratch') initial_status = True if is_container_exists(self.name, dutils=self.dutils): + logger.info('Removing skaled container') remove_schain_container(self.name, dutils=self.dutils) else: - logger.warning('container doesn\'t exists') + logger.warning('Container doesn\'t exists') self.schain_record.set_restart_count(0) self.schain_record.set_failed_rpc_count(0) self.schain_record.set_needs_reload(False) @@ -315,6 +331,7 @@ 
def skaled_rpc(self) -> bool: initial_status = self.checks.rpc.status if not initial_status: self.display_skaled_logs() + logger.info('Handling schain rpc') handle_failed_schain_rpc( self.schain, schain_record=self.schain_record, @@ -330,7 +347,7 @@ def skaled_rpc(self) -> bool: def ima_container(self) -> bool: initial_status = self.checks.ima_container if not initial_status: - logger.info('trying to run IMA container') + logger.info('Running IMA container watchman') monitor_ima_container( self.schain, self.ima_data, @@ -342,7 +359,7 @@ def ima_container(self) -> bool: @BaseActionManager.monitor_block def cleanup_schain_docker_entity(self) -> bool: - logger.info('removing docker artifacts') + logger.info('Removing skaled docker artifacts') remove_schain_container(self.name, dutils=self.dutils) time.sleep(SCHAIN_CLEANUP_TIMEOUT) remove_schain_volume(self.name, dutils=self.dutils) @@ -352,15 +369,15 @@ def cleanup_schain_docker_entity(self) -> bool: def update_config(self) -> bool: upstream_path = get_upstream_config_filepath(self.name) if upstream_path: - logger.info('syncing with upstream %s', upstream_path) + logger.info('Syncing config with upstream %s', upstream_path) sync_config_with_file(self.name, upstream_path) - logger.info('no upstream config yet') + logger.info('No upstream config yet') return upstream_path is not None @BaseActionManager.monitor_block def send_exit_request(self) -> None: finish_ts = self.upstream_finish_ts - logger.info('Skaled exit finish_ts %s', finish_ts) + logger.info('Trying to set skaled exit time %s', finish_ts) if finish_ts is not None: set_rotation_for_schain(self.name, finish_ts) @@ -392,4 +409,5 @@ def notify_repair_mode(self) -> None: @BaseActionManager.monitor_block def disable_repair_mode(self) -> None: + logger.info('Switching off repair mode') switch_off_repair_mode(self.name) From 2dff12aa6ad24a6f2042193518ec2b3909ca6986 Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 20 Jun 2023 17:27:33 +0000 Subject: [PATCH 83/84] 
Remove unused new_schain check --- core/schains/checks.py | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/core/schains/checks.py b/core/schains/checks.py index c8c47c1c2..6189d8545 100644 --- a/core/schains/checks.py +++ b/core/schains/checks.py @@ -95,9 +95,9 @@ class IChecks(ABC): def get_all(self, log=True, save=False, checks_filter=None) -> Dict: pass - @abstractmethod def is_healthy(self) -> bool: - pass + checks = self.get_all() + return False not in checks.values() class ConfigChecks(IChecks): @@ -141,9 +141,6 @@ def upstream_config(self) -> CheckRes: logger.debug('Upstream configs for %s: %s', self.name, upstreams) return len(upstreams) > 0 - def new_schain(self) -> CheckRes: - return CheckRes(self.schain_record.new_schain) - def get_all(self, log=True, save=False, checks_filter=None) -> Dict: if not checks_filter: checks_filter = API_ALLOWED_CHECKS @@ -181,7 +178,6 @@ def __init__( self.container_name = get_container_name(SCHAIN_CONTAINER, self.name) self.ima_linked = ima_linked self.rc = rule_controller - self._new_schain = self.schain_record.new_schain def get_all(self, log=True, save=False, checks_filter=None) -> Dict: if not checks_filter: @@ -201,14 +197,6 @@ def get_all(self, log=True, save=False, checks_filter=None) -> Dict: save_checks_dict(self.name, checks_dict) return checks_dict - def is_healthy(self) -> bool: - checks = self.get_all() - return False not in checks.values() - - @property - def new_schain(self) -> CheckRes: - return CheckRes(self._new_schain) - @property def upstream_exists(self) -> CheckRes: upstream_path = get_upstream_config_filepath(self.name) @@ -222,6 +210,11 @@ def rotation_id_updated(self) -> int: config_path = schain_config_filepath(self.name) upstream_rotations = get_rotation_ids_from_config_file(upstream_path) config_rotations = get_rotation_ids_from_config_file(config_path) + logger.debug( + 'Comparing rotation_ids between upstream %s and %s', + upstream_path, + config_path + 
) return CheckRes(upstream_rotations == config_rotations) @property @@ -230,6 +223,7 @@ def config_updated(self) -> CheckRes: return CheckRes(False) upstream_path = get_upstream_config_filepath(self.name) config_path = schain_config_filepath(self.name) + logger.debug('Checking if %s updated according to %s', config_path, upstream_path) if not upstream_path: return CheckRes(True) return CheckRes(filecmp.cmp(upstream_path, config_path)) From 5cdc5bcdbb51d33757b77070c32ed7e4a1bbd28f Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 20 Jun 2023 18:51:15 +0000 Subject: [PATCH 84/84] Fix repair monitor. Improve logs --- core/schains/monitor/config_monitor.py | 4 ++-- core/schains/monitor/main.py | 12 ++++++++---- core/schains/monitor/skaled_monitor.py | 5 +++-- core/schains/task.py | 11 ++++++++++- 4 files changed, 23 insertions(+), 9 deletions(-) diff --git a/core/schains/monitor/config_monitor.py b/core/schains/monitor/config_monitor.py index 1e4ff5a60..a406243db 100644 --- a/core/schains/monitor/config_monitor.py +++ b/core/schains/monitor/config_monitor.py @@ -43,13 +43,13 @@ def execute(self) -> None: def run(self): typename = type(self).__name__ - logger.info('Config monitor type %s', typename) + logger.info('Config monitor type %s starting', typename) self.am._upd_last_seen() self.am._upd_schain_record() self.execute() self.am.log_executed_blocks() self.am._upd_last_seen() - logger.info('Finished %s config monitor runner', typename) + logger.info('Config monitor type %s finished', typename) class RegularConfigMonitor(BaseConfigMonitor): diff --git a/core/schains/monitor/main.py b/core/schains/monitor/main.py index cfcfe9c8b..d9e08a868 100644 --- a/core/schains/monitor/main.py +++ b/core/schains/monitor/main.py @@ -50,9 +50,11 @@ from web.models.schain import upsert_schain_record -MIN_SCHAIN_MONITOR_SLEEP_INTERVAL = 90 -MAX_SCHAIN_MONITOR_SLEEP_INTERVAL = 180 +MIN_SCHAIN_MONITOR_SLEEP_INTERVAL = 1 +MAX_SCHAIN_MONITOR_SLEEP_INTERVAL = 9 +SKALED_PIPELINE_SLEEP = 10 
+CONFIG_PIPELINE_SLEEP = 40 logger = logging.getLogger(__name__) @@ -148,7 +150,7 @@ def post_monitor_sleep(): MIN_SCHAIN_MONITOR_SLEEP_INTERVAL, MAX_SCHAIN_MONITOR_SLEEP_INTERVAL ) - logger.info('%s monitor completed, sleeping for {schain_monitor_sleep}s...') + logger.info('Monitor completed, sleeping for %d', schain_monitor_sleep) time.sleep(schain_monitor_sleep) @@ -183,6 +185,7 @@ def create_and_execute_tasks( node_config=node_config, dutils=dutils ), + sleep=SKALED_PIPELINE_SLEEP ) ] if not leaving_chain: @@ -195,7 +198,8 @@ def create_and_execute_tasks( schain=schain, node_config=node_config, stream_version=stream_version - ) + ), + sleep=CONFIG_PIPELINE_SLEEP )) keep_tasks_running(executor, tasks, futures) diff --git a/core/schains/monitor/skaled_monitor.py b/core/schains/monitor/skaled_monitor.py index 1bb6b5962..a1c638245 100644 --- a/core/schains/monitor/skaled_monitor.py +++ b/core/schains/monitor/skaled_monitor.py @@ -48,13 +48,13 @@ def execute(self) -> None: def run(self): typename = type(self).__name__ - logger.info('Skaled monitor type %s', typename) + logger.info('Skaled monitor type %s starting', typename) self.am._upd_last_seen() self.am._upd_schain_record() self.execute() self.am.log_executed_blocks() self.am._upd_last_seen() - logger.info('Finished %s skaled monitor runner', typename) + logger.info('Skaled monitor type %s finished', typename) class RegularSkaledMonitor(BaseSkaledMonitor): @@ -84,6 +84,7 @@ def execute(self) -> None: self.am.volume() if self.checks.volume and not self.checks.skaled_container: self.am.skaled_container(download_snapshot=True) + self.am.disable_repair_mode() class BackupSkaledMonitor(BaseSkaledMonitor): diff --git a/core/schains/task.py b/core/schains/task.py index e6231ed07..b95a8eb92 100644 --- a/core/schains/task.py +++ b/core/schains/task.py @@ -7,16 +7,25 @@ class Task: - def __init__(self, name: str, action: Callable, index: int = 0) -> None: + def __init__( + self, + name: str, + action: Callable, + index: 
int = 0, + sleep: int = 2 + ) -> None: self.name = name self.index = index self.action = action + self.sleep = sleep def run(self) -> None: try: self.action() except Exception as e: logger.exception('Task %s failed with %s', self.name, e) + logger.info('Sleeping after task execution for %d', self.sleep) + time.sleep(self.sleep) def keep_tasks_running(