Add ability to restore schain from local snapshot #738

Merged
merged 14 commits on Sep 25, 2023
10 changes: 8 additions & 2 deletions node_cli/cli/node.py
@@ -138,9 +138,15 @@ def backup_node(backup_folder_path):
    is_flag=True,
    hidden=True
)
@click.option(
    '--config-only',
    help='Only restore configuration files in .skale and artifacts',
    is_flag=True,
    hidden=True
)
@streamed_cmd
def restore_node(backup_path, env_file, no_snapshot):
    restore(backup_path, env_file, no_snapshot)
def restore_node(backup_path, env_file, no_snapshot, config_only):
    restore(backup_path, env_file, no_snapshot, config_only)


@node.command('maintenance-on', help="Set SKALE node into maintenance mode")
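
The hidden flag is threaded straight through to the core restore operation, so a configuration-only restore would be invoked as something like `skale node restore <backup-path> <env-file> --config-only` (the `skale` entrypoint name is an assumption here): configuration in .skale and artifacts are restored while containers are left down.
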
29 changes: 27 additions & 2 deletions node_cli/cli/schains.py
@@ -25,6 +25,8 @@
from node_cli.core.schains import (
    describe,
    get_schain_firewall_rules,
    get_schains_by_artifacts,
    restore_schain_from_snapshot,
    show_config,
    show_dkg_info,
    show_schains,
@@ -43,8 +45,17 @@ def schains() -> None:


@schains.command(help="List of sChains served by connected node")
def ls() -> None:
    show_schains()
@click.option(
    '-n', '--names',
    help='Shows only chain names',
    is_flag=True
)
def ls(names: bool) -> None:
    if names:
        schains: str = get_schains_by_artifacts()
        print(schains)
    else:
        show_schains()


@schains.command(help="DKG statuses for each sChain on the node")
@@ -95,3 +106,17 @@ def repair(schain_name: str, snapshot_from: Optional[str] = None) -> None:
)
def info_(schain_name: str, json_format: bool) -> None:
    describe(schain_name, raw=json_format)


@schains.command('restore', help='Restore schain from local snapshot')
@click.argument('schain_name')
@click.argument('snapshot_path')
@click.option('--schain-type', default='medium')
@click.option('--env-type', default=None)
def restore(
    schain_name: str,
    snapshot_path: str,
    schain_type: str,
    env_type: Optional[str]
) -> None:
    restore_schain_from_snapshot(
        schain_name,
        snapshot_path,
        env_type=env_type,
        schain_type=schain_type
    )
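
Putting the pieces together, a local-snapshot restore for one chain would look something like `skale schains restore my-chain /snapshots/my-chain-snapshot-1500.bin --schain-type medium` (hypothetical names; the snapshot filename must end in `-<block number>` before its extension so the block number can be recovered, as the helper in node_cli/core/schains.py below shows).
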
5 changes: 4 additions & 1 deletion node_cli/configs/__init__.py
@@ -41,6 +41,8 @@
SKALE_TMP_DIR = os.path.join(SKALE_DIR, '.tmp')

NODE_DATA_PATH = os.path.join(SKALE_DIR, 'node_data')
SCHAIN_NODE_DATA_PATH = os.path.join(NODE_DATA_PATH, 'schains')
NODE_CONFIG_PATH = os.path.join(NODE_DATA_PATH, 'node_config.json')
CONTAINER_CONFIG_PATH = os.path.join(SKALE_DIR, 'config')
CONTAINER_CONFIG_TMP_PATH = os.path.join(SKALE_TMP_DIR, 'config')
CONTRACTS_PATH = os.path.join(SKALE_DIR, 'contracts_info')
@@ -52,7 +54,8 @@
SGX_CERTIFICATES_DIR_NAME = 'sgx_certs'

COMPOSE_PATH = os.path.join(CONTAINER_CONFIG_PATH, 'docker-compose.yml')
STATIC_PARAMS_FILEPATH = os.path.join(CONTAINER_CONFIG_PATH, 'static_params.yaml')
STATIC_PARAMS_FILEPATH = os.path.join(
    CONTAINER_CONFIG_PATH, 'static_params.yaml')
NGINX_TEMPLATE_FILEPATH = os.path.join(CONTAINER_CONFIG_PATH, 'nginx.conf.j2')
NGINX_CONFIG_FILEPATH = os.path.join(NODE_DATA_PATH, 'nginx.conf')

30 changes: 17 additions & 13 deletions node_cli/core/node.py
@@ -143,7 +143,7 @@ def init(env_filepath):


@check_not_inited
def restore(backup_path, env_filepath, no_snapshot=False):
def restore(backup_path, env_filepath, no_snapshot=False, config_only=False):
    env = get_node_env(env_filepath)
    if env is None:
        return
@@ -154,7 +154,7 @@ def restore(backup_path, env_filepath, no_snapshot=False):
    logger.info('Adding BACKUP_RUN to env ...')
    env['BACKUP_RUN'] = 'True'  # should be str

    restored_ok = restore_op(env, backup_path)
    restored_ok = restore_op(env, backup_path, config_only=config_only)
    if not restored_ok:
        error_exit(
            'Restore operation failed',
@@ -357,26 +357,30 @@ def is_base_containers_alive():
    return len(skale_containers) >= BASE_CONTAINERS_AMOUNT


def get_node_info(format):
def get_node_info_plain():
    status, payload = get_request(
        blueprint=BLUEPRINT_NAME,
        method='info'
    )
    if status == 'ok':
        node_info = payload['node_info']
        if format == 'json':
            print(node_info)
        elif node_info['status'] == NodeStatuses.NOT_CREATED.value:
            print(TEXTS['service']['node_not_registered'])
        else:
            print_node_info(
                node_info,
                get_node_status(int(node_info['status']))
            )
        return payload['node_info']
    else:
        error_exit(payload, exit_code=CLIExitCodes.BAD_API_RESPONSE)


def get_node_info(format):
    node_info = get_node_info_plain()
    if format == 'json':
        print(node_info)
    elif node_info['status'] == NodeStatuses.NOT_CREATED.value:
        print(TEXTS['service']['node_not_registered'])
    else:
        print_node_info(
            node_info,
            get_node_status(int(node_info['status']))
        )


def get_node_status(status):
    node_status = NodeStatuses(status).name
    return TEXTS['node']['status'][node_status]
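
Separating the fetch from the rendering lets other callers reuse the raw payload without touching the printing paths. A minimal sketch of the pattern, with a hypothetical caller:

    # check registration state programmatically instead of printing it
    node_info = get_node_info_plain()
    registered = node_info['status'] != NodeStatuses.NOT_CREATED.value
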
128 changes: 125 additions & 3 deletions node_cli/core/schains.py
@@ -1,16 +1,34 @@
import logging
import os
import pprint
import shutil
from pathlib import Path

from typing import Optional
from typing import Dict, Optional

from node_cli.utils.helper import get_request, post_request, error_exit
from node_cli.configs import (
ALLOCATION_FILEPATH,
NODE_CONFIG_PATH,
SCHAIN_NODE_DATA_PATH
)
from node_cli.configs.env import get_env_config

from node_cli.utils.helper import (
get_request,
error_exit,
safe_load_yml,
post_request
)
from node_cli.utils.exit_codes import CLIExitCodes
from node_cli.utils.print_formatters import (
print_dkg_statuses,
print_firewall_rules,
print_schain_info,
print_schains
)
from node_cli.utils.docker_utils import ensure_volume, is_volume_exists
from node_cli.utils.helper import read_json, run_cmd
from lvmpy.src.core import mount, volume_mountpoint


logger = logging.getLogger(__name__)
@@ -40,7 +58,8 @@ def show_schains() -> None:
        if not schains:
            print('No sChains found')
            return
        print_schains(schains)
        else:
            print_schains(schains)
    else:
        error_exit(payload, exit_code=CLIExitCodes.BAD_API_RESPONSE)

@@ -98,3 +117,106 @@ def describe(schain: str, raw=False) -> None:
        print_schain_info(payload, raw=raw)
    else:
        error_exit(payload, exit_code=CLIExitCodes.BAD_API_RESPONSE)


def btrfs_set_readonly_false(subvolume_path: str) -> None:
    run_cmd(['btrfs', 'property', 'set', '-ts', subvolume_path, 'ro', 'false'])


def btrfs_receive_binary(src_path: str, binary_path: str) -> None:
    run_cmd(['btrfs', 'receive', '-f', binary_path, src_path])


def get_block_number_from_path(snapshot_path: str) -> int:
    stem = Path(snapshot_path).stem
    bn = -1
    try:
        bn = int(stem.split('-')[-1])
    except ValueError:
        return -1
    return bn
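
# A worked example of the expected naming, assuming snapshot dumps are named
# like <chain>-snapshot-<block>.<ext> (an assumption based on the parsing above):
#   get_block_number_from_path('/data/mychain-snapshot-1500.bin')  # -> 1500
#   get_block_number_from_path('/data/mychain.bin')                # -> -1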


def get_node_config() -> Dict:
    return read_json(NODE_CONFIG_PATH)


def get_node_id() -> int:
    info = get_node_config()
    return info['node_id']


def migrate_prices_and_blocks(path: str, node_id: int) -> None:
    db_suffix = '.db'
    for sname in os.listdir(path):
        subvolume_path = os.path.join(path, sname)
        logger.debug('Processing %s', sname)
        btrfs_set_readonly_false(subvolume_path)
        if sname.endswith(db_suffix):
            subvolume_path = os.path.join(path, sname)
            dbname = sname.split('_')[0]
            new_path = os.path.join(path, f'{dbname}_{node_id}{db_suffix}')
            logger.debug('New path for %s %s', sname, new_path)
            shutil.move(subvolume_path, new_path)
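
# Illustration with hypothetical names: a subvolume 'prices_3.db' created on
# node 3 is renamed to 'prices_<node_id>.db' for this node, presumably so
# skaled on the restoring node looks the databases up under its own node id.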


def make_btrfs_snapshot(src: str, dst: str) -> None:
    run_cmd(['btrfs', 'subvolume', 'snapshot', src, dst])


def fillin_snapshot_folder(src_path: str, block_number: int) -> None:
    snapshots_dirname = 'snapshots'
    snapshot_folder_path = os.path.join(
        src_path, snapshots_dirname, str(block_number))
    os.makedirs(snapshot_folder_path, exist_ok=True)
    for subvolume in os.listdir(src_path):
        if subvolume != snapshots_dirname:
            logger.debug('Copying %s to %s', subvolume, snapshot_folder_path)
            subvolume_path = os.path.join(src_path, subvolume)
            subvolume_snapshot_path = os.path.join(
                snapshot_folder_path, subvolume)
            make_btrfs_snapshot(subvolume_path, subvolume_snapshot_path)
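
# Resulting layout (sketch): every non-snapshots subvolume gets a btrfs
# snapshot under <volume>/snapshots/<block_number>/<subvolume>, which appears
# to mirror the on-disk layout produced by a regular snapshot download.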


def restore_schain_from_snapshot(
    schain: str,
    snapshot_path: str,
    env_type: Optional[str] = None,
    schain_type: str = 'medium'
) -> None:
    if env_type is None:
        env_config = get_env_config()
        env_type = env_config['ENV_TYPE']
    ensure_schain_volume(schain, schain_type, env_type)
    block_number = get_block_number_from_path(snapshot_path)
    if block_number == -1:
        logger.error('Invalid snapshot path format')
        return
    node_id = get_node_id()

    mount(schain)
    src_path = volume_mountpoint(schain)
    logger.info('Unpacking binary')
    btrfs_receive_binary(src_path, snapshot_path)
    logger.info('Migrating subvolumes')
    migrate_prices_and_blocks(src_path, node_id)
    logger.info('Recreating snapshot folder')
    fillin_snapshot_folder(src_path, block_number)
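
# End-to-end usage sketch with hypothetical values: restore 'my-chain' from a
# btrfs stream dump taken at block 1500, with env_type defaulting to the value
# in the node's env config:
#   restore_schain_from_snapshot('my-chain', '/snapshots/my-chain-snapshot-1500.bin')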


def get_schains_by_artifacts() -> str:
    return '\n'.join(os.listdir(SCHAIN_NODE_DATA_PATH))


def get_schain_volume_size(schain_type: str, env_type: str) -> int:
    alloc = safe_load_yml(ALLOCATION_FILEPATH)
    return alloc[env_type]['disk'][schain_type]


def ensure_schain_volume(schain: str, schain_type: str, env_type: str) -> None:
    if not is_volume_exists(schain):
        size = get_schain_volume_size(schain_type, env_type)
        ensure_volume(schain, size)
    else:
        logger.warning('Volume %s already exists', schain)
8 changes: 4 additions & 4 deletions node_cli/main.py
@@ -40,6 +40,7 @@
from node_cli.utils.helper import safe_load_texts, init_default_logger
from node_cli.configs import LONG_LINE
from node_cli.core.host import init_logs_dir
from node_cli.utils.helper import error_exit

TEXTS = safe_load_texts()

@@ -109,8 +110,7 @@ def handle_exception(exc_type, exc_value, exc_traceback):
    try:
        cmd_collection()
    except Exception as err:
        print(f'Command execution failed with {err}. Recheck your inputs')
        traceback.print_exc()
        logger.exception(f'Command failed with {err}')
        error_exit(err)
    finally:
        logger.debug(f'execution time: {time.time() - start_time} seconds')
        logger.debug('Execution time: %d seconds', time.time() - start_time)
5 changes: 3 additions & 2 deletions node_cli/operations/base.py
@@ -194,7 +194,7 @@ def turn_on(env):
    compose_up(env)


def restore(env, backup_path):
def restore(env, backup_path, config_only=False):
    unpack_backup_archive(backup_path)
    failed_checks = run_host_checks(
        env['DISK_MOUNTPOINT'],
@@ -226,7 +226,8 @@ def restore(env, backup_path):
        disk_device=env['DISK_MOUNTPOINT'],
        env_type=env['ENV_TYPE']
    )
    compose_up(env)
    if not config_only:
        compose_up(env)

    failed_checks = run_host_checks(
        env['DISK_MOUNTPOINT'],
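
With config_only set, restore still unpacks the archive, runs the host checks, and rebuilds configuration, but skips compose_up, so containers stay down until started explicitly. A caller sketch with a hypothetical path:

    restore(env, '/backups/node-backup.tar.gz', config_only=True)  # configs only, containers stay down
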
13 changes: 6 additions & 7 deletions node_cli/utils/docker_utils.py
@@ -158,16 +158,16 @@ def get_logs_backup_filepath(container: Container) -> str:
    return os.path.join(REMOVED_CONTAINERS_FOLDER_PATH, log_file_name)


def ensure_volume(name: str, size: int, dutils=None):
def ensure_volume(name: str, size: int, driver='lvmpy', dutils=None):
    dutils = dutils or docker_client()
    if is_volume_exists(name, dutils=dutils):
        logger.info(f'Volume with name {name} already exits')
        logger.info('Volume %s already exists', name)
        return
    logger.info(f'Creating volume - size: {size}, name: {name}')
    driver_opts = {'size': str(size)}
    logger.info('Creating volume %s, size: %d', name, size)
    driver_opts = {'size': str(size)} if driver == 'lvmpy' else None
    volume = dutils.volumes.create(
        name=name,
        driver='lvmpy',
        driver=driver,
        driver_opts=driver_opts
    )
    return volume
@@ -270,5 +270,4 @@ def docker_cleanup(dclient=None, ignore=None):
            cleanup_unused_images(dclient=dc, ignore=ignore)
            system_prune()
    except Exception as e:
        logger.warning('Image cleanuping errored with %s', e)
        logger.debug('Image cleanuping errored', exc_info=True)
        logger.warning('Image cleanup errored with %s', e)
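
The driver parameter keeps lvmpy as the default while allowing other Docker volume drivers; the size option is only passed for lvmpy, since other drivers would not understand it. A usage sketch with hypothetical names:

    ensure_volume('my-chain', 80, driver='lvmpy')      # LVM-backed volume with an explicit size
    ensure_volume('my-chain-tmp', 80, driver='local')  # plain local volume; size is not passed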