Skip to content

Commit

Permalink
feat: DEVOPS-1795 Speed up the backup/restore skipping the package zip (
Browse files Browse the repository at this point in the history
  • Loading branch information
frankmeds authored Dec 24, 2024
1 parent b385200 commit 81e989c
Show file tree
Hide file tree
Showing 7 changed files with 190 additions and 105 deletions.
20 changes: 11 additions & 9 deletions z2/docs/deployer.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ Commands:
deposit Deposit the stake amounts to all the validators
rpc Run RPC calls over the internal network nodes
ssh Run command over SSH in the internal network nodes
backup Backup a node data dir
restore Restore a node data dir from a backup
backup Backup a node data dir in the persistence bucket
restore Restore a node data dir from a backup in the persistence bucket
reset Reset a network stopping all the nodes and cleaning the /data folder
restart Restart a network stopping all the nodes and starting the service again
block-number Show the network nodes block number
Expand Down Expand Up @@ -239,7 +239,7 @@ Options:
--max-parallel <MAX_PARALLEL>
Define the number of nodes to process in parallel. Default: 50
--persistence-url <PERSISTENCE_URL>
gsutil URI of the persistence file. Ie. gs://my-bucket/my-file
gsutil URI of the persistence file. Ie. gs://my-bucket/my-folder
--checkpoint-url <CHECKPOINT_URL>
gsutil URI of the checkpoint file. Ie. gs://my-bucket/my-file. By enabling this option the install will be performed only on the validator nodes
-v, --verbose...
Expand Down Expand Up @@ -460,15 +460,16 @@ z2 deployer backup --help
```
```bash
Backup a node data dir
Backup a node data dir in the persistence bucket
Usage: z2 deployer backup [OPTIONS] --file <FILE> [CONFIG_FILE]
Usage: z2 deployer backup [OPTIONS] [CONFIG_FILE]
Arguments:
[CONFIG_FILE] The network deployer config file
Options:
-f, --file <FILE> The path of the backup file. It can be local path or a gsutil URI of the persistence file. Ie. gs://my-bucket/my-file
-n, --name <NAME> The name of the backup folder. If zip is specified, it represents the name of the zip file
--zip If specified, create a zip file containing the backup
-v, --verbose... Increase logging verbosity
-q, --quiet... Decrease logging verbosity
-h, --help Print help
Expand All @@ -494,15 +495,16 @@ z2 deployer restore --help
```
```bash
Restore a node data dir from a backup
Restore a node data dir from a backup in the persistence bucket
Usage: z2 deployer restore [OPTIONS] --file <FILE> [CONFIG_FILE]
Usage: z2 deployer restore [OPTIONS] [CONFIG_FILE]
Arguments:
[CONFIG_FILE] The network deployer config file
Options:
-f, --file <FILE> The path of the backup file. It can be local path or a gsutil URI of the persistence file. Ie. gs://my-bucket/my-file
-n, --name <NAME> The name of the backup folder. If zip is specified, it represents the name of the zip file
--zip If specified, restore the persistence from a zip file
--max-parallel <MAX_PARALLEL> Define the number of nodes to process in parallel. Default: 50
-v, --verbose... Increase logging verbosity
-q, --quiet... Decrease logging verbosity
Expand Down
16 changes: 15 additions & 1 deletion z2/resources/node_provision.tera.py
Original file line number Diff line number Diff line change
Expand Up @@ -616,7 +616,7 @@ def configure_logrotate():
with open("/etc/logrotate.d/zilliqa.conf", "w") as f:
f.write(LOGROTATE_CONFIG)

def download_persistence():
def download_persistence_file():
if PERSISTENCE_URL is not None and PERSISTENCE_URL != "":
PERSISTENCE_DIR="/data"
run_or_die(["rm", "-rf", f"{PERSISTENCE_DIR}"])
Expand All @@ -628,6 +628,20 @@ def download_persistence():
run_or_die(["tar", "xf", f"{PERSISTENCE_FILENAME}"])
run_or_die(["rm", "-f", f"{PERSISTENCE_FILENAME}"])

def download_persistence_folder():
    """Replace /data with the contents of the PERSISTENCE_URL bucket folder.

    Does nothing when PERSISTENCE_URL is unset or empty. Otherwise the
    existing /data directory is removed and recreated, and everything matched
    by "<PERSISTENCE_URL>/*" is copied into it with a recursive `gsutil -m cp`.
    """
    if PERSISTENCE_URL is not None and PERSISTENCE_URL != "":
        PERSISTENCE_DIR = "/data"
        # Drop trailing slashes so the wildcard below is "<folder>/*" rather
        # than "<folder>//*", which would address an empty-named sub-prefix
        # instead of the objects stored directly under the folder.
        source_url = PERSISTENCE_URL.rstrip("/")
        run_or_die(["sudo", "rm", "-rf", PERSISTENCE_DIR])
        os.makedirs(PERSISTENCE_DIR, exist_ok=True)
        run_or_die(["sudo", "gsutil", "-m", "cp", "-r", f"{source_url}/*", PERSISTENCE_DIR])

def download_persistence():
    """Fetch the node persistence from PERSISTENCE_URL, if one is configured.

    A URL ending in ".tar.gz" is handled by download_persistence_file();
    any other non-empty URL is handled by download_persistence_folder().
    Nothing happens when PERSISTENCE_URL is None or an empty string.
    """
    if PERSISTENCE_URL is None or PERSISTENCE_URL == "":
        return

    is_archive = PERSISTENCE_URL.endswith(".tar.gz")
    fetch = download_persistence_file if is_archive else download_persistence_folder
    fetch()

def download_checkpoint():
if CHECKPOINT_URL is not None and CHECKPOINT_URL != "":
PERSISTENCE_DIR="/data"
Expand Down
37 changes: 24 additions & 13 deletions z2/src/bin/z2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,9 @@ enum DeployerCommands {
Rpc(DeployerRpcArgs),
/// Run command over SSH in the internal network nodes
Ssh(DeployerSshArgs),
/// Backup a node data dir
/// Backup a node data dir in the persistence bucket
Backup(DeployerBackupArgs),
/// Restore a node data dir from a backup
/// Restore a node data dir from a backup in the persistence bucket
Restore(DeployerRestoreArgs),
/// Reset a network stopping all the nodes and cleaning the /data folder
Reset(DeployerActionsArgs),
Expand Down Expand Up @@ -144,7 +144,7 @@ pub struct DeployerInstallArgs {
/// Define the number of nodes to process in parallel. Default: 50
#[clap(long)]
max_parallel: Option<usize>,
/// gsutil URI of the persistence file. Ie. gs://my-bucket/my-file
/// gsutil URI of the persistence file. Ie. gs://my-bucket/my-folder
#[clap(long)]
persistence_url: Option<String>,
/// gsutil URI of the checkpoint file. Ie. gs://my-bucket/my-file. By enabling this option the install will be performed only on the validator nodes
Expand Down Expand Up @@ -221,18 +221,24 @@ pub struct DeployerSshArgs {

#[derive(Args, Debug)]
pub struct DeployerBackupArgs {
/// The path of the backup file. It can be local path or a gsutil URI of the persistence file. Ie. gs://my-bucket/my-file
/// The name of the backup folder. If zip is specified, it represents the name of the zip file.
#[clap(long, short)]
file: String,
name: Option<String>,
/// If specified, create a zip file containing the backup
#[clap(long)]
zip: bool,
/// The network deployer config file
config_file: Option<String>,
}

#[derive(Args, Debug)]
pub struct DeployerRestoreArgs {
/// The path of the backup file. It can be local path or a gsutil URI of the persistence file. Ie. gs://my-bucket/my-file
/// The name of the backup folder. If zip is specified, it represents the name of the zip file.
#[clap(long, short)]
file: String,
name: Option<String>,
/// If specified, restore the persistence from a zip file
#[clap(long)]
zip: bool,
/// Define the number of nodes to process in parallel. Default: 50
#[clap(long)]
max_parallel: Option<usize>,
Expand Down Expand Up @@ -872,7 +878,7 @@ async fn main() -> Result<()> {
"Provide a configuration file. [--config-file] mandatory argument"
)
})?;
plumbing::run_deployer_backup(&config_file, &arg.file)
plumbing::run_deployer_backup(&config_file, arg.name.clone(), arg.zip)
.await
.map_err(|err| {
anyhow::anyhow!("Failed to run deployer backup command: {}", err)
Expand All @@ -885,11 +891,16 @@ async fn main() -> Result<()> {
"Provide a configuration file. [--config-file] mandatory argument"
)
})?;
plumbing::run_deployer_restore(&config_file, &arg.file, arg.max_parallel)
.await
.map_err(|err| {
anyhow::anyhow!("Failed to run deployer restore command: {}", err)
})?;
plumbing::run_deployer_restore(
&config_file,
arg.max_parallel,
arg.name.clone(),
arg.zip,
)
.await
.map_err(|err| {
anyhow::anyhow!("Failed to run deployer restore command: {}", err)
})?;
Ok(())
}
DeployerCommands::Reset(ref arg) => {
Expand Down
Loading

0 comments on commit 81e989c

Please sign in to comment.