From 15e76b13d8cea31b959fdc877fa784d1686bc7a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?= Date: Tue, 9 Jul 2024 09:26:30 -0300 Subject: [PATCH 1/3] Add genesis benchmark scripts --- scripts/genesis-benchmarks/README.md | 99 +++++++++++++++++++ scripts/genesis-benchmarks/provision.sh | 79 +++++++++++++++ scripts/genesis-benchmarks/run-server-node.sh | 46 +++++++++ .../genesis-benchmarks/run-syncing-node.sh | 98 ++++++++++++++++++ 4 files changed, 322 insertions(+) create mode 100644 scripts/genesis-benchmarks/README.md create mode 100644 scripts/genesis-benchmarks/provision.sh create mode 100644 scripts/genesis-benchmarks/run-server-node.sh create mode 100644 scripts/genesis-benchmarks/run-syncing-node.sh diff --git a/scripts/genesis-benchmarks/README.md b/scripts/genesis-benchmarks/README.md new file mode 100644 index 0000000000..5fa251046d --- /dev/null +++ b/scripts/genesis-benchmarks/README.md @@ -0,0 +1,99 @@ +In these notes we describe how to benchmark and profile genesis with a +`cardano-node` downloading blocks from mainnet. We wrote three scripts to +help with this task: + +* provision.sh + * installs nix, clones repositories, and configures a cabal project to + use these repositories. + * Downloads a db-mainnet snapshot to save setup time. + * Installs system dependencies via nix and builds the cardano-node and + the immdb-server. +* run-server-node.sh: + Runs the immdb-server and launches toxiproxy so the server is reachable + through multiple addresses that can be supplied as different peers for the + syncing node to connect to. The amount of addresses is taken as an input + parameter to the script. +* run-syncing-node.sh + * Resets the state of the chain db so synchronization starts at a + designated slot in the past. + * Runs the syncing node until the tip of the volatile db reaches or is + newer than a designated slot. 
+ * At the end prints how long it took to run the syncing node and the memory + usage report from the GHC runtime produced with +RTS -s + +provision.sh must run first to install system dependencies and tooling. Then +the server and its proxies can be started with run-server-node.sh. And finally +run-syncing-node.sh can be started in another terminal. + +All three scripts build Haskell libraries and binaries in the normal way, but +with a profiling-enabled GHC. This makes it easy to reuse the setup to build +with profiling enabled. + +We have used the scripts successfully on a t3.2xlarge machine in AWS with 400 +GB of storage and Ubuntu installed. + +### How to start the server + +The following command runs the server and configures 30 proxy addresses to +connect to it. +``` +$ ./run-server-node.sh 30 +``` + +### How to sync with genesis disabled + +The following command runs the syncing node which will connect to a single peer +and synchronize using Praos. +``` +$ ./run-syncing-node.sh 1 +``` + +The following command will connect to 30 peers and will synchronize using Praos. +``` +$ ./run-syncing-node.sh 30 +``` + +### How to sync with genesis enabled + +The following command runs the syncing node which will connect to two peers and +synchronize using Genesis. Genesis can also synchronize with only 1 peer, but +it is slower because the implementation is not optimized for that case. +``` +$ ENABLE_GENESIS=1 ./run-syncing-node.sh 2 +``` + +Using 30 peers should produce little overhead when compared to only 2 peers. +``` +$ ENABLE_GENESIS=1 ./run-syncing-node.sh 30 +``` + +### How to tweak the range of slots to sync + +By default, the script will synchronize 50000 slots starting at slot 100007913. +To synchronize 155000 slots instead, the environment variable `NUM_SLOTS` can +be used. 
+``` +$ NUM_SLOTS=155000 ENABLE_GENESIS=1 ./run-syncing-node.sh 30 +``` + +The initial slot of the range is harder to change because the startup of the +node is optimized with a snapshot of the ledger at slot 100007913. Generating a +snapshot for a different slot would make it possible to start the range from +it. + +### How to collect the eventlog of the syncing node + +The following command collects the eventlog. +``` +$ CARDANO_NODE_RTS_FLAGS=-ls ENABLE_GENESIS=1 ./run-syncing-node.sh 30 +``` + +This produces a file `cardano-node/cardano-node.eventlog` that can be fed to +`ghc-events-analyze` to observe the CPU consumption per thread. + +### How to produce tracing with full detail + +The following command enables tracing with full detail. +``` +$ ENABLE_FULL_TRACING=1 ENABLE_GENESIS=1 ./run-syncing-node.sh 30 +``` diff --git a/scripts/genesis-benchmarks/provision.sh b/scripts/genesis-benchmarks/provision.sh new file mode 100644 index 0000000000..07e4def502 --- /dev/null +++ b/scripts/genesis-benchmarks/provision.sh @@ -0,0 +1,79 @@ +#!/usr/bin/env bash +set -eux + +# Downloads db-mainnet +# Truncates db-mainnet +# Builds a snapshot of the ledger (takes near 12 hours) +# Builds immdb-server and cardano-node from amesgen/csj branch +# Downloads toxiproxy + +echo colorscheme desert > .vimrc + +sh <(curl -L https://nixos.org/nix/install) --daemon --yes + +source /etc/bashrc + +mkdir -p .config/nix +echo experimental-features = nix-command flakes > .config/nix/nix.conf +mkdir -p .local/share/nix +echo '{"extra-substituters":{"https://cache.iog.io":true},"extra-trusted-public-keys":{"hydra.iohk.io:f/Ea+s+dFdN+3Y/G+FDgSq+a5NEWhJGzdjvKNGv0/EQ=":true}}' > .local/share/nix/trusted-settings.json + +sudo bash -c "echo extra-trusted-users = $USER >> /etc/nix/nix.conf" +sudo systemctl restart nix-daemon.service + +git clone https://github.com/IntersectMBO/cardano-node -b genesis/benchmarks-rebased + +git clone https://github.com/IntersectMBO/ouroboros-consensus -b 
genesis/benchmarks-rebased + +git clone https://github.com/IntersectMBO/ouroboros-network -b blockfetch/milestone-1-rebased + +cat << EOF > cardano-node/cabal.project.local +packages: + ../ouroboros-network/ouroboros-network + ../ouroboros-network/ouroboros-network-api + ../ouroboros-network/ouroboros-network-protocols + ../ouroboros-consensus/ouroboros-consensus + ../ouroboros-consensus/ouroboros-consensus-cardano + ../ouroboros-consensus/ouroboros-consensus-diffusion + ../ouroboros-consensus/ouroboros-consensus-protocol + ../ouroboros-consensus/sop-extras + ../ouroboros-consensus/strict-sop-core + +program-options + ghc-options: -Wwarn +EOF + +(cd cardano-node; nix develop .#project.x86_64-linux.projectVariants.ghc96.profiled.shell -c bash -c "cabal update; cabal build cardano-node:exe:cardano-node ouroboros-consensus-cardano:exe:immdb-server ouroboros-consensus-cardano:exe:db-analyser") + +wget -c https://update-cardano-mainnet.iohk.io/cardano-node-state/db-mainnet.tar.gz -O - | tar -xz + +# wget -c http://dl.amesgen.de/tmp/100007913_db-analyser.zst -O - | unzstd > 100007913_db-analyser + +# Make a copy of the chain db for the syncing node + +NODE_DB=db-mainnet-truncated +cp -r db-mainnet $NODE_DB + +# build a snapshot of the ledger at a recent slot + +build_ledger_snapshot() { + local NODE_DIR=$PWD/server-node + local CONFIG_YAML=$NODE_DIR/mainnet-config.yaml + local CONFIG_JSON=$NODE_DIR/config.json + mkdir -p $NODE_DIR + cp cardano-node/configuration/cardano/mainnet-config.yaml $CONFIG_YAML + # sed -i 's/EnableP2P: true/EnableP2P: false/g' $CONFIG_YAML + cp cardano-node/configuration/cardano/*.json $NODE_DIR + nix-shell -p yaml2json --run "yaml2json < $CONFIG_YAML > $CONFIG_JSON" + + (cd cardano-node; nix develop .#project.x86_64-linux.projectVariants.ghc96.profiled.shell -c bash -c "cabal exec db-analyser -- --store-ledger 100007913 --db ../db-mainnet cardano --config $CONFIG_JSON") + mv db-mainnet/ledger/100007913_db-analyser 100007913_db-analyser +} + +# 
call build_ledger_snapshot or alternatively download it from friends :) +wget -c https://ramify.amesgen.de/100007913_db-analyser.zst -O - | unzstd > 100007913_db-analyser + +# downloading toxiproxy +wget https://github.com/Shopify/toxiproxy/releases/download/v2.9.0/toxiproxy-server-linux-amd64 +wget https://github.com/Shopify/toxiproxy/releases/download/v2.9.0/toxiproxy-cli-linux-amd64 +chmod +x toxiproxy-cli-linux-amd64 toxiproxy-server-linux-amd64 diff --git a/scripts/genesis-benchmarks/run-server-node.sh b/scripts/genesis-benchmarks/run-server-node.sh new file mode 100644 index 0000000000..d9dfb97464 --- /dev/null +++ b/scripts/genesis-benchmarks/run-server-node.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +set -eux + +# run-server-node.sh +# +# Runs the server node and toxiproxies on ports 23000, 23001, etc +# + +N=$1 +NODE_DIR=$PWD/server-node +NODE_DB=$PWD/db-mainnet + +CONFIG_YAML=$NODE_DIR/mainnet-config.yaml +CONFIG_JSON=$NODE_DIR/config.json + +mkdir -p $NODE_DIR +cp cardano-node/configuration/cardano/*.json $NODE_DIR +cp cardano-node/configuration/cardano/mainnet-config.yaml $CONFIG_YAML +echo ConwayGenesisFile: shelley_qa-conway-genesis.json >> $CONFIG_YAML +nix-shell -p yaml2json --run "yaml2json < $CONFIG_YAML > $CONFIG_JSON" + +# Running the server node + +generate_toxiproxy_config_json() { + local OUTPUT=$1 + local CONFIG_ARR=("{\"name\": \"genesis_proxy 0\",\"upstream\":\"127.0.0.1:3001\",\"listen\":\"127.0.0.1:23000\",\"enabled\":true}") + for((i=1;i<$N;i++)) + do + CONFIG_ARR+=(",{\"name\": \"genesis_proxy $i\",\"upstream\":\"127.0.0.1:3001\",\"listen\":\"127.0.0.1:$((i+23000))\",\"enabled\":true}") + done + echo '[' ${CONFIG_ARR[*]} ']' > $OUTPUT +} + +TOXIPROXY_CONFIG=$NODE_DIR/toxiproxy_conf.json +generate_toxiproxy_config_json $TOXIPROXY_CONFIG +./toxiproxy-server-linux-amd64 -config $TOXIPROXY_CONFIG & + +trap 'kill $(jobs -p)' EXIT + +CABAL_FLAGS=${CABAL_FLAGS:-} + +(cd cardano-node; nix develop 
.#project.x86_64-linux.projectVariants.ghc96.profiled.shell -c bash -c "cabal build $CABAL_FLAGS ouroboros-consensus-cardano:exe:immdb-server && echo running immdb-server && cabal run $CABAL_FLAGS ouroboros-consensus-cardano:exe:immdb-server -- \ + --db $NODE_DB/immutable/ \ + --config $CONFIG_JSON \ + --port 3001" \ +) diff --git a/scripts/genesis-benchmarks/run-syncing-node.sh b/scripts/genesis-benchmarks/run-syncing-node.sh new file mode 100644 index 0000000000..8e59bb5e9d --- /dev/null +++ b/scripts/genesis-benchmarks/run-syncing-node.sh @@ -0,0 +1,98 @@ +#!/usr/bin/env bash +set -eux + +# NUM_SLOTS=1000000 run-syncing-node.sh +# +# Runs the syncing node with a topology with peers on ports 23000, 23001, etc +# +# NUM_SLOTS tells how many slots to sync up. The default value is 50000. +# + +N=$1 +NODE_DIR=$PWD/syncing-node +NODE_DB=$PWD/db-mainnet-truncated +mkdir -p $NODE_DIR/logs + +CONFIG_YAML=$NODE_DIR/mainnet-config.yaml +cp cardano-node/configuration/cardano/mainnet-config.yaml $CONFIG_YAML +cp cardano-node/configuration/cardano/*.json $NODE_DIR +echo TraceOptionPeerFrequency: 5000 >> $CONFIG_YAML +echo ConwayGenesisFile: shelley_qa-conway-genesis.json >> $CONFIG_YAML +sed -i 's/PeerSharing: .*//g' $CONFIG_YAML +if [ -v ENABLE_GENESIS ] +then + echo EnableGenesis: true >> $CONFIG_YAML +fi +if [ -v ENABLE_FULL_TRACING ] +then +cat <<END >> $CONFIG_YAML +UseTraceDispatcher: True + +TraceOptions: + "": + severity: Debug + detail: DNormal + backends: + - Stdout MachineFormat + +TraceOptionPeerFrequency: 5000 +END +fi +TOPOLOGY_JSON=$NODE_DIR/topology.json + +truncate_chaindb() { + echo truncating chain db ... + rm -rf $NODE_DB/volatile + + mkdir -p $NODE_DB/ledger + rm -rf $NODE_DB/ledger/* + cp 100007913_db-analyser $NODE_DB/ledger/100007913 + + set +x + for i in $(seq -f "%05g" 4630 99999); do + if [ ! 
-f $NODE_DB/immutable/"$i.chunk" ]; then + break + fi + for t in primary secondary chunk; do + rm -f $NODE_DB/immutable/"$i.$t" + done + done + set -x +} + +generate_topology_json_p2p() { + local OUTPUT=$1 + local TOPOLOGY_ARR=("{\"address\": \"127.0.0.1\", \"port\": 23000}") + for((i=1;i<$N;i++)) + do + TOPOLOGY_ARR+=(", {\"address\": \"127.0.0.1\", \"port\": $((i+23000))}") + done + echo '{"publicRoots": [], "localRoots": [ { "accessPoints": [' ${TOPOLOGY_ARR[*]}'], "advertise": false, "hotValency": ' $N', "trustable": true}]}' > $OUTPUT +} + +generate_topology_json_legacy() { + local OUTPUT=$1 + local TOPOLOGY_ARR=("{\"addr\":\"127.0.0.1\",\"port\":23000,\"valency\":1}") + for((i=1;i<$N;i++)) + do + TOPOLOGY_ARR+=(",{\"addr\":\"127.0.0.1\",\"port\":$((i+23000)),\"valency\":1}") + done + echo '{"localRoots": [], "publicRoots": [], "Producers": [' ${TOPOLOGY_ARR[*]} ']}' > $OUTPUT +} + +truncate_chaindb +generate_topology_json_p2p $TOPOLOGY_JSON + +CABAL_FLAGS=${CABAL_FLAGS:-} + +(cd cardano-node; nix develop .#project.x86_64-linux.projectVariants.ghc96.profiled.shell -c bash -c "cabal build $CABAL_FLAGS cardano-node:exe:cardano-node && time cabal run $CABAL_FLAGS cardano-node:exe:cardano-node -- \ + run \ + --config $CONFIG_YAML \ + --database-path $NODE_DB \ + --topology $TOPOLOGY_JSON \ + --host-addr 0.0.0.0 --port 3002 \ + --socket-path $NODE_DIR/node.socket \ + --shutdown-on-slot-synced $((100007913 + ${NUM_SLOTS:-50000})) \ + +RTS -s ${CARDANO_NODE_RTS_FLAGS:-} \ + | tee $NODE_DIR/logs/sync-$(date -Iseconds).json" \ +) From d550cfc2f9a62d57b08e9dd18fd78692ffcbdfc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?= Date: Mon, 26 Aug 2024 18:30:45 -0300 Subject: [PATCH 2/3] Allow to start syncing from slot 0 --- scripts/genesis-benchmarks/README.md | 8 +++++++- scripts/genesis-benchmarks/run-syncing-node.sh | 11 +++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/scripts/genesis-benchmarks/README.md 
b/scripts/genesis-benchmarks/README.md index 5fa251046d..a280ad774f 100644 --- a/scripts/genesis-benchmarks/README.md +++ b/scripts/genesis-benchmarks/README.md @@ -79,7 +79,13 @@ $ NUM_SLOTS=155000 ENABLE_GENESIS=1 ./run-syncing-node.sh 30 The initial slot of the range is harder to change because the startup of the node is optimized with a snapshot of the ledger at slot 100007913. Generating a snapshot for a different slot would make it possible to start the range from -it. +it. But at the price of deleting the current chaindb, one can start syncing +from slot 0 with, + +``` +# WARNING: deletes the chaindb from the local storage +$ SYNC_FROM_0=1 NUM_SLOTS=155000 ./run-syncing-node.sh 1 +``` ### How to collect the eventlog of the syncing node diff --git a/scripts/genesis-benchmarks/run-syncing-node.sh b/scripts/genesis-benchmarks/run-syncing-node.sh index 8e59bb5e9d..2fc45555b9 100644 --- a/scripts/genesis-benchmarks/run-syncing-node.sh +++ b/scripts/genesis-benchmarks/run-syncing-node.sh @@ -80,7 +80,14 @@ generate_topology_json_legacy() { echo '{"localRoots": [], "publicRoots": [], "Producers": [' ${TOPOLOGY_ARR[*]} ']}' > $OUTPUT } -truncate_chaindb +START_SLOT=100007913 +if [ -v SYNC_FROM_0 ] +then + START_SLOT=0 + rm -rf $NODE_DB +else + truncate_chaindb +fi generate_topology_json_p2p $TOPOLOGY_JSON CABAL_FLAGS=${CABAL_FLAGS:-} @@ -92,7 +99,7 @@ CABAL_FLAGS=${CABAL_FLAGS:-} --topology $TOPOLOGY_JSON \ --host-addr 0.0.0.0 --port 3002 \ --socket-path $NODE_DIR/node.socket \ - --shutdown-on-slot-synced $((100007913 + ${NUM_SLOTS:-50000})) \ + --shutdown-on-slot-synced $((${START_SLOT} + ${NUM_SLOTS:-50000})) \ +RTS -s ${CARDANO_NODE_RTS_FLAGS:-} \ | tee $NODE_DIR/logs/sync-$(date -Iseconds).json" \ ) From 1a049ab30fdca62e71a3fbf1ea37298945a8e6cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?= Date: Mon, 26 Aug 2024 18:35:22 -0300 Subject: [PATCH 3/3] Allow to connect to other nodes than toxiproxy --- scripts/genesis-benchmarks/README.md | 25 
+++++++++++++++++++ .../genesis-benchmarks/run-syncing-node.sh | 7 +++++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/scripts/genesis-benchmarks/README.md b/scripts/genesis-benchmarks/README.md index a280ad774f..93903b6bc7 100644 --- a/scripts/genesis-benchmarks/README.md +++ b/scripts/genesis-benchmarks/README.md @@ -87,6 +87,31 @@ from slot 0 with, $ SYNC_FROM_0=1 NUM_SLOTS=155000 ./run-syncing-node.sh 1 ``` +### How to connect to other nodes than toxiproxy + +The following uses a custom topology file, instead of generating one that +connects to toxiproxy. + +``` +$ TOPOLOGY=topology.json ./run-syncing-node.sh 1 +``` + +For instance, +``` +$ cat topology.json +{"publicRoots": [] +, "localRoots": + [{ "accessPoints": + [ {"address": "34.216.201.242", "port": 3001} + , {"address": "3.77.115.8", "port": 3001} + ] + , "advertise": false + , "hotValency": 2 + , "trustable": true + }] +} +``` + ### How to collect the eventlog of the syncing node The following command collects the eventlog. diff --git a/scripts/genesis-benchmarks/run-syncing-node.sh b/scripts/genesis-benchmarks/run-syncing-node.sh index 2fc45555b9..426299c085 100644 --- a/scripts/genesis-benchmarks/run-syncing-node.sh +++ b/scripts/genesis-benchmarks/run-syncing-node.sh @@ -88,7 +88,12 @@ then else truncate_chaindb fi -generate_topology_json_p2p $TOPOLOGY_JSON +if [ -v TOPOLOGY ] +then + cp $TOPOLOGY $TOPOLOGY_JSON +else + generate_topology_json_p2p $TOPOLOGY_JSON +fi CABAL_FLAGS=${CABAL_FLAGS:-}