From 84b662fd8af2aa1f51137b2251ff6901a08bf888 Mon Sep 17 00:00:00 2001 From: Joe Matthew Date: Fri, 2 Aug 2024 12:58:12 +0200 Subject: [PATCH] fix: PAN-1832 use ethereum contracts docker swarm --- .github/workflows/ci.yaml | 89 ++++++++++++++++++------ .github/workflows/main.yaml | 1 - .github/workflows/publish-docker.yaml | 17 ++--- .github/workflows/release.yaml | 1 - Dockerfile | 22 +++--- Makefile | 46 +++++++++--- docker-compose.override.yml | 7 +- docker-compose.yml | 16 ++++- linux/scripts/pantos-service-node-celery | 11 ++- service-node-config.local.env | 4 +- 10 files changed, 151 insertions(+), 63 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b87c207..1533d7f 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -46,7 +46,9 @@ jobs: with: repository: pantos-io/ethereum-contracts path: ethereum-contracts - sparse-checkout: docker-compose.yml + sparse-checkout: | + Makefile + docker-compose.yml - name: Create local signer_key run: | @@ -77,38 +79,52 @@ jobs: - name: Run Docker run: | - docker compose -f ethereum-contracts/docker-compose.yml up -d --no-build --wait + make docker-local + working-directory: ethereum-contracts env: DOCKER_TAG: "1.1.2" + - name: Ensure ethereum-contracts is accessible + run: | + for port in 8511 8510; do + echo "Checking availability of http://localhost:$port..." + for i in {1..30}; do + if curl -s --max-time 5 http://localhost:$port/ -X POST -H "Content-Type: application/json" --data '{"method":"eth_blockNumber","params":[],"id":1,"jsonrpc":"2.0"}'; then + echo " ... http://localhost:$port is accessible." + break + else + echo "http://localhost:$port is not accessible. Retrying in 10 seconds..." + sleep 10 + fi + done + done + working-directory: ethereum-contracts + - name: Build and load run: | - docker buildx bake \ - --set "*.cache-from=type=local,src=/tmp/.buildx-cache" \ + make docker-build ARGS="--set "*.cache-from=type=local,src=/tmp/.buildx-cache" \ --set "*.cache-to=type=local,dest=/tmp/.buildx-cache-new" \ --set "*.platform=linux/amd64" \ - --builder ${{ steps.buildx.outputs.name }} \ - -f docker-compose.yml \ - --load + --builder ${{ steps.buildx.outputs.name }}" - name: Test image timeout-minutes: 10 run: | - make docker ARGS="-d --no-build --wait" + make docker - name: Dump service node logs if: always() run: | - docker compose logs || true + timeout 1 make docker-logs || true - name: Tear down run: | - docker compose down -v + make docker-remove - name: Dump ethereum contract logs if: always() run: | - docker compose logs || true + timeout 1 make docker-logs || true working-directory: ethereum-contracts - name: Move cache @@ -139,7 +155,9 @@ jobs: with: repository: pantos-io/ethereum-contracts path: ethereum-contracts - sparse-checkout: docker-compose.yml + sparse-checkout: | + Makefile + docker-compose.yml - name: Set up QEMU uses: docker/setup-qemu-action@v3 @@ -157,10 +175,27 @@ jobs: - name: Run Docker run: | - docker compose -f ethereum-contracts/docker-compose.yml up -d --no-build --wait + make docker-local + working-directory: ethereum-contracts env: DOCKER_TAG: "1.1.2" + - name: Ensure ethereum-contracts is accessible + run: | + for port in 8511 8510; do + echo "Checking availability of http://localhost:$port..." + for i in {1..30}; do + if curl -s --max-time 5 http://localhost:$port/ -X POST -H "Content-Type: application/json" --data '{"method":"eth_blockNumber","params":[],"id":1,"jsonrpc":"2.0"}'; then + echo " ... http://localhost:$port is accessible." + break + else + echo "http://localhost:$port is not accessible. Retrying in 10 seconds..." + sleep 10 + fi + done + done + working-directory: ethereum-contracts + - name: Download artifact uses: actions/download-artifact@v4 with: @@ -179,10 +214,17 @@ jobs: run: | sudo mkdir -p /etc/pantos sudo chmod 777 /etc/pantos - docker compose cp eth:/data/keystore /etc/pantos/keystore.eth - docker compose cp eth:/data/ETHEREUM.env /etc/pantos/ETHEREUM.env - docker compose cp bnb:/data/keystore /etc/pantos/keystore.bnb - docker compose cp bnb:/data/BNB.env /etc/pantos/BNB.env + # Get the task ID for the eth service + ETH_TASK_ID=$(docker ps --filter "name=stack-ethereum-contracts--1-eth" --format "{{.ID}}") + # Copy files from the eth service + docker cp $ETH_TASK_ID:/data/keystore /etc/pantos/keystore.eth + docker cp $ETH_TASK_ID:/data/ETHEREUM.env /etc/pantos/ETHEREUM.env + + # Get the task ID for the bnb service + BNB_TASK_ID=$(docker ps --filter "name=stack-ethereum-contracts--1-bnb" --format "{{.ID}}") + # Copy files from the bnb service + docker cp $BNB_TASK_ID:/data/keystore /etc/pantos/keystore.bnb + docker cp $BNB_TASK_ID:/data/BNB.env /etc/pantos/BNB.env working-directory: ethereum-contracts - name: Set env @@ -214,16 +256,23 @@ jobs: done echo "Celery is running" # Wait for curl to be positive - while true; do - response=$(curl -s -o /dev/null -w '%{http_code}' 'http://localhost:8080/bids?source_blockchain=0&destination_blockchain=1') + max_retries=60 + retries=0 + while [ $retries -lt $max_retries ]; do + response=$(curl -s --max-time 5 -o /dev/null -w '%{http_code}' 'http://localhost:8080/bids?source_blockchain=0&destination_blockchain=1') if [ "$response" -eq 200 ]; then echo "Received 200 response, exiting." break else echo "Did not receive 200 response, retrying in 1 second." sleep 1 + retries=$((retries + 1)) fi done + if [ $retries -eq $max_retries ]; then + echo "Max retries reached, exiting with failure." + exit 1 + fi echo "Service is up and running" - name: Print logs @@ -259,5 +308,5 @@ jobs: - name: Dump ethereum contract logs if: always() run: | - docker compose logs || true + timeout 1 make docker-logs || true working-directory: ethereum-contracts diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index d0326fd..7d78d2f 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -20,5 +20,4 @@ jobs: secrets: 'inherit' with: tag: development - architecture: amd64 environment: main diff --git a/.github/workflows/publish-docker.yaml b/.github/workflows/publish-docker.yaml index 861ecec..726b6a5 100644 --- a/.github/workflows/publish-docker.yaml +++ b/.github/workflows/publish-docker.yaml @@ -6,11 +6,6 @@ on: description: 'The docker tag name' required: true type: string - architecture: - description: 'Architecture to build' - required: false - type: string - default: "amd64" environment: description: 'Environment where the secrets are stored' required: true @@ -31,11 +26,17 @@ on: jobs: publish-docker: - name: Publish docker image for ${{ inputs.tag }}/${{ inputs.architecture }} + name: Publish docker image for ${{ inputs.tag }}/${{ matrix.architecture }} + strategy: + fail-fast: false + matrix: + architecture: + - amd64 + - arm64 environment: name: ${{ inputs.environment }} url: ${{ steps.set-output-url.outputs.deployment_dockerhub_url }} - runs-on: ${{ inputs.architecture == 'amd64' && 'ubuntu-latest' || 'ubuntu-20.04' }} + runs-on: macos-latest permissions: id-token: write steps: @@ -94,7 +95,7 @@ jobs: docker buildx bake \ --set "*.cache-from=type=local,src=/tmp/.buildx-cache" \ --set "*.cache-to=type=local,dest=/tmp/.buildx-cache-new" \ - --set "*.platform=linux/${{ inputs.architecture }}" \ + --set "*.platform=linux/${{ matrix.architecture }}" \ --builder ${{ steps.buildx.outputs.name }} \ --sbom=true \ --push \ diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 9da6ac1..3140144 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -57,7 +57,6 @@ jobs: secrets: 'inherit' with: tag: ${{ github.event.release.tag_name }}${{ needs.define-environment.outputs.deployment_environment }} - architecture: amd64 environment: dockerhub extra_tag: ${{ github.event.release.prerelease && 'beta' || 'latest' }} diff --git a/Dockerfile b/Dockerfile index 1bb37ed..eec73f6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,12 +8,12 @@ RUN apt-get update && \ ENV PATH="/root/miniconda3/bin:${PATH}" RUN ARCH=$(uname -m) && \ if [ "$ARCH" = "x86_64" ]; then \ - MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh"; \ + MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh"; \ elif [ "$ARCH" = "aarch64" ]; then \ - MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-aarch64.sh"; \ + MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-aarch64.sh"; \ else \ - echo "Unsupported architecture: $ARCH"; \ - exit 1; \ + echo "Unsupported architecture: $ARCH"; \ + exit 1; \ fi && \ wget "$MINICONDA_URL" -O miniconda.sh && \ mkdir /root/.conda && \ @@ -38,9 +38,9 @@ RUN apt-get update COPY --from=dev /app/dist/pantos-service-node_*.deb . RUN if [ -f ./*-signed.deb ]; then \ - apt-get install -y --no-install-recommends ./*-signed.deb; \ + apt-get install -y --no-install-recommends ./*-signed.deb; \ else \ - apt-get install -y --no-install-recommends ./*.deb; \ + apt-get install -y --no-install-recommends ./*.deb; \ fi && \ rm -rf *.deb && \ apt-get clean && \ @@ -48,14 +48,10 @@ RUN if [ -f ./*-signed.deb ]; then \ FROM prod AS servicenode -HEALTHCHECK --interval=10s --timeout=30s --start-period=5s --retries=5 CMD [ "/usr/bin/pantos-service-node-server", "--status" ] +ENV APP_PORT=8080 -ENV APP_PORT 8080 - -ENTRYPOINT /usr/bin/pantos-service-node-server +ENTRYPOINT ["/usr/bin/pantos-service-node-server"] FROM prod AS servicenode-celery-worker -HEALTHCHECK --interval=10s --timeout=30s --start-period=20s --retries=5 CMD [ "/usr/bin/pantos-service-node-celery", "--status" ] - -ENTRYPOINT /usr/bin/pantos-service-node-celery +ENTRYPOINT ["/usr/bin/pantos-service-node-celery"] diff --git a/Makefile b/Makefile index 52717f2..73335e6 100644 --- a/Makefile +++ b/Makefile @@ -2,6 +2,8 @@ PANTOS_SERVICE_NODE_VERSION := $(shell command -v poetry >/dev/null 2>&1 && poet PANTOS_SERVICE_NODE_SSH_HOST ?= bdev-service-node PYTHON_FILES_WITHOUT_TESTS := pantos/servicenode linux/scripts/start-web.py PYTHON_FILES := $(PYTHON_FILES_WITHOUT_TESTS) tests +STACK_BASE_NAME=stack-service-node +INSTANCE_COUNT ?= 1 .PHONY: check-version check-version: @@ -230,28 +232,50 @@ check-swarm-init: echo "Docker is already part of a swarm."; \ fi -docker: check-swarm-init - docker compose -f docker-compose.yml -f docker-compose.override.yml up --force-recreate $(ARGS) - docker-build: docker buildx bake -f docker-compose.yml --load $(ARGS) -docker-multiple: check-swarm-init docker-build - @if [ -z "$(INSTANCE_COUNT)" ]; then \ - echo "Error: INSTANCE_COUNT is not set"; \ - exit 1; \ - fi; \ - for i in $(shell seq 1 $(INSTANCE_COUNT)); do \ - STACK_NAME="stack-service-node-$$i"; \ +docker: check-swarm-init docker-build + @for i in $$(seq 1 $(INSTANCE_COUNT)); do \ + STACK_NAME="${STACK_BASE_NAME}-${STACK_IDENTIFIER}-$$i"; \ export INSTANCE=$$i; \ + echo "Deploying stack $$STACK_NAME"; \ docker stack deploy -c docker-compose.yml -c docker-compose.override.yml $$STACK_NAME --with-registry-auth --detach=false $(ARGS); \ done +.PHONY: docker-remove docker-remove: - @for stack in $$(docker stack ls --format "{{.Name}}" | awk '/^stack-service-node-/ {print}'); do \ + @STACK_NAME="${STACK_BASE_NAME}"; \ + if [ -n "$(STACK_IDENTIFIER)" ]; then \ + STACK_NAME="$$STACK_NAME-$(STACK_IDENTIFIER)"; \ + echo "Removing the stack with identifier $(STACK_IDENTIFIER)"; \ + else \ + echo "** Removing all stacks **"; \ + fi; \ + for stack in $$(docker stack ls --format "{{.Name}}" | awk "/^$$STACK_NAME/ {print}"); do \ echo "Removing stack $$stack"; \ docker stack rm $$stack --detach=false; \ + echo "Removing volumes for stack $$stack"; \ + docker volume ls --format "{{.Name}}" | awk '/^$$stack/ {print}' | xargs -r docker volume rm; \ + rm -Rf /data/$$stack; \ + done; \ + for compose_stack in $$(docker compose ls --filter "name=$$STACK_NAME" --format json | jq -r '.[].Name' | awk "/^$$STACK_NAME/ {print}"); do \ + echo "Removing Docker Compose stack $$compose_stack"; \ + docker compose -p $$compose_stack down -v; \ + done + +.PHONY: docker-logs +docker-logs: + @for stack in $$(docker stack ls --format "{{.Name}}" | awk '/^${STACK_BASE_NAME}-${STACK_IDENTIFIER}/ {print}'); do \ + echo "Showing logs for stack $$stack"; \ + for service in $$(docker stack services --format "{{.Name}}" $$stack); do \ + echo "Logs for service $$service in stack $$stack"; \ + docker service logs --no-task-ids $$service; \ + done; \ done docker-prod: check-swarm-init docker compose -f docker-compose.yml -f docker-compose.prod.yml up --force-recreate $(ARGS) + +docker-prod-down: check-swarm-init + docker compose -f docker-compose.yml -f docker-compose.prod.yml down -v $(ARGS) diff --git a/docker-compose.override.yml b/docker-compose.override.yml index c1adbdb..13f215d 100644 --- a/docker-compose.override.yml +++ b/docker-compose.override.yml @@ -1,7 +1,7 @@ networks: pantos-service-node: pantos-ethereum: - name: pantos-ethereum + name: pantos-ethereum-${ETHEREUM_NETWORK--1} external: true services: @@ -66,8 +66,9 @@ services: volumes: bnb-data: - name: bnb-data + # Requires the same amount of instances as the servicenode + name: bnb-data-${ETHEREUM_NETWORK--1} external: true eth-data: - name: eth-data + name: eth-data-${ETHEREUM_NETWORK--1} external: true diff --git a/docker-compose.yml b/docker-compose.yml index e28b373..8cbae0d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -7,7 +7,7 @@ networks: services: app: restart: always - image: pantosio/service-node-app:${DOCKER_TAG-local} + image: ${DOCKER_REGISTRY-pantosio}/service-node-app:${DOCKER_TAG-local} build: context: . dockerfile: Dockerfile @@ -31,12 +31,18 @@ services: CELERY_BACKEND: db+postgresql://pantos-service-node:pantos@db/pantos-service-node-celery ports: - 808${INSTANCE-0}:8080 + healthcheck: + test: ["CMD", "/usr/bin/pantos-service-node-server", "--status"] + interval: 5s + timeout: 10s + retries: 3 + start_period: 10s depends_on: - broker - db worker: restart: always - image: pantosio/service-node-worker:${DOCKER_TAG-local} + image: ${DOCKER_REGISTRY-pantosio}/service-node-worker:${DOCKER_TAG-local} build: context: . dockerfile: Dockerfile @@ -59,6 +65,12 @@ services: DB_URL: postgresql://pantos-service-node:pantos@db/pantos-service-node CELERY_BROKER: amqp://pantos-service-node:pantos@broker:5672/pantos-service-node CELERY_BACKEND: db+postgresql://pantos-service-node:pantos@db/pantos-service-node-celery + healthcheck: + test: ["CMD", "/usr/bin/pantos-service-node-celery", "--status"] + interval: 5s + timeout: 10s + retries: 3 + start_period: 20s depends_on: # Wait for the app to setup the DB - app diff --git a/linux/scripts/pantos-service-node-celery b/linux/scripts/pantos-service-node-celery index 591857c..626c430 100644 --- a/linux/scripts/pantos-service-node-celery +++ b/linux/scripts/pantos-service-node-celery @@ -34,8 +34,14 @@ while [ $# -gt 0 ]; do ;; -s|--status) echo "Querying the status of the celery worker" - exec ./bin/python -m celery -A pantos.servicenode report - exit $? + ./bin/python -m celery -A pantos.servicenode report + status=$? + if [ $status -ne 0 ]; then + echo "Status: UNHEALTHY, exit code: $status" + else + echo "Status: HEALTHY" + fi + exit $status ;; *) break @@ -56,6 +62,7 @@ if [ "$(id -u)" -ne "$(id -u "$APP_NAME")" ]; then fi while true; do + echo "Starting the celery worker" ./bin/python -m celery -A pantos.servicenode worker $EXTRA_ARGS -l INFO -n pantos.servicenode -Q transfers,bids PYTHON_EXIT_CODE=$? diff --git a/service-node-config.local.env b/service-node-config.local.env index 1942038..7437086 100644 --- a/service-node-config.local.env +++ b/service-node-config.local.env @@ -26,7 +26,7 @@ AVALANCHE_ACTIVE=false # **This path is used in CI** BNB_PRIVATE_KEY=/etc/pantos/keystore.bnb BNB_PRIVATE_KEY_PASSWORD='' -BNB_PROVIDER=http://localhost:8545 +BNB_PROVIDER=http://localhost:8511 BNB_CHAIN_ID=31338 BNB_FALLBACK_PROVIDER=' ' BNB_CONFIRMATIONS=2 @@ -43,7 +43,7 @@ CRONOS_ACTIVE=false # **This path is used in CI** ETHEREUM_PRIVATE_KEY=/etc/pantos/keystore.eth ETHEREUM_PRIVATE_KEY_PASSWORD='' -ETHEREUM_PROVIDER=http://localhost:8545 +ETHEREUM_PROVIDER=http://localhost:8510 ETHEREUM_CHAIN_ID=31337 ETHEREUM_CONFIRMATIONS=2 ETHEREUM_BLOCKS_UNTIL_RESUBMISSION=10