From e62d457122ff23c1b246d3f9cc3fba1efb3a487d Mon Sep 17 00:00:00 2001
From: Mattia
Date: Sun, 30 Jun 2019 16:29:42 +0200
Subject: [PATCH] Release of version 1.0.7 (#147)

* Upgrade to rapydo 0.6.6

* SDC: Introduced error 4044 - Parameter download_path is empty

* SDC: importmanager is now https

* Working on recursive pid cache

* Added workers-only mode
---
 docs/deploy/startup.md                        |   4 +-
 docs/quick_start.md                           |   2 +-
 projects/b2stage/project_configuration.yaml   |  24 +--
 projects/b2stage/requirements.txt             |   4 +-
 .../backend/apis/commons/seadatacloud.py      |  11 +-
 projects/seadata/backend/tasks/seadata.py     |  71 +++++++--
 projects/seadata/builds/celery/Dockerfile     |   2 +-
 projects/seadata/confs/workers-only.yml       | 138 ++++++++++++++++++
 projects/seadata/confs/workers.yml            |   2 +-
 projects/seadata/project_configuration.yaml   |   8 +-
 projects/seadata/requirements.txt             |   4 +-
 11 files changed, 233 insertions(+), 37 deletions(-)
 create mode 100644 projects/seadata/confs/workers-only.yml

diff --git a/docs/deploy/startup.md b/docs/deploy/startup.md
index cb9ad6a1..715b6080 100644
--- a/docs/deploy/startup.md
+++ b/docs/deploy/startup.md
@@ -5,7 +5,7 @@
 To clone the working code:

 ```bash
-$ VERSION=1.0.6 \
+$ VERSION=1.0.7 \
   && git clone https://github.com/EUDAT-B2STAGE/http-api.git \
   && cd http-api \
   && git checkout $VERSION
@@ -51,7 +51,7 @@ $ rapydo init
 If you wish to __**manually upgrade**__:

 ```bash
-VERSION="0.6.5"
+VERSION="0.6.6"
 git checkout $VERSION

 # supposely the rapydo framework has been updated, so you need to check:
diff --git a/docs/quick_start.md b/docs/quick_start.md
index ab1421a1..dfb5ff25 100644
--- a/docs/quick_start.md
+++ b/docs/quick_start.md
@@ -24,7 +24,7 @@ cd latest
 ################
 # install the corrensponding rapydo framework version
 sudo pip3 install rapydo-controller
-rapydo --project b2stage install --git 0.6.5
+rapydo --project b2stage install --git 0.6.6

 # build and run
 rapydo --project b2stage init
diff --git a/projects/b2stage/project_configuration.yaml b/projects/b2stage/project_configuration.yaml
index 32e3726b..400ab9d3 100644
--- a/projects/b2stage/project_configuration.yaml
+++ b/projects/b2stage/project_configuration.yaml
@@ -5,8 +5,8 @@ project:
   title: EUDAT-B2STAGE HTTP-API server
   description: Processing data in EUDAT domains
-  version: 1.0.6
-  rapydo: 0.6.5
+  version: 1.0.7
+  rapydo: 0.6.6

 tags:
   eudat: all endpoints associated to EUDAT services
@@ -111,38 +111,42 @@ releases:
   '0.6.0':
     type: RC1
     rapydo: 0.5.3
-    status: released
+    status: discontinued
   '0.6.1':
     type: RC2
     rapydo: 0.5.4
-    status: released
+    status: discontinued
   '1.0.0':
     type: stable
     rapydo: 0.6.0
-    status: released
+    status: discontinued
   '1.0.1':
     type: patch
     rapydo: 0.6.1
-    status: released
+    status: discontinued
   '1.0.2':
     type: patch
     rapydo: 0.6.1
-    status: released
+    status: discontinued
   '1.0.3':
     type: stable
     rapydo: 0.6.2
-    status: released
+    status: discontinued
   '1.0.4':
     type: stable
     rapydo: 0.6.3
-    status: released
+    status: discontinued
   '1.0.5':
     type: stable
     rapydo: 0.6.4
     status: released
   '1.0.6':
-    type: development
+    type: stable
     rapydo: 0.6.5
+    status: released
+  '1.0.7':
+    type: development
+    rapydo: 0.6.6
     status: development

 controller:
diff --git a/projects/b2stage/requirements.txt b/projects/b2stage/requirements.txt
index df8f22b7..14113601 100644
--- a/projects/b2stage/requirements.txt
+++ b/projects/b2stage/requirements.txt
@@ -1,2 +1,2 @@
-git+https://github.com/rapydo/utils.git@0.6.5
-git+https://github.com/rapydo/do.git@0.6.5
+git+https://github.com/rapydo/utils.git@0.6.6
+git+https://github.com/rapydo/do.git@0.6.6
diff --git a/projects/seadata/backend/apis/commons/seadatacloud.py b/projects/seadata/backend/apis/commons/seadatacloud.py
index 8ab30256..1c55ea64 100644
--- a/projects/seadata/backend/apis/commons/seadatacloud.py
+++ b/projects/seadata/backend/apis/commons/seadatacloud.py
@@ -64,6 +64,7 @@ class ErrorCodes(object):
     MISSING_DOWNLOAD_PATH_PARAM = ("4041", "Parameter download_path is missing")
     UNABLE_TO_CREATE_ZIP_FILE = ("4042", "Unable to create merged zip file")
     INVALID_ZIP_SPLIT_OUTPUT = ("4043", "Unable to retrieve results from zip split")
+    EMPTY_DOWNLOAD_PATH_PARAM = ("4044", "Parameter download_path is empty")


 class Metadata(object):
@@ -109,7 +110,7 @@ def post(self, payload, backdoor=False, edmo_code=None):
                 "The following json should be sent to ImportManagerAPI, " +
                 "but you enabled the backdoor")
             log.info(payload)
-            return True
+            return False

         from restapi.confs import PRODUCTION
         if not PRODUCTION:
@@ -122,19 +123,23 @@ def post(self, payload, backdoor=False, edmo_code=None):
         import requests
         # print("TEST", self._uri)
         r = requests.post(self._uri, json=payload)
+        log.info("POST external IM API, status=%s, uri=%s", r.status_code, self._uri)

         from utilities import htmlcodes as hcodes
         if r.status_code != hcodes.HTTP_OK_BASIC:
             log.error(
                 "CDI: failed to call external APIs (status: %s, uri: %s)",
-                (r.status_code, self._uri))
+                r.status_code, self._uri)
             return False
         else:
             log.info(
                 "CDI: called POST on external APIs (status: %s, uri: %s)",
-                (r.status_code, self._uri))
+                r.status_code, self._uri)
             return True

+        log.warning("Unknown external APIs status")
+        return False
+

 # NOTE this function is outside the previous class, and self is passed as parameter
 def seadata_pid(self, pid):
diff --git a/projects/seadata/backend/tasks/seadata.py b/projects/seadata/backend/tasks/seadata.py
index 75f84a3f..0af98198 100644
--- a/projects/seadata/backend/tasks/seadata.py
+++ b/projects/seadata/backend/tasks/seadata.py
@@ -136,6 +136,11 @@ def download_batch(self, batch_path, local_path, myjson):
             ErrorCodes.MISSING_DOWNLOAD_PATH_PARAM,
             myjson, backdoor, self
         )
+    if download_path == '':
+        return notify_error(
+            ErrorCodes.EMPTY_DOWNLOAD_PATH_PARAM,
+            myjson, backdoor, self
+        )

     file_count = params.get("data_file_count")
     if file_count is None:
@@ -199,6 +204,13 @@ def download_batch(self, batch_path, local_path, myjson):
                 myjson, backdoor, self,
                 subject=download_url
             )
+        except requests.exceptions.MissingSchema as e:
+            log.error(str(e))
+            return notify_error(
+                ErrorCodes.UNREACHABLE_DOWNLOAD_PATH,
+                myjson, backdoor, self,
+                subject=download_url
+            )

         if r.status_code != 200:

@@ -316,7 +328,8 @@ def download_batch(self, batch_path, local_path, myjson):
         log.info("Copied: %s", irods_batch_file)

     request_edmo_code = myjson.get('edmo_code')
-    ext_api.post(myjson, backdoor=backdoor, edmo_code=request_edmo_code)
+    ret = ext_api.post(myjson, backdoor=backdoor, edmo_code=request_edmo_code)
+    log.info('CDI IM CALL = %s', ret)

     return "COMPLETED"

@@ -449,7 +462,8 @@ def move_to_production_task(self, batch_id, irods_path, myjson):
             myjson[key] = value
     if len(errors) > 0:
         myjson['errors'] = errors
-    ext_api.post(myjson, backdoor=backdoor)
+    ret = ext_api.post(myjson, backdoor=backdoor)
+    log.info('CDI IM CALL = %s', ret)
     out = {
         'total': total,
         'step': counter,
@@ -708,7 +722,6 @@ def unrestricted_order(self, order_id, order_path, zip_file_name, myjson):
         #########################
         # NOTE: should I close the iRODS session ?
         #########################
-        pass

     # imain.prc
     ##################
@@ -735,7 +748,8 @@ def unrestricted_order(self, order_id, order_path, zip_file_name, myjson):
         myjson['errors'] = errors
     myjson[reqkey] = self.request.id
     # log.pp(myjson)
-    ext_api.post(myjson, backdoor=backdoor)
+    ret = ext_api.post(myjson, backdoor=backdoor)
+    log.info('CDI IM CALL = %s', ret)

     ##################
     out = {
@@ -787,6 +801,11 @@ def download_restricted_order(self, order_id, order_path, myjson):
             ErrorCodes.MISSING_DOWNLOAD_PATH_PARAM,
             myjson, backdoor, self
         )
+    if download_path == '':
+        return notify_error(
+            ErrorCodes.EMPTY_DOWNLOAD_PATH_PARAM,
+            myjson, backdoor, self
+        )

     # NAME OF FINAL ZIP
     filename = params.get('zipfile_name')
@@ -876,6 +895,13 @@ def download_restricted_order(self, order_id, order_path, myjson):
                 myjson, backdoor, self,
                 subject=download_url
             )
+        except requests.exceptions.MissingSchema as e:
+            log.error(str(e))
+            return notify_error(
+                ErrorCodes.UNREACHABLE_DOWNLOAD_PATH,
+                myjson, backdoor, self,
+                subject=download_url
+            )

         if r.status_code != 200:

@@ -1039,7 +1065,7 @@ def download_restricted_order(self, order_id, order_path, myjson):
         log.info("Creating a backup copy of final zip")
         backup_zip = final_zip + ".bak"
         if imain.is_dataobject(backup_zip):
-            log.info("%s already exists, removing previous backup")
+            log.info("%s already exists, removing previous backup", backup_zip)
             imain.remove(backup_zip)

         imain.move(final_zip, backup_zip)
@@ -1133,7 +1159,8 @@ def download_restricted_order(self, order_id, order_path, myjson):
     if len(errors) > 0:
         myjson['errors'] = errors
-    ext_api.post(myjson, backdoor=backdoor)
+    ret = ext_api.post(myjson, backdoor=backdoor)
+    log.info('CDI IM CALL = %s', ret)

     return "COMPLETED"

 # 0 - avoid concurrent execution, introduce a cache like:
@@ -1214,7 +1241,8 @@ def delete_orders(self, orders_path, local_orders_path, myjson):

     if len(errors) > 0:
         myjson['errors'] = errors
-    ext_api.post(myjson, backdoor=backdoor)
+    ret = ext_api.post(myjson, backdoor=backdoor)
+    log.info('CDI IM CALL = %s', ret)

     return "COMPLETED"

@@ -1280,10 +1308,24 @@ def delete_batches(self, batches_path, local_batches_path, myjson):
     if len(errors) > 0:
         myjson['errors'] = errors
-    ext_api.post(myjson, backdoor=backdoor)
+    ret = ext_api.post(myjson, backdoor=backdoor)
+    log.info('CDI IM CALL = %s', ret)

     return "COMPLETED"


+def recursive_list_files(imain, irods_path):
+
+    data = []
+    for current in imain.list(irods_path):
+        ifile = path.join(irods_path, current, return_str=True)
+        if imain.is_dataobject(ifile):
+            data.append(ifile)
+        else:
+            data.extend(recursive_list_files(imain, ifile))
+
+    return data
+
+
 @celery_app.task(bind=True)
 @send_errors_by_email
 def cache_batch_pids(self, irods_path):
@@ -1300,8 +1342,14 @@ def cache_batch_pids(self, irods_path):
         'errors': 0,
     }

-    for current in imain.list(irods_path):
-        ifile = path.join(irods_path, current, return_str=True)
+    data = recursive_list_files(imain, irods_path)
+    log.info("Found %s files", len(data))
+
+    # for current in imain.list(irods_path):
+    #     ifile = path.join(irods_path, current, return_str=True)
+
+    for ifile in data:
+
         stats['total'] += 1

         pid = r.get(ifile)
@@ -1406,6 +1454,7 @@ def list_resources(self, batch_path, order_path, myjson):
     for n in orders:
         myjson[param_key]['orders'].append(n)

-    ext_api.post(myjson, backdoor=backdoor)
+    ret = ext_api.post(myjson, backdoor=backdoor)
+    log.info('CDI IM CALL = %s', ret)

     return "COMPLETED"
diff --git a/projects/seadata/builds/celery/Dockerfile b/projects/seadata/builds/celery/Dockerfile
index f5da66ad..5d21483e 100644
--- a/projects/seadata/builds/celery/Dockerfile
+++ b/projects/seadata/builds/celery/Dockerfile
@@ -1,4 +1,4 @@
-FROM rapydo/backendirods:0.6.5
+FROM rapydo/backendirods:0.6.6
 MAINTAINER "Mattia D'Antonio "

 RUN apt-get update \
diff --git a/projects/seadata/confs/workers-only.yml b/projects/seadata/confs/workers-only.yml
new file mode 100644
index 00000000..67d9e2af
--- /dev/null
+++ b/projects/seadata/confs/workers-only.yml
@@ -0,0 +1,138 @@
+version: '3'
+
+services:
+
+  backend:
+    environment:
+      ACTIVATE: 0
+
+  rabbit:
+    environment:
+      ACTIVATE: 0
+
+  mongodb:
+    environment:
+      ACTIVATE: 0
+
+  celeryui:
+    environment:
+      ACTIVATE: 0
+
+  celery:
+    restart: on-failure:5
+    build: ${PROJECT_DIR}/builds/celery
+    image: ${COMPOSE_PROJECT_NAME}/celery:${RAPYDO_VERSION}
+    command: celery worker --concurrency=1 -Ofair -A restapi.flask_ext.flask_celery.worker.celery_app -Q celery -n worker-%h
+    volumes:
+      - ${RESOURCES_LOCALPATH}:${SEADATA_RESOURCES_MOUNTPOINT}
+    environment:
+      CELERY_EXTERNAL: 1
+      #############################
+      ACTIVATE: 1
+      MAIN_LOGIN_ENABLE: 0
+      DEBUG_ENDPOINTS: 1
+      APP_MODE: production
+      DEBUG_LEVEL: ${LOG_LEVEL}
+      DOMAIN: ${PROJECT_DOMAIN}
+      SEADATA_PROJECT: ${SEADATA_PROJECT}
+      SEADATA_EDMO_CODE: ${SEADATA_EDMO_CODE}
+      SEADATA_API_IM_URL: ${SEADATA_API_IM_URL}
+      SEADATA_API_VERSION: ${SEADATA_API_VERSION}
+      # on rancher/celery host filesystem:
+      SEADATA_WORKSPACE_INGESTION: ${SEADATA_WORKSPACE_INGESTION}
+      SEADATA_WORKSPACE_ORDERS: ${SEADATA_WORKSPACE_ORDERS}
+      SEADATA_RESOURCES_MOUNTPOINT: ${SEADATA_RESOURCES_MOUNTPOINT}
+      REDIS_HOST: ${REDIS_HOST}
+      REDIS_PREFIX: ${REDIS_PREFIX}
+      IRODS_ENABLE: 1
+      IRODS_HOST: ${IRODS_HOST}
+      IRODS_PORT: ${IRODS_PORT}
+      IRODS_USER: ${IRODS_USER}
+      IRODS_ZONE: ${IRODS_ZONE}
+      IRODS_HOME: ${IRODS_HOME}
+      IRODS_DN: ${IRODS_DN}
+      IRODS_PASSWORD: ${IRODS_PASSWORD}
+      IRODS_AUTHSCHEME: ${IRODS_AUTHSCHEME}
+
+  ingestion_celery:
+    restart: on-failure:5
+    build: ${PROJECT_DIR}/builds/celery
+    image: ${COMPOSE_PROJECT_NAME}/celery:${RAPYDO_VERSION}
+    entrypoint: docker-entrypoint-celery
+    command: celery worker --concurrency=1 -Ofair -A restapi.flask_ext.flask_celery.worker.celery_app -Q ingestion -n worker-%h
+    user: developer
+    working_dir: /code
+    volumes:
+      # configuration files
+      - ${SUBMODULE_DIR}/rapydo-confs/projects_defaults.yaml:/code/confs/projects_defaults.yaml
+      - ${PROJECT_DIR}/project_configuration.yaml:/code/confs/project_configuration.yaml
+      # Vanilla code
+      - ${PROJECT_DIR}/backend:/code/${COMPOSE_PROJECT_NAME}
+      # From project, if any
+      - ${EXTENDED_PROJECT_PATH}/backend:/code/${EXTENDED_PROJECT}
+      - ${EXTENDED_PROJECT_PATH}/project_configuration.yaml:/code/confs/extended_project_configuration.yaml
+      # JWT tokens secret
+      - jwt_tokens:${JWT_APP_SECRETS}
+
+      - ${RESOURCES_LOCALPATH}:${SEADATA_RESOURCES_MOUNTPOINT}
+    networks:
+      db_net:
+      worker_net:
+    # depends_on:
+    #   - rabbit
+    environment:
+
+      CURRENT_UID: ${CURRENT_UID}
+      VANILLA_PACKAGE: ${COMPOSE_PROJECT_NAME}
+      EXTENDED_PACKAGE: ${EXTENDED_PROJECT}
+      JWT_APP_SECRETS: ${JWT_APP_SECRETS}
+
+      CELERY_ENABLE: 1
+
+      CELERY_BROKER: ${CELERY_BROKER}
+      CELERY_BROKER_HOST: ${CELERY_BROKER_HOST}
+      CELERY_BROKER_PORT: ${CELERY_BROKER_PORT}
+      CELERY_BROKER_USER: ${CELERY_BROKER_USER}
+      CELERY_BROKER_PASSWORD: ${CELERY_BROKER_PASSWORD}
+      CELERY_BROKER_VHOST: ${CELERY_BROKER_VHOST}
+
+      CELERY_BACKEND: ${CELERY_BACKEND}
+      CELERY_BACKEND_HOST: ${CELERY_BACKEND_HOST}
+      CELERY_BACKEND_PORT: ${CELERY_BACKEND_PORT}
+      CELERY_BACKEND_USER: ${CELERY_BACKEND_USER}
+      CELERY_BACKEND_PASSWORD: ${CELERY_BACKEND_PASSWORD}
+
+      CELERY_EXTERNAL: 1
+      #############################
+      ACTIVATE: 1
+      MAIN_LOGIN_ENABLE: 0
+      DEBUG_ENDPOINTS: 1
+      APP_MODE: production
+      DEBUG_LEVEL: ${LOG_LEVEL}
+      DOMAIN: ${PROJECT_DOMAIN}
+      SEADATA_PROJECT: ${SEADATA_PROJECT}
+      SEADATA_EDMO_CODE: ${SEADATA_EDMO_CODE}
+      SEADATA_API_IM_URL: ${SEADATA_API_IM_URL}
+      SEADATA_API_VERSION: ${SEADATA_API_VERSION}
+      # on rancher/celery host filesystem:
+      SEADATA_WORKSPACE_INGESTION: ${SEADATA_WORKSPACE_INGESTION}
+      SEADATA_WORKSPACE_ORDERS: ${SEADATA_WORKSPACE_ORDERS}
+      SEADATA_RESOURCES_MOUNTPOINT: ${SEADATA_RESOURCES_MOUNTPOINT}
+      REDIS_HOST: ${REDIS_HOST}
+      REDIS_PREFIX: ${REDIS_PREFIX}
+      IRODS_ENABLE: 1
+      IRODS_HOST: ${IRODS_HOST}
+      IRODS_PORT: ${IRODS_PORT}
+      IRODS_USER: ${IRODS_USER}
+      IRODS_ZONE: ${IRODS_ZONE}
+      IRODS_HOME: ${IRODS_HOME}
+      IRODS_DN: ${IRODS_DN}
+      IRODS_PASSWORD: ${IRODS_PASSWORD}
+      IRODS_AUTHSCHEME: ${IRODS_AUTHSCHEME}
+
+      SMTP_ADMIN: ${SMTP_ADMIN}
+      SMTP_NOREPLY: ${SMTP_NOREPLY}
+      SMTP_HOST: ${SMTP_HOST}
+      SMTP_PORT: ${SMTP_PORT}
+      SMTP_USERNAME: ${SMTP_USERNAME}
+      SMTP_PASSWORD: ${SMTP_PASSWORD}
diff --git a/projects/seadata/confs/workers.yml b/projects/seadata/confs/workers.yml
index 9bc67377..fac47c28 100644
--- a/projects/seadata/confs/workers.yml
+++ b/projects/seadata/confs/workers.yml
@@ -4,7 +4,7 @@ services:

   backend:
     environment:
-      ACTIVATE:
+      ACTIVATE: 0

   rabbit:
     restart: always
diff --git a/projects/seadata/project_configuration.yaml b/projects/seadata/project_configuration.yaml
index 9ce1b6a2..cd52b5c4 100644
--- a/projects/seadata/project_configuration.yaml
+++ b/projects/seadata/project_configuration.yaml
@@ -5,8 +5,8 @@ project:
   title: SeaDataCloud HTTP-API
   description: Processing data in SeaDataCloud project
-  version: 1.0.6
-  rapydo: 0.6.5
+  version: 1.0.7
+  rapydo: 0.6.6

   extends: b2stage
   extends-from: projects

@@ -17,7 +17,7 @@ tags:

 variables:
   env:
-    SEADATA_API_VERSION: 1.0.6.0002
+    SEADATA_API_VERSION: 1.0.7.0002
     LOG_LEVEL: INFO

     # global, but can be configured separately in .projectrc
@@ -43,7 +43,7 @@ variables:
      SEADATA_WORKSPACE_ORDERS: orders # on filesystem (to be mounted to celery workers and rancher containers)
      # SEADATA_BATCH_PROD_FILES_COLL: production
      ## IM APIs
-     SEADATA_API_IM_URL: http://importmanager.seadatanet.org/api_v1
+     SEADATA_API_IM_URL: https://importmanager.seadatanet.org/api_v1
      ## RANCHER
      RESOURCES_URL: https://cattle.yourdomain.com/v2-beta
      RESOURCES_QCLABEL: qc
diff --git a/projects/seadata/requirements.txt b/projects/seadata/requirements.txt
index df8f22b7..14113601 100644
--- a/projects/seadata/requirements.txt
+++ b/projects/seadata/requirements.txt
@@ -1,2 +1,2 @@
-git+https://github.com/rapydo/utils.git@0.6.5
-git+https://github.com/rapydo/do.git@0.6.5
+git+https://github.com/rapydo/utils.git@0.6.6
+git+https://github.com/rapydo/do.git@0.6.6
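
The PID cache now walks batch collections recursively: `recursive_list_files` flattens an iRODS collection tree into a list of data objects, and `cache_batch_pids` iterates that flat list instead of a single `imain.list()` call. A minimal sketch of the same recursion, with a `CollectionClient` protocol standing in for the project's `imain` iRODS wrapper and plain string joining in place of the project's `path.join(..., return_str=True)` helper:

```python
from typing import List, Protocol


class CollectionClient(Protocol):
    """Illustrative stand-in for the project's iRODS wrapper (imain)."""

    def list(self, path: str) -> List[str]:
        """Return the names of the entries directly inside a collection."""
        ...

    def is_dataobject(self, path: str) -> bool:
        """True if the path is a data object (file), False if it is a collection."""
        ...


def recursive_list_files(client: CollectionClient, irods_path: str) -> List[str]:
    """Return every data object under irods_path, descending into sub-collections."""
    data: List[str] = []
    for current in client.list(irods_path):
        ifile = "{}/{}".format(irods_path.rstrip("/"), current)
        if client.is_dataobject(ifile):
            # leaf: a data object, cacheable by cache_batch_pids
            data.append(ifile)
        else:
            # sub-collection: recurse and accumulate its files
            data.extend(recursive_list_files(client, ifile))
    return data
```

The per-file work in `cache_batch_pids` (incrementing `stats['total']` and looking up the PID for each `ifile`) is unchanged apart from iterating this flattened list.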
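Both download tasks now distinguish a missing `download_path` (error 4041) from an empty one (the new 4044), and they catch `requests.exceptions.MissingSchema`, which `requests` raises when a URL lacks an `http://` or `https://` scheme, alongside the existing `ConnectionError` handling. A condensed sketch of that validation order, with string return values standing in for the `ErrorCodes` tuples and `notify_error`, batch bookkeeping, and the real URL construction omitted:

```python
import requests


def fetch_from_download_path(params: dict):
    """Sketch of the check order used by download_batch / download_restricted_order."""
    download_path = params.get("download_path")
    if download_path is None:
        return "MISSING_DOWNLOAD_PATH_PARAM"   # 4041: key absent from the request
    if download_path == "":
        return "EMPTY_DOWNLOAD_PATH_PARAM"     # 4044: key present but empty

    try:
        response = requests.get(download_path)
    except (requests.exceptions.ConnectionError,
            requests.exceptions.MissingSchema):
        # MissingSchema: download_path has no http:// or https:// scheme
        return "UNREACHABLE_DOWNLOAD_PATH"

    if response.status_code != 200:
        return "UNREACHABLE_DOWNLOAD_PATH"
    return response
```

In the actual tasks each failure path goes through `notify_error(...)` with the corresponding `ErrorCodes` entry and the request JSON, as shown in the hunks above.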