diff --git a/lambda/refreshCacheSubmit/refresh_cache_submit.py b/lambda/refreshCacheSubmit/refresh_cache_submit.py
index aa88b5a3..f7861387 100644
--- a/lambda/refreshCacheSubmit/refresh_cache_submit.py
+++ b/lambda/refreshCacheSubmit/refresh_cache_submit.py
@@ -2,25 +2,48 @@ def lambda_handler(event, context):
     from calcloud import common
     import boto3
     import os
+    import dateutil.parser
+    from collections import OrderedDict
 
     gateway = boto3.client("storagegateway", config=common.retry_config)
-    # we only get two concurrent refresh cache operations, but we'll use them both to get some parallelization
-    # refresh every 5 minutes via EventBridge
-    refresh_1 = ["/messages/", "/inputs/", "/blackboard/"]
-    refresh_2 = ["/outputs/", "/control/", "/crds_env_vars/"]
 
     print(event)
 
-    try:
-        response = gateway.refresh_cache(FileShareARN=os.environ["FILESHARE"], FolderList=refresh_1, Recursive=True)
-        print(response)
-    except Exception as exc:
-        print(f"refresh cache failed for {refresh_1} with exception")
-        print(str(exc))
+    # the minute of the event's ISO 8601 timestamp decides whether the
+    # infrequent fileshares are refreshed on this invocation
+    event_time = event["time"]
+    dt = dateutil.parser.isoparse(event_time)
 
-    try:
-        response = gateway.refresh_cache(FileShareARN=os.environ["FILESHARE"], FolderList=refresh_2, Recursive=True)
-        print(response)
-    except Exception as exc:
-        print(f"refresh cache failed for {refresh_2} with exception")
-        print(str(exc))
+    # run every time
+    rapid_fileshares = OrderedDict(
+        [
+            ("blackboard", os.environ["FS_BLACKBOARD"]),
+            ("crds", os.environ["FS_CRDS"]),
+            ("messages", os.environ["FS_MESSAGES"]),
+            ("outputs", os.environ["FS_OUTPUTS"]),
+        ]
+    )
+
+    # ~once per hour
+    # inputs is never written from the cloud
+    # the only file someone may want quickly on-prem is the memModel features,
+    # but that one is written on-prem so doesn't need a refresh to be visible
+    infrequent_fileshares = OrderedDict([("inputs", os.environ["FS_INPUTS"]), ("control", os.environ["FS_CONTROL"])])
+
+    fileshares = list(rapid_fileshares.items())
+
+    # the infrequent shares are refreshed only during minutes 30-39 of the hour;
+    # with more than one invocation in that window they may be refreshed twice,
+    # but that's better than missing an entire hour
+    if 30 <= dt.minute <= 39:
+        fileshares.extend(infrequent_fileshares.items())
+
+    # each share is refreshed independently so that one failure doesn't block the rest
+    for fs_name, fs_arn in fileshares:
+        print(f"{'*'*10} refreshing cache for {fs_name} {'*'*10}")
+        try:
+            response = gateway.refresh_cache(FileShareARN=fs_arn, Recursive=True)
+            print(response)
+        except Exception as exc:
+            print(f"refresh cache failed for {fs_name} with exception")
+            print(str(exc))
diff --git a/terraform/deploy.sh b/terraform/deploy.sh
old mode 100644
new mode 100755
index d7811e0d..ecc8242b
--- a/terraform/deploy.sh
+++ b/terraform/deploy.sh
@@ -3,7 +3,7 @@
 # ADMIN_ARN is set in the ci node env and should not be included in this deploy script
 
 # variables that will likely be changed frequently
-CALCLOUD_VER="0.4.12"
+CALCLOUD_VER="0.4.13"
 CALDP_VER="0.2.8"
 CAL_BASE_IMAGE="stsci/hst-pipeline:CALDP_20210415_CAL_final"
 
diff --git a/terraform/lambda_refresh_cache.tf b/terraform/lambda_refresh_cache.tf
index b6b78c66..991d62a2 100644
--- a/terraform/lambda_refresh_cache.tf
+++ b/terraform/lambda_refresh_cache.tf
@@ -42,6 +42,12 @@ module "calcloud_lambda_refresh_cache_submit" {
   lambda_role = data.aws_ssm_parameter.lambda_refreshCacheSubmit_role.value
 
   environment_variables = merge(local.common_env_vars, {
+    FS_BLACKBOARD = data.aws_ssm_parameter.fs_blackboard_arn.value,
+    FS_CONTROL = data.aws_ssm_parameter.fs_control_arn.value,
+    FS_CRDS = data.aws_ssm_parameter.fs_crds_arn.value,
+    FS_INPUTS = data.aws_ssm_parameter.fs_inputs_arn.value,
+    FS_MESSAGES = data.aws_ssm_parameter.fs_messages_arn.value,
+    FS_OUTPUTS = data.aws_ssm_parameter.fs_outputs_arn.value
   })
 
   tags = {
diff --git a/terraform/locals.tf b/terraform/locals.tf
index 22fc627c..e9db1108 100644
--- a/terraform/locals.tf
+++ b/terraform/locals.tf
@@ -94,7 +94,6 @@ locals {
     CALCLOUD_ENVIRONMENT = local.environment,
     JOBDEFINITIONS = local.job_definitions,
     JOBQUEUES = local.job_queues,
-    FILESHARE=data.aws_ssm_parameter.file_share_arn.value,
     BUCKET=aws_s3_bucket.calcloud.id,
   }
 
diff --git a/terraform/parameters.tf b/terraform/parameters.tf
index 391e3a70..4730ee60 100644
--- a/terraform/parameters.tf
+++ b/terraform/parameters.tf
@@ -62,10 +62,31 @@ data aws_ssm_parameter lambda_refreshCacheSubmit_role {
   name = "/iam/roles/calcloud_lambda_refreshCacheSubmit"
 }
 
-data aws_ssm_parameter file_share_arn {
-  name = "/gateway/fileshare"
+data aws_ssm_parameter fs_blackboard_arn {
+  name = "/gateway/fileshare/blackboard"
 }
 
+data aws_ssm_parameter fs_control_arn {
+  name = "/gateway/fileshare/control"
+}
+
+data aws_ssm_parameter fs_crds_arn {
+  name = "/gateway/fileshare/crds_env_vars"
+}
+
+data aws_ssm_parameter fs_inputs_arn {
+  name = "/gateway/fileshare/inputs"
+}
+
+data aws_ssm_parameter fs_messages_arn {
+  name = "/gateway/fileshare/messages"
+}
+
+data aws_ssm_parameter fs_outputs_arn {
+  name = "/gateway/fileshare/outputs"
+}
+
+
 data aws_ssm_parameter lambda_rescue_role {
   name = "/iam/roles/calcloud_lambda_rescue"
 }
@@ -77,3 +98,5 @@ data aws_ssm_parameter crds_ops {
 data aws_ssm_parameter crds_test {
   name = "/s3/external/crds-test"
 }
+
+