Skip to content

Commit

Permalink
file share / cache refresh refactor (#114)
Browse files Browse the repository at this point in the history
* cache refresh refactoring for multiple file shares

* use OrderedDicts for determinism

* deploy updates in prep for release
  • Loading branch information
bhayden53 authored May 5, 2021
1 parent 65059c6 commit 3eff76d
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 20 deletions.
55 changes: 39 additions & 16 deletions lambda/refreshCacheSubmit/refresh_cache_submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,48 @@ def lambda_handler(event, context):
from calcloud import common
import boto3
import os
import dateutil.parser
from collections import OrderedDict

gateway = boto3.client("storagegateway", config=common.retry_config)
# we only get two concurrent refresh cache operations, but we'll use them both to get some parallelization
# refresh every 5 minutes via EventBridge
refresh_1 = ["/messages/", "/inputs/", "/blackboard/"]
refresh_2 = ["/outputs/", "/control/", "/crds_env_vars/"]

print(event)

try:
response = gateway.refresh_cache(FileShareARN=os.environ["FILESHARE"], FolderList=refresh_1, Recursive=True)
print(response)
except Exception as exc:
print(f"refresh cache failed for {refresh_1} with exception")
print(str(exc))
event_time = event["time"]
dt = dateutil.parser.isoparse(event_time)

try:
response = gateway.refresh_cache(FileShareARN=os.environ["FILESHARE"], FolderList=refresh_2, Recursive=True)
print(response)
except Exception as exc:
print(f"refresh cache failed for {refresh_2} with exception")
print(str(exc))
# run every time: maps a short share name (used only in log output) to the
# file share ARN read from this lambda's environment.
# Insertion order is the order in which the shares are refreshed.
rapid_fileshares = OrderedDict()
rapid_fileshares["blackboard"] = os.environ["FS_BLACKBOARD"]
rapid_fileshares["crds"] = os.environ["FS_CRDS"]
rapid_fileshares["messages"] = os.environ["FS_MESSAGES"]
rapid_fileshares["outputs"] = os.environ["FS_OUTPUTS"]

# ~once per hour
# inputs is never written from the cloud; the only file someone may want
# quickly on-prem is the memModel features, but that one is written on-prem
# so it doesn't need a refresh to be visible.
# Maps a short share name (used only in log output) to the file share ARN
# read from this lambda's environment, in refresh order.
infrequent_fileshares = OrderedDict()
infrequent_fileshares["inputs"] = os.environ["FS_INPUTS"]
infrequent_fileshares["control"] = os.environ["FS_CONTROL"]

# Request a recursive cache refresh for every rapid share, in order.
# A failure on one share is printed and the loop moves on to the next share,
# so one bad share does not block the others.
for share_name, share_arn in rapid_fileshares.items():
    print(f"{'*'*10} refreshing cache for {share_name} {'*'*10}")
    try:
        print(gateway.refresh_cache(FileShareARN=share_arn, Recursive=True))
    except Exception as err:
        print(f"refresh cache failed for {share_name} with exception")
        print(str(err))

# Refresh the infrequent shares only when the triggering event's minute falls
# in the 30-39 window, i.e. roughly once per hour.
# The window is ten minutes wide on purpose: it may run twice in that window
# (e.g. with a 5-minute schedule -- assumed, confirm against the EventBridge
# rule), but that's better than missing an entire hour.
# NOTE: the previous test, `str(dt.minute)[0] in ("3")`, was a substring check
# against the string "3" and also matched minute 3; the explicit range below
# limits the refresh to the intended window.
if 30 <= dt.minute <= 39:
    for fs_name, fs_arn in infrequent_fileshares.items():
        print(f"{'*'*10} refreshing cache for {fs_name} {'*'*10}")
        try:
            response = gateway.refresh_cache(FileShareARN=fs_arn, Recursive=True)
            print(response)
        except Exception as exc:
            # Best effort: report the failure and continue with the next share.
            print(f"refresh cache failed for {fs_name} with exception")
            print(str(exc))
2 changes: 1 addition & 1 deletion terraform/deploy.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# ADMIN_ARN is set in the ci node env and should not be included in this deploy script

# variables that will likely be changed frequently
CALCLOUD_VER="0.4.12"
CALCLOUD_VER="0.4.13"
CALDP_VER="0.2.8"
CAL_BASE_IMAGE="stsci/hst-pipeline:CALDP_20210415_CAL_final"

Expand Down
6 changes: 6 additions & 0 deletions terraform/lambda_refresh_cache.tf
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ module "calcloud_lambda_refresh_cache_submit" {
lambda_role = data.aws_ssm_parameter.lambda_refreshCacheSubmit_role.value

environment_variables = merge(local.common_env_vars, {
FS_BLACKBOARD = data.aws_ssm_parameter.fs_blackboard_arn.value,
FS_CONTROL = data.aws_ssm_parameter.fs_control_arn.value,
FS_CRDS = data.aws_ssm_parameter.fs_crds_arn.value,
FS_INPUTS = data.aws_ssm_parameter.fs_inputs_arn.value,
FS_MESSAGES = data.aws_ssm_parameter.fs_messages_arn.value,
FS_OUTPUTS = data.aws_ssm_parameter.fs_outputs_arn.value
})

tags = {
Expand Down
1 change: 0 additions & 1 deletion terraform/locals.tf
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,6 @@ locals {
CALCLOUD_ENVIRONMENT = local.environment,
JOBDEFINITIONS = local.job_definitions,
JOBQUEUES = local.job_queues,
FILESHARE=data.aws_ssm_parameter.file_share_arn.value,
BUCKET=aws_s3_bucket.calcloud.id,
}

Expand Down
27 changes: 25 additions & 2 deletions terraform/parameters.tf
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,31 @@ data aws_ssm_parameter lambda_refreshCacheSubmit_role {
name = "/iam/roles/calcloud_lambda_refreshCacheSubmit"
}

data aws_ssm_parameter file_share_arn {
name = "/gateway/fileshare"
data aws_ssm_parameter fs_blackboard_arn {
name = "/gateway/fileshare/blackboard"
}

data aws_ssm_parameter fs_control_arn {
name = "/gateway/fileshare/control"
}

data aws_ssm_parameter fs_crds_arn {
name = "/gateway/fileshare/crds_env_vars"
}

data aws_ssm_parameter fs_inputs_arn {
name = "/gateway/fileshare/inputs"
}

data aws_ssm_parameter fs_messages_arn {
name = "/gateway/fileshare/messages"
}

data aws_ssm_parameter fs_outputs_arn {
name = "/gateway/fileshare/outputs"
}


data aws_ssm_parameter lambda_rescue_role {
name = "/iam/roles/calcloud_lambda_rescue"
}
Expand All @@ -77,3 +98,5 @@ data aws_ssm_parameter crds_ops {
data aws_ssm_parameter crds_test {
name = "/s3/external/crds-test"
}


0 comments on commit 3eff76d

Please sign in to comment.