Skip to content

Commit

Permalink
Merge pull request #177 from spacetelescope/release/v0.4.34
Browse files Browse the repository at this point in the history
Release/v0.4.34
  • Loading branch information
bhayden53 authored Mar 2, 2022
2 parents 3edcce3 + fabb23e commit 6ba5844
Show file tree
Hide file tree
Showing 129 changed files with 1,334 additions and 620,282 deletions.
42 changes: 42 additions & 0 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
name: pytest

on: [push, pull_request]

jobs:
coverage:
name: code coverage check
runs-on: ubuntu-18.04

steps:
- name: set up python 3.7.12
uses: actions/setup-python@v2
with:
python-version: 3.7.12

- name: checkout code
uses: actions/checkout@v2

- name: install requirements
run: pip install .[dev]

- name: run pytest code coverage check
run: pytest --cov=lambda --cov-fail-under 75 -rP

pytest:
name: pytest
runs-on: ubuntu-18.04

steps:
- name: set up python 3.7.12
uses: actions/setup-python@v2
with:
python-version: 3.7.12

- name: checkout code
uses: actions/checkout@v2

- name: install requirements
run: pip install .[dev]

- name: run pytest
run: pytest -rP
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,6 @@ scripts/*.json

testing/predict_lambda_tests/*

tests/test_artifacts/*

*.DS_Store
1 change: 0 additions & 1 deletion ami_rotation/ami_rotation_userdata.sh
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@ pwd
date '+%Y-%m-%d %H:%M:%S'

yum install -y -q gcc libpng-devel libjpeg-devel unzip yum-utils
yum update -y -q && yum upgrade -q
cd /home/ec2-user
curl -s "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
unzip -qq awscliv2.zip
Expand Down
5 changes: 3 additions & 2 deletions calcloud/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
"clean",
]

MAX_BROADCAST_MSGS = 10 ** 6 # safety
MAX_BROADCAST_MSGS = 10**6 # safety

# -------------------------------------------------------------

Expand Down Expand Up @@ -832,10 +832,11 @@ def get_io_bundle(bucket=s3.DEFAULT_BUCKET, client=None):
"""Return the IoBundle defined by root S3 `bucket` and accessed using
S3 `client`.
"""
_reject_cross_env_bucket(bucket)
return IoBundle(bucket, client)


def reject_cross_env_bucket(bucket):
def _reject_cross_env_bucket(bucket):
"""Raise an exception if `bucket` does not match BUCKET in os.environ in order to
short circuit lambdas executing based on events from other environments.
"""
Expand Down
2 changes: 1 addition & 1 deletion calcloud/model_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ def convert_target_data(self):
kb += np.float(memstr)
print(kb)
targets["wallclock"] = clock + 1
targets["memory"] = kb / (10 ** 6)
targets["memory"] = kb / (10**6)
targets["mem_bin"] = self.calculate_bin(targets["memory"])
print("Targets:\n", targets)
return targets
Expand Down
2 changes: 1 addition & 1 deletion calcloud/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def get_default_client():

DEFAULT_BUCKET = "s3://" + os.environ.get("BUCKET", "calcloud-UNDEFINED-bucket")

MAX_LIST_OBJECTS = 10 ** 7
MAX_LIST_OBJECTS = 10**7

# -------------------------------------------------------------

Expand Down
11 changes: 10 additions & 1 deletion changelog.md
Original file line number Diff line number Diff line change
@@ -1 +1,10 @@
- force update of nss due to critical security vulnerability
- pytest via moto (no need for aws creds)
- fix ami-rotation issues caused by yum
- updated memory model training
- bugfix: supply a default value for the blackboard 'createdAt' field (previously applied as a hotfix)
- significantly reduce frequency of blackboard lambda to avoid problems with the file in the storage gateway
- refactor terraform for ami rotation user-data
- terraform package version bumps
- remove terraformed ecr repo in each account; use central ecr via ssm param instead
- significant refactor of deployment scripts to handle central ecr and image promotion/deletion that goes with it
- cache refresh period increased from 5 to 9 minutes (continued gateway tweaks)
Binary file modified lambda/JobPredict/models/mem_clf/saved_model.pb
Binary file not shown.
Binary file not shown.
Binary file modified lambda/JobPredict/models/mem_clf/variables/variables.index
Binary file not shown.
Binary file not shown.
Binary file modified lambda/JobPredict/models/mem_clf/weights/ckpt.index
Binary file not shown.
Binary file modified lambda/JobPredict/models/mem_reg/saved_model.pb
Binary file not shown.
Binary file not shown.
Binary file modified lambda/JobPredict/models/mem_reg/variables/variables.index
Binary file not shown.
Binary file not shown.
Binary file modified lambda/JobPredict/models/mem_reg/weights/ckpt.index
Binary file not shown.
2 changes: 1 addition & 1 deletion lambda/JobPredict/models/pt_transform
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"f_lambda": -1.53012642308238, "s_lambda": -0.08805165507126637, "f_mean": 0.5690156475654576, "f_sigma": 0.04177326202270394, "s_mean": 2.0786578109854394, "s_sigma": 1.2068026984185416}
{"f_lambda": -1.5332594992648665, "s_lambda": -0.08870971012686016, "f_mean": 0.5681481130491869, "f_sigma": 0.04156875090896417, "s_mean": 2.07509602630443, "s_sigma": 1.2048559176208944}
Binary file modified lambda/JobPredict/models/wall_reg/saved_model.pb
Binary file not shown.
Binary file not shown.
Binary file modified lambda/JobPredict/models/wall_reg/variables/variables.index
Binary file not shown.
Binary file not shown.
Binary file modified lambda/JobPredict/models/wall_reg/weights/ckpt.index
Binary file not shown.
2 changes: 0 additions & 2 deletions lambda/batch_events/batch_event_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,6 @@ def lambda_handler(event, context):
exit_code = container.get("exitCode", "undefined")
exit_reason = exit_codes.explain(exit_code) if exit_code != "undefined" else exit_code

io.reject_cross_env_bucket(bucket)

comm = io.get_io_bundle(bucket)

metadata = comm.xdata.get(ipppssoot)
Expand Down
3 changes: 2 additions & 1 deletion lambda/blackboard/scrape_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,10 @@ def lambda_handler(event, context):
jobs = page["jobSummaryList"]
print(f"handling {len(jobs)} jobs from {q} in {jobStatus} status...")
for j in jobs:
print(j)
jobId = j["jobId"]

submitDate = int(j["createdAt"] / 1000.0)
submitDate = int(j.get("createdAt", default_timestamp) / 1000.0)

jobStartDate = int(j.get("startedAt", default_timestamp) / 1000.0)
completionDate = int(j.get("stoppedAt", default_timestamp) / 1000.0)
Expand Down
9 changes: 7 additions & 2 deletions lambda/refreshCacheSubmit/refresh_cache_submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ def lambda_handler(event, context):
# run every time
rapid_fileshares = OrderedDict(
[
("blackboard", os.environ["FS_BLACKBOARD"]),
("crds", os.environ["FS_CRDS"]),
("messages", os.environ["FS_MESSAGES"]),
("outputs", os.environ["FS_OUTPUTS"]),
Expand All @@ -26,7 +25,13 @@ def lambda_handler(event, context):
# inputs is never written from the cloud
# the only file someone may want quickly on-prem is the memModel features,
# but that one is written on-prem so doesn't need a refresh to be visible
infrequent_fileshares = OrderedDict([("inputs", os.environ["FS_INPUTS"]), ("control", os.environ["FS_CONTROL"])])
infrequent_fileshares = OrderedDict(
[
("inputs", os.environ["FS_INPUTS"]),
("control", os.environ["FS_CONTROL"]),
("blackboard", os.environ["FS_BLACKBOARD"]),
]
)

for fs_name in rapid_fileshares.keys():
print(f"{'*'*10} refreshing cache for {fs_name} {'*'*10}")
Expand Down
4 changes: 4 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ setup_requires =
setuptools >=41.0.1
install_requires =
boto3
botocore
pyyaml

[options.extras_require]
Expand All @@ -26,8 +27,11 @@ dev =
flake8
pytest
pytest-cov
pytest-doctestplus
tox
bandit
moto[all]
docker

[flake8]
ignore = E501, E203, W503
Expand Down
25 changes: 11 additions & 14 deletions terraform/ami-rotation.tf
Original file line number Diff line number Diff line change
@@ -1,13 +1,3 @@
data "template_file" "ami_rotation_userdata" {
template = file("${path.module}/../ami_rotation/ami_rotation_userdata.sh")
vars = {
environment = var.environment,
admin_arn = nonsensitive(data.aws_ssm_parameter.admin_arn.value),
calcloud_ver = var.awsysver,
log_group = aws_cloudwatch_log_group.ami-rotation.name
}
}

resource "aws_launch_template" "ami_rotation" {
name = "calcloud-ami-rotation${local.environment}"
description = "launch template for running ami rotation via terraform"
Expand All @@ -17,7 +7,14 @@ resource "aws_launch_template" "ami_rotation" {
tags = {
"Name" = "calcloud-ami-rotation${local.environment}"
}
user_data = base64encode(data.template_file.ami_rotation_userdata.rendered)
user_data = base64encode(
templatefile("${path.module}/../ami_rotation/ami_rotation_userdata.sh", {
environment = var.environment,
admin_arn = nonsensitive(data.aws_ssm_parameter.admin_arn.value),
calcloud_ver = var.awsysver,
log_group = aws_cloudwatch_log_group.ami-rotation.name
})
)

vpc_security_group_ids = local.batch_sgs
instance_type = "t3.large"
Expand Down Expand Up @@ -60,7 +57,7 @@ resource "aws_launch_template" "ami_rotation" {

module "calcloud_env_amiRotation" {
source = "terraform-aws-modules/lambda/aws"
version = "~> 1.43.0"
version = "~> 2.26.0"

function_name = "calcloud-env-AmiRotation${local.environment}"
description = "spawns an ec2 bi-weekly which rotates the ami for batch"
Expand Down Expand Up @@ -116,13 +113,13 @@ resource "aws_cloudwatch_event_rule" "ami-rotate-scheduler" {
resource "aws_cloudwatch_event_target" "ami-rotate-scheduler" {
rule = aws_cloudwatch_event_rule.ami-rotate-scheduler.name
target_id = "lambda"
arn = module.calcloud_env_amiRotation.this_lambda_function_arn
arn = module.calcloud_env_amiRotation.lambda_function_arn
}

resource "aws_lambda_permission" "allow_lambda_exec_ami_rotate" {
statement_id = "AllowExecutionFromCloudWatch"
action = "lambda:InvokeFunction"
function_name = module.calcloud_env_amiRotation.this_lambda_function_name
function_name = module.calcloud_env_amiRotation.lambda_function_name
principal = "events.amazonaws.com"
source_arn = aws_cloudwatch_event_rule.ami-rotate-scheduler.arn
}
55 changes: 5 additions & 50 deletions terraform/batch.tf
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,7 @@ terraform {
required_providers {
aws = {
source = "hashicorp/aws"
version = "~> 3.42.0"
}
hashicorp-template = {
source = "hashicorp/template"
version = "~> 2.2.0"
version = "~> 3.65.0"
}
hashicorp-null = {
source = "hashicorp/null"
Expand All @@ -32,17 +28,12 @@ terraform {
}
docker = {
source = "kreuzwerker/docker"
version = "~> 2.11.0"
version = "~> 2.15.0"
}
}
}

data "template_file" "userdata" {
template = file("${path.module}/user_data.sh")
vars = {
// Any var you need to pass to the script
}
}
# See also lambda module version in each lambda .tf file

resource "aws_launch_template" "hstdp" {
# IF YOU CHANGE THE LAUNCH TEMPLATE YOU MUST "TAINT" THE COMPUTE ENVIRONMENT BEFORE DEPLOY
Expand All @@ -58,7 +49,7 @@ resource "aws_launch_template" "hstdp" {
"Name" = "calcloud-hst-worker${local.environment}"
"calcloud-hst" = "calcloud-hst-worker${local.environment}"
}
user_data = base64encode(data.template_file.userdata.rendered)
user_data = base64encode(templatefile("${path.module}/user_data.sh", {}))

vpc_security_group_ids = local.batch_sgs

Expand Down Expand Up @@ -141,42 +132,6 @@ resource "aws_batch_compute_environment" "compute_env" {
}
}

resource "aws_ecr_repository" "caldp_ecr" {
name = "caldp${local.environment}"
image_scanning_configuration {
scan_on_push = true
}
}

resource "aws_ecr_lifecycle_policy" "ecr_lifecycle" {
repository = aws_ecr_repository.caldp_ecr.name

policy = <<EOF
{
"rules": [
{
"rulePriority": 1,
"description": "Expire untagged images older than 7 days",
"selection": {
"tagStatus": "untagged",
"countType": "sinceImagePushed",
"countUnit": "days",
"countNumber": 7
},
"action": {
"type": "expire"
}
}
]
}
EOF
}

data "aws_ecr_image" "caldp_latest" {
repository_name = aws_ecr_repository.caldp_ecr.name
image_tag = var.image_tag
}

# ------------------------------------------------------------------------------------------

# Env setting to simulate caught errors:
Expand All @@ -195,7 +150,7 @@ resource "aws_batch_job_definition" "job_def" {
{"name": "CSYS_VER", "value": "${var.csys_ver}"},
{"name": "CRDSBUCKET", "value": "${local.crds_bucket}"}
],
"image": "${aws_ecr_repository.caldp_ecr.repository_url}:${data.aws_ecr_image.caldp_latest.image_tag}",
"image": "${local.ecr_caldp_batch_image}",
"jobRoleArn": "${nonsensitive(data.aws_ssm_parameter.batch_job_role.value)}",
"executionRoleArn": "${nonsensitive(data.aws_ssm_parameter.batch_exec.value)}",
"user": "developer",
Expand Down
Loading

0 comments on commit 6ba5844

Please sign in to comment.