From 7e0088b19c60828663d74305e171fcb748aec3b7 Mon Sep 17 00:00:00 2001
From: Chris Gilmer <chris@truss.works>
Date: Fri, 8 Nov 2019 11:55:29 -0800
Subject: [PATCH 1/5] Allow local testing of lambdas

---
 .envrc                    | 72 +++++++++++++++++++++++++++++++++++++++
 .envrc.local.template     | 43 +++++++++++++++++++++++
 .gitignore                |  7 ++++
 Dockerfile                | 45 ++++++++++++++++++++++++
 Makefile                  | 25 +++++++-------
 README.md                 | 25 ++++++++++++++
 build_lambda.sh           | 47 -------------------------
 scripts/run-scan-lambda   | 46 +++++++++++++++++++++++++
 scripts/run-update-lambda | 23 +++++++++++++
 9 files changed, 273 insertions(+), 60 deletions(-)
 create mode 100644 .envrc
 create mode 100644 .envrc.local.template
 create mode 100644 Dockerfile
 delete mode 100755 build_lambda.sh
 create mode 100755 scripts/run-scan-lambda
 create mode 100755 scripts/run-update-lambda

diff --git a/.envrc b/.envrc
new file mode 100644
index 00000000..19328600
--- /dev/null
+++ b/.envrc
@@ -0,0 +1,72 @@
+#! /usr/bin/env bash
+
+##########################################
+# DO NOT MAKE LOCAL CHANGES TO THIS FILE #
+#                                        #
+# Vars in this file can be overridden by #
+# exporting them in .envrc.local         #
+##########################################
+
+# Add local paths for binaries and scripts
+PATH_add ./scripts
+
+# ShellCheck complains about things like `foo=$(cmd)` because you lose the
+# return value of `cmd`. That said, we're not using `set -e`, so we aren't
+# really concerned about return values. The following `true`, applies the
+# rule to the entire file.
+# See: https://github.com/koalaman/shellcheck/wiki/SC2155
+# shellcheck disable=SC2155
+true
+
+required_vars=()
+var_docs=()
+
+# Declare an environment variable as required.
+#
+#   require VAR_NAME "Documentation about how to define valid values"
+require() {
+  required_vars+=("$1")
+  var_docs+=("$2")
+}
+
+# Check all variables declared as required. For each one that is unset or empty,
+# log its name and documentation, then log an error pointing at .envrc.local.
+check_required_variables() {
+  for i in "${!required_vars[@]}"; do
+    var=${required_vars[i]}
+    if [[ -z "${!var}" ]]; then
+      log_status "${var} is not set: ${var_docs[i]}"
+      missing_var=true
+    fi
+  done
+
+  if [[ $missing_var == "true" ]]; then
+    log_error "Your environment is missing some variables!"
+    log_error "Set the above variables in .envrc.local and try again."
+  fi
+}
+
+#########################
+# Project Configuration #
+#########################
+
+require AV_DEFINITION_S3_BUCKET "Add this variable to your .envrc.local"
+require AV_DEFINITION_S3_PREFIX "Add this variable to your .envrc.local"
+
+require TEST_BUCKET "Add this variable to your .envrc.local"
+require TEST_KEY "Add this variable to your .envrc.local"
+
+##############################################
+# Load Local Overrides and Check Environment #
+##############################################
+
+# Load a local overrides file. Any changes you want to make for your local
+# environment should live in that file.
+
+if [ -e .envrc.local ]
+then
+  source_env .envrc.local
+fi
+
+# Check that all required environment variables are set
+check_required_variables
diff --git a/.envrc.local.template b/.envrc.local.template
new file mode 100644
index 00000000..8bd08938
--- /dev/null
+++ b/.envrc.local.template
@@ -0,0 +1,43 @@
+#! /usr/bin/env bash
+
+#
+# Copy this file `cp .envrc.local.template .envrc.local` and modify the variables below for testing
+#
+
+# Optional AWS Parameters
+# WARNING: It's not recommended to keep credentials in this file!
+# export AWS_ACCESS_KEY_ID
+# export AWS_DEFAULT_REGION
+# export AWS_REGION
+# export AWS_SECRET_ACCESS_KEY
+# export AWS_SESSION_TOKEN
+
+# Required for both scan and update lambdas scripts
+export AV_DEFINITION_S3_BUCKET=""
+export AV_DEFINITION_S3_PREFIX=""
+
+# Required for scan lambda script
+export TEST_BUCKET=""
+export TEST_KEY=""
+
+# Uncomment and change as needed for lambda scripts
+# export AV_DEFINITION_FILE_PREFIXES
+# export AV_DEFINITION_FILE_SUFFIXES
+# export AV_DEFINITION_PATH
+# export AV_DELETE_INFECTED_FILES
+# export AV_PROCESS_ORIGINAL_VERSION_ONLY
+# export AV_SCAN_START_METADATA
+# export AV_SCAN_START_SNS_ARN
+# export AV_SIGNATURE_METADATA
+# export AV_SIGNATURE_OK
+# export AV_SIGNATURE_UNKNOWN
+# export AV_STATUS_CLEAN
+# export AV_STATUS_INFECTED
+# export AV_STATUS_METADATA
+# export AV_STATUS_SNS_ARN
+# export AV_STATUS_SNS_PUBLISH_CLEAN
+# export AV_STATUS_SNS_PUBLISH_INFECTED
+# export AV_TIMESTAMP_METADATA
+# export CLAMAVLIB_PATH
+# export CLAMSCAN_PATH
+# export FRESHCLAM_PATH
diff --git a/.gitignore b/.gitignore
index c3744a26..da9084e7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -110,3 +110,10 @@ ENV/
 .coverage
 
 .DS_Store
+tmp/
+
+# direnv
+.envrc.local
+
+# EICAR Files
+*eicar*
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 00000000..2805b357
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,45 @@
+FROM amazonlinux:2
+
+# Set up working directories
+RUN mkdir -p /opt/app
+RUN mkdir -p /opt/app/build
+RUN mkdir -p /opt/app/bin/
+
+# Copy in the lambda source
+WORKDIR /opt/app
+COPY ./*.py /opt/app/
+COPY requirements.txt /opt/app/requirements.txt
+
+# Install packages
+RUN yum update -y
+RUN yum install -y cpio python2-pip yum-utils zip unzip less
+RUN yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
+
+# This had --no-cache-dir, tracing through multiple tickets led to a problem in wheel
+RUN pip install -r requirements.txt
+RUN rm -rf /root/.cache/pip
+
+# Download libraries we need to run in lambda
+WORKDIR /tmp
+RUN yumdownloader -x \*i686 --archlist=x86_64 clamav clamav-lib clamav-update json-c pcre2
+RUN rpm2cpio clamav-0*.rpm | cpio -idmv
+RUN rpm2cpio clamav-lib*.rpm | cpio -idmv
+RUN rpm2cpio clamav-update*.rpm | cpio -idmv
+RUN rpm2cpio json-c*.rpm | cpio -idmv
+RUN rpm2cpio pcre*.rpm | cpio -idmv
+
+# Copy over the binaries and libraries
+RUN cp /tmp/usr/bin/clamscan /tmp/usr/bin/freshclam /tmp/usr/lib64/* /opt/app/bin/
+
+# Fix the freshclam.conf settings
+RUN echo "DatabaseMirror database.clamav.net" > /opt/app/bin/freshclam.conf
+RUN echo "CompressLocalDatabase yes" >> /opt/app/bin/freshclam.conf
+
+# Create the zip file
+WORKDIR /opt/app
+RUN zip -r9 --exclude="*test*" /opt/app/build/lambda.zip *.py bin
+
+WORKDIR /usr/lib/python2.7/site-packages
+RUN zip -r9 /opt/app/build/lambda.zip *
+
+WORKDIR /opt/app
diff --git a/Makefile b/Makefile
index 7eb87562..a527b641 100644
--- a/Makefile
+++ b/Makefile
@@ -27,25 +27,16 @@ all: archive  ## Build the entire project
 clean:  ## Clean build artifacts
 	rm -rf bin/
 	rm -rf build/
+	rm -rf tmp/
 	rm -f .coverage
 	find ./ -type d -name '__pycache__' -delete
 	find ./ -type f -name '*.pyc' -delete
 
 .PHONY: archive
 archive: clean  ## Create the archive for AWS lambda
-ifeq ($(circleci), true)
-	docker create -v $(container_dir) --name src alpine:3.4 /bin/true
-	docker cp $(current_dir)/. src:$(container_dir)
-	docker run --rm -ti \
-		--volumes-from src \
-		amazonlinux:$(AMZ_LINUX_VERSION) \
-		/bin/bash -c "cd $(container_dir) && ./build_lambda.sh"
-else
-	docker run --rm -ti \
-		-v $(current_dir):$(container_dir) \
-		amazonlinux:$(AMZ_LINUX_VERSION) \
-		/bin/bash -c "cd $(container_dir) && ./build_lambda.sh"
-endif
+	docker build -t bucket-antivirus-function:latest .
+	mkdir -p ./build/
+	docker run -v $(current_dir)/build:/opt/mount --rm --entrypoint cp bucket-antivirus-function:latest /opt/app/build/lambda.zip /opt/mount/lambda.zip
 
 .PHONY: pre_commit_install  ## Ensure that pre-commit hook is installed and kept up to date
 pre_commit_install: .git/hooks/pre-commit ## Ensure pre-commit is installed
@@ -65,3 +56,11 @@ test: clean  ## Run python tests
 .PHONY: coverage
 coverage: clean  ## Run python tests with coverage
 	nosetests --with-coverage
+
+.PHONY: scan
+scan: ./build/lambda.zip ## Run scan function locally
+	scripts/run-scan-lambda $(TEST_BUCKET) $(TEST_KEY)
+
+.PHONY: update
+update: ./build/lambda.zip ## Run update function locally
+	scripts/run-update-lambda
diff --git a/README.md b/README.md
index 558835c7..33ecc7b6 100644
--- a/README.md
+++ b/README.md
@@ -367,6 +367,31 @@ pip install -r requirements-dev.txt
 make test
 ```
 
+### Local lambdas
+
+You can run the lambdas locally to test out what they are doing without deploying to AWS. This is accomplished
+by using docker containers that act similarly to lambda. You will need to have set up some local variables in your
+`.envrc.local` file and modify them appropriately first before running `direnv allow`. If you do not have `direnv`
+it can be installed with `brew install direnv`.
+
+For the Scan lambda you will need a test file uploaded to S3 and the variables `TEST_BUCKET` and `TEST_KEY`
+set in your `.envrc.local` file. Then you can run:
+
+```sh
+direnv allow
+make archive scan
+```
+
+If you want a file that will be recognized as a virus you can download a test file from the [EICAR](https://www.eicar.org/?page_id=3950)
+website and upload it to your bucket.
+
+For the Update lambda you can run:
+
+```sh
+direnv allow
+make archive update
+```
+
 ## License
 
 ```text
diff --git a/build_lambda.sh b/build_lambda.sh
deleted file mode 100755
index 11d0ab46..00000000
--- a/build_lambda.sh
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/usr/bin/env bash
-
-# Upside Travel, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-lambda_output_file=/opt/app/build/lambda.zip
-
-set -e
-
-yum update -y
-yum install -y cpio python2-pip yum-utils zip
-yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
-pip install --no-cache-dir virtualenv
-virtualenv env
-. env/bin/activate
-# This had --no-cache-dir, tracing through multiple tickets led to a problem in wheel
-pip install -r requirements.txt
-rm -rf /root/.cache/pip
-
-pushd /tmp
-yumdownloader -x \*i686 --archlist=x86_64 clamav clamav-lib clamav-update json-c pcre2
-rpm2cpio clamav-0*.rpm | cpio -idmv
-rpm2cpio clamav-lib*.rpm | cpio -idmv
-rpm2cpio clamav-update*.rpm | cpio -idmv
-rpm2cpio json-c*.rpm | cpio -idmv
-rpm2cpio pcre*.rpm | cpio -idmv
-popd
-mkdir -p bin
-cp /tmp/usr/bin/clamscan /tmp/usr/bin/freshclam /tmp/usr/lib64/* bin/.
-echo "DatabaseMirror database.clamav.net" > bin/freshclam.conf
-echo "CompressLocalDatabase yes" >> bin/freshclam.conf
-
-mkdir -p build
-zip -r9 $lambda_output_file *.py bin
-cd env/lib/python2.7/site-packages
-zip -r9 $lambda_output_file *
diff --git a/scripts/run-scan-lambda b/scripts/run-scan-lambda
new file mode 100755
index 00000000..1254a829
--- /dev/null
+++ b/scripts/run-scan-lambda
@@ -0,0 +1,46 @@
+#! /usr/bin/env bash
+
+set -eu -o pipefail
+
+#
+# Run the scan.lambda_handler locally in a docker container
+#
+
+if [ $# -lt 2 ]; then
+  echo 1>&2 "$0: not enough arguments. Please provide BUCKET and KEY"
+  exit 1
+fi
+
+BUCKET=$1
+KEY=$2
+EVENT="{\"Records\": [{\"s3\": {\"bucket\": {\"name\": \"${BUCKET}\"}, \"object\": {\"key\": \"${KEY}\"}}}]}"
+echo "Sending S3 event: ${EVENT}"
+
+# Verify that the file exists first
+aws s3 ls "s3://${BUCKET}/${KEY}"
+
+rm -rf tmp/
+unzip -qq -d ./tmp build/lambda.zip
+
+docker run --rm \
+  -v "$(pwd)/tmp/:/var/task" \
+  -e AV_DEFINITION_S3_BUCKET \
+  -e AV_DEFINITION_S3_PREFIX \
+  -e AV_DELETE_INFECTED_FILES \
+  -e AV_PROCESS_ORIGINAL_VERSION_ONLY \
+  -e AV_SCAN_START_METADATA \
+  -e AV_SCAN_START_SNS_ARN \
+  -e AV_SIGNATURE_METADATA \
+  -e AV_STATUS_CLEAN \
+  -e AV_STATUS_INFECTED \
+  -e AV_STATUS_METADATA \
+  -e AV_STATUS_SNS_ARN \
+  -e AV_STATUS_SNS_PUBLISH_CLEAN \
+  -e AV_STATUS_SNS_PUBLISH_INFECTED \
+  -e AV_TIMESTAMP_METADATA \
+  -e AWS_ACCESS_KEY_ID \
+  -e AWS_DEFAULT_REGION \
+  -e AWS_REGION \
+  -e AWS_SECRET_ACCESS_KEY \
+  -e AWS_SESSION_TOKEN \
+  lambci/lambda:python2.7 scan.lambda_handler "${EVENT}"
diff --git a/scripts/run-update-lambda b/scripts/run-update-lambda
new file mode 100755
index 00000000..47ccf18e
--- /dev/null
+++ b/scripts/run-update-lambda
@@ -0,0 +1,23 @@
+#! /usr/bin/env bash
+
+set -eu -o pipefail
+
+#
+# Run the update.lambda_handler locally in a docker container
+#
+
+rm -rf tmp/
+unzip -qq -d ./tmp build/lambda.zip
+
+docker run --rm \
+  -v "$(pwd)/tmp/:/var/task" \
+  -e AV_DEFINITION_PATH \
+  -e AV_DEFINITION_S3_BUCKET \
+  -e AV_DEFINITION_S3_PREFIX \
+  -e AWS_ACCESS_KEY_ID \
+  -e AWS_DEFAULT_REGION \
+  -e AWS_REGION \
+  -e AWS_SECRET_ACCESS_KEY \
+  -e AWS_SESSION_TOKEN \
+  -e CLAMAVLIB_PATH \
+  lambci/lambda:python2.7 update.lambda_handler

From e96e42b16fec9a8aa0d18c0b1228378a65f38f85 Mon Sep 17 00:00:00 2001
From: Chris Gilmer <chris@truss.works>
Date: Fri, 8 Nov 2019 12:31:08 -0800
Subject: [PATCH 2/5] Limit resources on docker tasks to mimic AWS

---
 .envrc                    | 5 +++++
 .envrc.local.template     | 5 +++++
 scripts/run-scan-lambda   | 3 +++
 scripts/run-update-lambda | 3 +++
 4 files changed, 16 insertions(+)

diff --git a/.envrc b/.envrc
index 19328600..be5d7f6f 100644
--- a/.envrc
+++ b/.envrc
@@ -50,6 +50,11 @@ check_required_variables() {
 # Project Configuration #
 #########################
 
+# Lamdba resource constraints (Override in .envrc.local)
+# https://docs.docker.com/config/containers/resource_constraints/
+export MEM=512m
+export CPUS=1.0
+
 require AV_DEFINITION_S3_BUCKET "Add this variable to your .envrc.local"
 require AV_DEFINITION_S3_PREFIX "Add this variable to your .envrc.local"
 
diff --git a/.envrc.local.template b/.envrc.local.template
index 8bd08938..c040e468 100644
--- a/.envrc.local.template
+++ b/.envrc.local.template
@@ -12,6 +12,11 @@
 # export AWS_SECRET_ACCESS_KEY
 # export AWS_SESSION_TOKEN
 
+# Lamdba resource constraints you can override here
+# https://docs.docker.com/config/containers/resource_constraints/
+# export MEM=512m
+# export CPUS=1.0
+
 # Required for both scan and update lambdas scripts
 export AV_DEFINITION_S3_BUCKET=""
 export AV_DEFINITION_S3_PREFIX=""
diff --git a/scripts/run-scan-lambda b/scripts/run-scan-lambda
index 1254a829..9a7c835a 100755
--- a/scripts/run-scan-lambda
+++ b/scripts/run-scan-lambda
@@ -43,4 +43,7 @@ docker run --rm \
   -e AWS_REGION \
   -e AWS_SECRET_ACCESS_KEY \
   -e AWS_SESSION_TOKEN \
+  --memory="${MEM}" \
+  --memory-swap="${MEM}" \
+  --cpus="${CPUS}" \
   lambci/lambda:python2.7 scan.lambda_handler "${EVENT}"
diff --git a/scripts/run-update-lambda b/scripts/run-update-lambda
index 47ccf18e..cdb64f4b 100755
--- a/scripts/run-update-lambda
+++ b/scripts/run-update-lambda
@@ -20,4 +20,7 @@ docker run --rm \
   -e AWS_SECRET_ACCESS_KEY \
   -e AWS_SESSION_TOKEN \
   -e CLAMAVLIB_PATH \
+  --memory="${MEM}" \
+  --memory-swap="${MEM}" \
+  --cpus="${CPUS}" \
   lambci/lambda:python2.7 update.lambda_handler

From eb0862cb20b74cf0c3b40d8137cda80d1e9c4438 Mon Sep 17 00:00:00 2001
From: Chris Gilmer <chris@truss.works>
Date: Fri, 8 Nov 2019 12:44:53 -0800
Subject: [PATCH 3/5] Update allowed expected errors when getting tags

---
 clamav.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/clamav.py b/clamav.py
index f5694408..ea83b62a 100644
--- a/clamav.py
+++ b/clamav.py
@@ -146,7 +146,12 @@ def md5_from_s3_tags(s3_client, bucket, key):
     try:
         tags = s3_client.get_object_tagging(Bucket=bucket, Key=key)["TagSet"]
     except botocore.exceptions.ClientError as e:
-        expected_errors = {"404", "AccessDenied", "NoSuchKey"}
+        expected_errors = {
+            "404",  # Object does not exist
+            "AccessDenied",  # Object cannot be accessed
+            "NoSuchKey",  # Object does not exist
+            "MethodNotAllowed",  # Object deleted in bucket with versioning
+        }
         if e.response["Error"]["Code"] in expected_errors:
             return ""
         else:

From 3757dd82c69c119cf0e82ef32ebd0d49b28651f6 Mon Sep 17 00:00:00 2001
From: Chris Gilmer <chris@truss.works>
Date: Fri, 8 Nov 2019 13:06:36 -0800
Subject: [PATCH 4/5] Update mem recommendations to 1024MB

---
 .envrc                | 2 +-
 .envrc.local.template | 2 +-
 README.md             | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.envrc b/.envrc
index be5d7f6f..c542a21f 100644
--- a/.envrc
+++ b/.envrc
@@ -52,7 +52,7 @@ check_required_variables() {
 
 # Lamdba resource constraints (Override in .envrc.local)
 # https://docs.docker.com/config/containers/resource_constraints/
-export MEM=512m
+export MEM=1024m
 export CPUS=1.0
 
 require AV_DEFINITION_S3_BUCKET "Add this variable to your .envrc.local"
diff --git a/.envrc.local.template b/.envrc.local.template
index c040e468..a196e5d3 100644
--- a/.envrc.local.template
+++ b/.envrc.local.template
@@ -14,7 +14,7 @@
 
 # Lamdba resource constraints you can override here
 # https://docs.docker.com/config/containers/resource_constraints/
-# export MEM=512m
+# export MEM=1024m
 # export CPUS=1.0
 
 # Required for both scan and update lambdas scripts
diff --git a/README.md b/README.md
index 33ecc7b6..b3f7c04e 100644
--- a/README.md
+++ b/README.md
@@ -133,7 +133,7 @@ and set its value to the name of the bucket created to store your AV
 definitions.
 11. Set *Lambda handler* to `update.lambda_handler`
 12. Under *Basic Settings*, set *Timeout* to **5 minutes** and *Memory* to
-**512**
+**1024**
 13. Save and test your function.  If prompted for test data, just use
 the default provided.
 

From c1b672bce2d5c6c311109d51192c3efd5140ae5a Mon Sep 17 00:00:00 2001
From: Chris Gilmer <chris@truss.works>
Date: Fri, 8 Nov 2019 13:06:48 -0800
Subject: [PATCH 5/5] Add names to docker containers

---
 scripts/run-scan-lambda   | 3 +++
 scripts/run-update-lambda | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/scripts/run-scan-lambda b/scripts/run-scan-lambda
index 9a7c835a..2d6f21b0 100755
--- a/scripts/run-scan-lambda
+++ b/scripts/run-scan-lambda
@@ -22,6 +22,8 @@ aws s3 ls "s3://${BUCKET}/${KEY}"
 rm -rf tmp/
 unzip -qq -d ./tmp build/lambda.zip
 
+NAME="antivirus-scan"
+
 docker run --rm \
   -v "$(pwd)/tmp/:/var/task" \
   -e AV_DEFINITION_S3_BUCKET \
@@ -46,4 +48,5 @@ docker run --rm \
   --memory="${MEM}" \
   --memory-swap="${MEM}" \
   --cpus="${CPUS}" \
+  --name="${NAME}" \
   lambci/lambda:python2.7 scan.lambda_handler "${EVENT}"
diff --git a/scripts/run-update-lambda b/scripts/run-update-lambda
index cdb64f4b..66706a89 100755
--- a/scripts/run-update-lambda
+++ b/scripts/run-update-lambda
@@ -9,6 +9,8 @@ set -eu -o pipefail
 rm -rf tmp/
 unzip -qq -d ./tmp build/lambda.zip
 
+NAME="antivirus-update"
+
 docker run --rm \
   -v "$(pwd)/tmp/:/var/task" \
   -e AV_DEFINITION_PATH \
@@ -23,4 +25,5 @@ docker run --rm \
   --memory="${MEM}" \
   --memory-swap="${MEM}" \
   --cpus="${CPUS}" \
+  --name="${NAME}" \
   lambci/lambda:python2.7 update.lambda_handler