From c7220a64bffc0ee5e786f563cc611fa5a1cb56e5 Mon Sep 17 00:00:00 2001 From: Milan Simonovic Date: Fri, 6 May 2022 11:00:03 +0200 Subject: [PATCH] Firewall (#22) * add CloudFront in front of ALB * enable WAF on CloudFront (so other services call bypass waf and hit ALB) --- main.tf | 272 ++++++++++++++++----------- modules/waf/main.tf | 72 +++++-- scripts/firewall/analyzed/analyze.js | 86 +++++++++ scripts/firewall/analyzed/to_json.sh | 17 ++ scripts/firewall/sync.sh | 9 + 5 files changed, 329 insertions(+), 127 deletions(-) create mode 100644 scripts/firewall/analyzed/analyze.js create mode 100755 scripts/firewall/analyzed/to_json.sh create mode 100755 scripts/firewall/sync.sh diff --git a/main.tf b/main.tf index 78e0f20..4c4b9d5 100644 --- a/main.tf +++ b/main.tf @@ -2,8 +2,41 @@ provider "aws" { region = var.aws_region } +# to work with CLOUDFRONT firewall region must be us-east-1 +provider "aws" { + region = "us-east-1" + alias = "us-east-1" +} + data "aws_caller_identity" "current" {} +data "aws_elastic_beanstalk_hosted_zone" "current" {} + +data "aws_elastic_beanstalk_solution_stack" "multi_docker" { + most_recent = true + name_regex = "^64bit Amazon Linux (.*) Multi-container Docker (.*)$" +} + +### +# Find a certificate for our domain that has status ISSUED +# NOTE that for now, this infra depends on managing certs INSIDE AWS/ACM +### +data "aws_acm_certificate" "localregion" { + domain = var.aws_acm_certificate_domain + statuses = ["ISSUED"] +} + +data "aws_acm_certificate" "cdn" { + provider = aws.us-east-1 + domain = var.aws_acm_certificate_domain + statuses = ["ISSUED"] +} + +locals { + namespace = "${var.eb_env_namespace}-${var.eb_env_stage}-${var.eb_env_name}" + alb_url = "eb-${var.aws_region}-${var.eb_env_stage}.${var.aws_acm_certificate_domain}" +} + module "vpc" { source = "git::https://github.com/cloudposse/terraform-aws-vpc.git?ref=tags/0.25.0" namespace = var.eb_env_namespace @@ -42,17 +75,6 @@ module "elastic_beanstalk_application" { description 
= var.eb_env_description } -data "aws_elastic_beanstalk_hosted_zone" "current" {} - -data "aws_elastic_beanstalk_solution_stack" "multi_docker" { - most_recent = true - name_regex = "^64bit Amazon Linux (.*) Multi-container Docker (.*)$" -} - -locals { - namespace = "${var.eb_env_namespace}-${var.eb_env_stage}-${var.eb_env_name}" -} - ### # all infra for transcribing mentor videos with py-transcribe-aws module # (IAM, s3 bucket, keys, policies, etc) @@ -129,39 +151,32 @@ resource "aws_ssm_parameter" "cdn_static_param" { value = module.cdn_static.s3_bucket_arn } + ### # the main elastic beanstalk env for this app ### module "elastic_beanstalk_environment" { - source = "git::https://github.com/cloudposse/terraform-aws-elastic-beanstalk-environment.git?ref=tags/0.40.0" - namespace = var.eb_env_namespace - stage = var.eb_env_stage - name = var.eb_env_name - attributes = var.eb_env_attributes - tags = var.eb_env_tags - delimiter = var.eb_env_delimiter - description = var.eb_env_description - region = var.aws_region - availability_zone_selector = var.eb_env_availability_zone_selector - # NOTE: We would prefer for the DNS name - # of module.elastic_beanstalk_environment - # to be staticly set via inputs, - # but have been running into other/different problems - # trying to get that to work - # (for one thing, permissions error anytime try to set - # elastic_beanstalk_environment.dns_zone_id) - # dns_zone_id = data.aws_elastic_beanstalk_hosted_zone.current.id - # dns_zone_id = var.dns_zone_id + source = "git::https://github.com/cloudposse/terraform-aws-elastic-beanstalk-environment.git?ref=tags/0.40.0" + namespace = var.eb_env_namespace + stage = var.eb_env_stage + name = var.eb_env_name + attributes = var.eb_env_attributes + tags = var.eb_env_tags + delimiter = var.eb_env_delimiter + description = var.eb_env_description + region = var.aws_region + availability_zone_selector = var.eb_env_availability_zone_selector wait_for_ready_timeout = var.eb_env_wait_for_ready_timeout 
elastic_beanstalk_application_name = module.elastic_beanstalk_application.elastic_beanstalk_application_name environment_type = var.eb_env_environment_type loadbalancer_type = var.eb_env_loadbalancer_type - loadbalancer_certificate_arn = data.aws_acm_certificate.localregion.arn loadbalancer_ssl_policy = var.eb_env_loadbalancer_ssl_policy - elb_scheme = var.eb_env_elb_scheme - tier = "WebServer" - version_label = var.eb_env_version_label - force_destroy = var.eb_env_log_bucket_force_destroy + loadbalancer_certificate_arn = data.aws_acm_certificate.localregion.arn + + elb_scheme = var.eb_env_elb_scheme + tier = "WebServer" + version_label = var.eb_env_version_label + force_destroy = var.eb_env_log_bucket_force_destroy enable_stream_logs = var.eb_env_enable_stream_logs logs_delete_on_terminate = var.eb_env_logs_delete_on_terminate @@ -211,7 +226,6 @@ module "elastic_beanstalk_environment" { GOOGLE_CLIENT_ID = var.google_client_id, JWT_SECRET = var.secret_jwt_key, MONGO_URI = var.secret_mongo_uri, - STAGE = "v2", STATIC_AWS_ACCESS_KEY_ID = aws_iam_access_key.static_upload_policy_access_key.id, STATIC_AWS_SECRET_ACCESS_KEY = aws_iam_access_key.static_upload_policy_access_key.secret, STATIC_AWS_REGION = var.aws_region, @@ -229,36 +243,6 @@ module "elastic_beanstalk_environment" { prefer_legacy_ssm_policy = false } -data "aws_iam_policy_document" "minimal_s3_permissions" { - statement { - sid = "AllowS3OperationsOnElasticBeanstalkBuckets" - actions = [ - "s3:ListAllMyBuckets", - "s3:GetBucketLocation" - ] - resources = ["*"] - } -} - -provider "aws" { - region = "us-east-1" - alias = "us-east-1" -} - -### -# Find a certificate for our domain that has status ISSUED -# NOTE that for now, this infra depends on managing certs INSIDE AWS/ACM -### -data "aws_acm_certificate" "localregion" { - domain = var.aws_acm_certificate_domain - statuses = ["ISSUED"] -} - -data "aws_acm_certificate" "cdn" { - provider = aws.us-east-1 - domain = var.aws_acm_certificate_domain - statuses = 
["ISSUED"] -} data "aws_route53_zone" "main" { name = var.aws_route53_zone_name @@ -267,7 +251,7 @@ data "aws_route53_zone" "main" { # create dns record of type "A" resource "aws_route53_record" "site_domain_name" { zone_id = data.aws_route53_zone.main.zone_id - name = var.site_domain_name + name = local.alb_url type = "A" allow_overwrite = true alias { @@ -277,70 +261,134 @@ resource "aws_route53_record" "site_domain_name" { } } -### -# Shared network file system that will store trained models, etc. -# Using a network file system allows separate processes -# to read/write a common set of files -# (e.g. training writes models read by classifier api) -### -module "efs" { - source = "git::https://github.com/cloudposse/terraform-aws-efs.git?ref=tags/0.30.1" - namespace = var.eb_env_namespace - stage = var.eb_env_stage - name = var.eb_env_name - region = var.aws_region - vpc_id = module.vpc.vpc_id - subnets = module.subnets.private_subnet_ids - security_groups = [ - module.vpc.vpc_default_security_group_id, - module.elastic_beanstalk_environment.security_group_id - ] +resource "aws_ssm_parameter" "alb_url_param" { + name = "/${var.eb_env_name}/${var.eb_env_stage}/alb_url" + description = "Load Balancer url" + type = "String" + # value = module.elastic_beanstalk_environment.endpoint + value = local.alb_url } -# find the HTTP load-balancer listener, so we can redirect to HTTPS -data "aws_lb_listener" "http_listener" { - load_balancer_arn = module.elastic_beanstalk_environment.load_balancers[0] - port = 80 -} -# set the HTTP -> HTTPS redirect rule for any request matching site domain -resource "aws_lb_listener_rule" "redirect_http_to_https" { - listener_arn = data.aws_lb_listener.http_listener.arn - action { - type = "redirect" - redirect { - port = "443" - protocol = "HTTPS" - status_code = "HTTP_301" - } - } - condition { - host_header { - values = [var.site_domain_name] - } - } -} ##### # Firewall # ##### - module "firewall" { source = "./modules/waf" aws_region = 
var.aws_region environment = var.eb_env_stage top_level_domain = var.site_domain_name - rate_limit = 100 + rate_limit = 1000 tags = var.eb_env_tags } -resource "aws_wafv2_web_acl_association" "load_blancer_firewall" { - resource_arn = module.elastic_beanstalk_environment.load_balancers[0] - web_acl_arn = module.firewall.wafv2_webacl_arn +###### +# CloudFront distro in front of Beanstalk +# + +# the default policy does not include query strings as cache keys +resource "aws_cloudfront_cache_policy" "cdn_beanstalk_cache" { + name = "${local.namespace}-cdn-cache-policy" + default_ttl = 300 # 5min + min_ttl = 0 + max_ttl = 86400 # 1 day + + parameters_in_cache_key_and_forwarded_to_origin { + cookies_config { + cookie_behavior = "none" + } + headers_config { + header_behavior = "none" + } + query_strings_config { + query_string_behavior = "all" + } + } +} + +resource "aws_cloudfront_origin_request_policy" "cdn_beanstalk_origin_policy" { + name = "${local.namespace}-cdn-origin-policy" + + cookies_config { + cookie_behavior = "all" + } + headers_config { + header_behavior = "allViewer" + } + query_strings_config { + query_string_behavior = "all" + } +} + +module "cdn_beanstalk" { + source = "git::https://github.com/cloudposse/terraform-aws-cloudfront-cdn.git?ref=tags/0.24.1" + acm_certificate_arn = data.aws_acm_certificate.cdn.arn # CloudFront only accepts ACM certificates from us-east-1, so use the aws.us-east-1 lookup + aliases = [var.site_domain_name] + allowed_methods = ["HEAD", "DELETE", "POST", "GET", "OPTIONS", "PUT", "PATCH"] + cache_policy_id = resource.aws_cloudfront_cache_policy.cdn_beanstalk_cache.id + compress = true + cached_methods = ["GET", "HEAD"] + forward_query_string = true + forward_cookies = "none" + is_ipv6_enabled = true + # logging config, disable because we have from the service itself + logging_enabled = false + log_expiration_days = 30 + name = var.eb_env_name + namespace = var.eb_env_namespace + environment = var.aws_region + # origin_domain_name = module.elastic_beanstalk_environment.endpoint + origin_domain_name = 
local.alb_url + origin_protocol_policy = "https-only" + origin_request_policy_id = resource.aws_cloudfront_origin_request_policy.cdn_beanstalk_origin_policy.id + origin_ssl_protocols = ["TLSv1.2"] + parent_zone_name = var.aws_route53_zone_name + price_class = "PriceClass_All" + stage = var.eb_env_stage + viewer_protocol_policy = "https-only" + viewer_minimum_protocol_version = "TLSv1.2_2019" + web_acl_id = module.firewall.wafv2_webacl_arn +} + + +data "aws_lb_listener" "http_listener" { + load_balancer_arn = module.elastic_beanstalk_environment.load_balancers[0] + port = 80 +} + +data "aws_iam_policy_document" "minimal_s3_permissions" { + statement { + sid = "AllowS3OperationsOnElasticBeanstalkBuckets" + actions = [ + "s3:ListAllMyBuckets", + "s3:GetBucketLocation" + ] + resources = ["*"] + } } +### +# Shared network file system that will store trained models, etc. +# Using a network file system allows separate processes +# to read/write a common set of files +# (e.g. training writes models read by classifier api) +### +module "efs" { + source = "git::https://github.com/cloudposse/terraform-aws-efs.git?ref=tags/0.30.1" + namespace = var.eb_env_namespace + stage = var.eb_env_stage + name = var.eb_env_name + region = var.aws_region + vpc_id = module.vpc.vpc_id + subnets = module.subnets.private_subnet_ids + security_groups = [ + module.vpc.vpc_default_security_group_id, + module.elastic_beanstalk_environment.security_group_id + ] +} ###### # Cloudwatch alarms diff --git a/modules/waf/main.tf b/modules/waf/main.tf index 3df3c1a..5560331 100644 --- a/modules/waf/main.tf +++ b/modules/waf/main.tf @@ -1,6 +1,6 @@ resource "aws_wafv2_web_acl" "wafv2_webacl" { name = "mentorpal-${var.environment}-wafv2-webacl" - scope = "REGIONAL" + scope = "CLOUDFRONT" tags = var.tags default_action { @@ -9,7 +9,7 @@ resource "aws_wafv2_web_acl" "wafv2_webacl" { rule { name = "ip-rate-limit-rule" - priority = 2 + priority = 1 action { block {} @@ -29,26 +29,76 @@ resource 
"aws_wafv2_web_acl" "wafv2_webacl" { } } + rule { + name = "common-control" + priority = 2 + + override_action { + none {} + } + statement { + managed_rule_group_statement { + # see https://docs.aws.amazon.com/waf/latest/developerguide/aws-managed-rule-groups-baseline.html#aws-managed-rule-groups-baseline-crs + name = "AWSManagedRulesCommonRuleSet" + vendor_name = "AWS" + excluded_rule { + # 8kb is not enough to post videos + name = "SizeRestrictions_BODY" + } + } + } + + visibility_config { + cloudwatch_metrics_enabled = true + metric_name = "AWS-Common-rule" + sampled_requests_enabled = true + } + } + rule { name = "bot-control" priority = 3 override_action { - # in order to test, lets just collect stats before enabling rules on prod: - count {} - # none {} + none {} } statement { managed_rule_group_statement { - # see https://docs.aws.amazon.com/waf/latest/developerguide/aws-managed-rule-groups-list.html#aws-managed-rule-groups-bot + # see https://docs.aws.amazon.com/waf/latest/developerguide/aws-managed-rule-groups-bot.html name = "AWSManagedRulesBotControlRuleSet" vendor_name = "AWS" + + excluded_rule { + name = "CategorySocialMedia" # slack + } + excluded_rule { + name = "CategorySearchEngine" # google bot + } } } visibility_config { cloudwatch_metrics_enabled = true - metric_name = "AWS-AWSBotControl-rule" + metric_name = "AWS-BotControl-rule" + sampled_requests_enabled = true + } + } + + rule { + name = "AWSManagedRulesLinuxRuleSet" + priority = 4 + override_action { + none {} + } + statement { + managed_rule_group_statement { + name = "AWSManagedRulesLinuxRuleSet" + vendor_name = "AWS" + } + } + visibility_config { + metric_name = "AWS-Linux-rule" + cloudwatch_metrics_enabled = true sampled_requests_enabled = true } } @@ -60,14 +110,6 @@ resource "aws_wafv2_web_acl" "wafv2_webacl" { } } -resource "aws_ssm_parameter" "origin_acl_arn" { - name = "/mentorpal/${var.environment}/firewall/WEBACL_ARN" - type = "String" - value = aws_wafv2_web_acl.wafv2_webacl.arn 
- - tags = var.tags -} - resource "aws_s3_bucket" "s3_logs" { bucket = "mentorpal-aws-waf-logs-${var.aws_region}-${var.environment}" acl = "private" diff --git a/scripts/firewall/analyzed/analyze.js b/scripts/firewall/analyzed/analyze.js new file mode 100644 index 0000000..d4716fe --- /dev/null +++ b/scripts/firewall/analyzed/analyze.js @@ -0,0 +1,87 @@ +const fs = require('fs') // must be required explicitly: only the node REPL auto-loads core modules, `node analyze.js` does not +let l = require('./uncompressed-cf.json') +l.pop() // last one is empty, added by the to_json.sh script + +// let b = l.filter(e=>!e.labels.includes('awswaf:managed:aws:bot-control')) +// let b = l.filter(e=>e.labels && e.labels.includes('awswaf:managed:aws:bot-control')) +// b = l.filter(e=>e.labels && e.labels.includes('awswaf:managed:aws:bot-control')) +// let bots = l.filter(e=>e.labels && e.labels.includes('awswaf:managed:aws:bot-control')) + +let labeled = l.filter(e=>e.labels); +labeled.forEach(e=> e.labels= e.labels.map(i=>i.name)); // simplify for easier filtering +console.log('total requests', l.length, 'labeled requests', labeled.length); + +let labels = labeled.map(e=>e.labels) +let lab = new Set() +labels.forEach(e=> e.forEach(n => lab.add(n))) +lab = [...lab].sort() +console.log('unique labels:',lab) + +// let social = labeled.filter(e=>e.labels.includes('awswaf:managed:aws:bot-control:bot:category:social_media')) +// console.log('social bots: ', social.length) + +let countries = new Set(l.map(e=>e.httpRequest.country)) +console.log(l.length, 'requests came from these countries:', countries); + +// let verified = labeled.filter(e=>e.labels.includes('awswaf:managed:aws:bot-control:bot:verified')) +// verified.length + +fs.writeFileSync('labeled-cf.json',JSON.stringify(labeled,null,2)) + +// l.filter(e=>e.httpRequest.clientIpd == '52.77.238.223') +// let sg = l.filter(e=>e.httpRequest.clientIp == '52.77.238.223') +// // let sgnl = sg.filter(e=>!e.labels) +// sg.filter(e=>e.labels).map(e=>e.labels) +// sg.map(e=>`${e.httpRequest.httpMethod} ${e.httpRequest.uri}${e.httpRequest.args ? 
e.httpRequest.args: ''}`) +// let uris = new Set(sg.map(e=>`${e.httpRequest.httpMethod} ${e.httpRequest.uri}${e.httpRequest.args ? e.httpRequest.args: ''}`)) + +let bad = labeled.filter(e=>e.httpRequest.clientIp != '52.77.238.223'). // manually verified bot + filter(e=>!e.labels.includes('awswaf:managed:aws:bot-control:bot:verified')). + filter(e=>!e.labels.includes('awswaf:managed:aws:bot-control:bot:category:social_media')). + filter(e=>!e.labels.includes('awswaf:managed:aws:bot-control:bot:category:search_engine')). + filter(e=>!e.labels.includes('awswaf:managed:aws:bot-control:bot:category:http_library')) + // filter(e=>!e.labels.includes("awswaf:managed:aws:bot-control:signal:non_browser_user_agent")). // these will be blocked next + // filter(e=>!e.labels.includes("awswaf:managed:aws:bot-control:signal:known_bot_data_center")) // these will be blocked next +//map remaining to one per category: +let sample = {} +bad.forEach(e=>e.labels.forEach(label=>sample[label] = e)) +fs.writeFileSync('bad-sample-cf.json',JSON.stringify(sample,null,2)) + +let counts = Object.keys(sample).map(label => ({[label]: bad.filter(e=>e.labels.includes(label)).length})) +console.log('request counts per bot category:', JSON.stringify(counts, null, 2)) + +// // new Set(bad.map(e=>`${e.httpRequest.httpMethod} ${e.httpRequest.uri}${e.httpRequest.args ? 
e.httpRequest.args: ''}`)) +// // bad.filter(e=>e.httpRequest.uri == '/home').map(e=>e.labels) +// // bad.filter(e=>e.httpRequest.uri == '/').map(e=>e.labels) +// // bad.filter(e=>e.httpRequest.uri == '/' && e.httpRequest.httpMethod != 'GET') +// // let graphql = bad.filter(e=>e.httpRequest.uri == '/graphql') +// // graphql.length +// // console.log(JSON.stringify(graphql[0],null,2)) + +// // slim down before writing for manual inspection: +// bad.forEach(e=>delete e.rateBasedRuleList) +// bad.forEach(e=>delete e.httpSourceId) +// bad.forEach(e=>delete e.httpSourceName) +// bad.forEach(e=>delete e.weebaclId) +// bad.forEach(e=>delete e.nonTerminatingMatchingRules) +// bad.forEach(e=>delete e.ruleGroupList) +// bad.forEach(e=>delete e.webaclId) +// bad.forEach(e=>delete e.terminatingRuleId) +// bad.forEach(e=>delete e.requestHeadersInserted) +// bad.forEach(e=>delete e.responseCodeSent) +// bad.forEach(e=>delete e.terminatingRuleMatchDetails) + +// // let allbad = bad +// // bad.filter(e=>e.httpRequest.uri.endsWith('.php')).length +// // bad = bad.filter(e=>!e.httpRequest.uri.endsWith('.php')) +// // bad.filter(e=>!e.httpRequest.uri.endsWith('.asp')) +// // bad.filter(e=>!e.httpRequest.uri.endsWith('.aspx')) +// // bad.filter(e=>!e.httpRequest.uri.endsWith('.asp')) +// // bad.filter(e=>!e.httpRequest.uri.endsWith('.env')) +// // bad = bad.filter(e=>!e.httpRequest.uri.endsWith('.env')) + +fs.writeFileSync('bad-cf.json',JSON.stringify(bad,null,2)) + +// investigate blocked to see if some should be allowed: +let blocked = l.filter(e=>e.action !='ALLOW' && e.terminatingRuleType != "RATE_BASED") +fs.writeFileSync('blocked-cf.json',JSON.stringify(blocked,null,2)) diff --git a/scripts/firewall/analyzed/to_json.sh b/scripts/firewall/analyzed/to_json.sh new file mode 100755 index 0000000..bf898d3 --- /dev/null +++ b/scripts/firewall/analyzed/to_json.sh @@ -0,0 +1,17 @@ +#!/bin/bash +echo '[' > uncompressed-v2.json +for i in `find ../logs-v2/2022 -name '*gz' -print` +do + gzip 
-cd "$i" | tr '\n' ',' >> uncompressed-v2.json +done +# to make json valid we need another element after the last comma: +echo '{}]' >> uncompressed-v2.json + + +echo '[' > uncompressed-cf.json +for i in `find ../logs-cf/2022 -name '*gz' -print` +do + gzip -cd "$i" | tr '\n' ',' >> uncompressed-cf.json +done +# to make json valid we need another element after the last comma: +echo '{}]' >> uncompressed-cf.json diff --git a/scripts/firewall/sync.sh b/scripts/firewall/sync.sh new file mode 100755 index 0000000..1e272fe --- /dev/null +++ b/scripts/firewall/sync.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +# fetch firewall logs first: + +aws s3 sync s3://mentorpal-aws-waf-logs-us-east-1-v2 ./logs-v2 +aws s3 sync s3://mentorpal-aws-waf-logs-us-west-2-cf ./logs-cf + +# then go to ./analyzed and run to_json.sh first and then +# use the analyze.js script to inspect logs