Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement S3 bucket and Snowflake architecture for raw PeMS data #10

Merged
merged 4 commits into from
Oct 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .github/workflows/pre-commit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ jobs:
# just use $SNOWFLAKE_PRIVATE_KEY
- name: Set up private key
run: echo "$PRIVATE_KEY" > $SNOWFLAKE_PRIVATE_KEY_PATH
# Install a pinned Terraform CLI on the runner (presumably needed by the
# terraform_* pre-commit hooks -- confirm against .pre-commit-config.yaml).
- name: Setup terraform
uses: hashicorp/setup-terraform@v2
with:
terraform_version: v1.4.0
# Install tflint using the upstream install script. The URL is split across
# two lines with a trailing backslash so the shell joins it back together.
# NOTE(review): the script is fetched from the `master` branch, so the
# installed tflint version is not pinned.
- name: Install tflint
run: |
curl -s https://raw.githubusercontent.com/terraform-linters/\
tflint/master/install_linux.sh | bash
- uses: actions/setup-python@v3
- uses: snok/install-poetry@v1
with:
Expand Down
9 changes: 9 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@ repos:
hooks:
- id: yamllint
args: []
# Terraform hooks: formatting, validation, and linting.
- repo: https://github.com/antonbabenko/pre-commit-terraform
rev: v1.77.1
hooks:
# Formatting check for Terraform files.
- id: terraform_fmt
- id: terraform_validate
# Exclude modules to work around
# https://github.com/hashicorp/terraform/issues/28490
# The pattern matches files directly inside terraform/<dir>/modules/<name>/.
exclude: "terraform/[^/]+/modules/[^/]+/[^/]+$"
# Lint with tflint (the tflint binary must be installed separately).
- id: terraform_tflint
- repo: local
hooks:
- name: Dbt deps
Expand Down
1 change: 1 addition & 0 deletions terraform/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.terraform
46 changes: 46 additions & 0 deletions terraform/environments/dev/.terraform.lock.hcl

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions terraform/environments/dev/caltrans-pems-dev.tfbackend
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Partial configuration for the empty `backend "s3" {}` block in main.tf;
# intended to be passed via `terraform init -backend-config=<this file>`.

# Where the state object lives.
bucket = "dse-infra-dev-terraform-state"
key    = "caltrans-pems-dev.tfstate"
region = "us-west-1"

# DynamoDB table used by the S3 backend for state locking.
dynamodb_table = "dse-infra-dev-terraform-state-lock"
100 changes: 100 additions & 0 deletions terraform/environments/dev/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
##################################
# Terraform Setup #
##################################

# Root settings for the dev environment: CLI version constraint, remote state
# backend, and exact provider pins.
terraform {
  required_version = ">= 1.0"

  # Left empty on purpose (partial configuration): the bucket, key, region and
  # lock table are expected to come from a separate backend config file such
  # as caltrans-pems-dev.tfbackend.
  backend "s3" {}

  required_providers {
    snowflake = {
      version = "0.69"
      source  = "Snowflake-Labs/snowflake"
    }
    aws = {
      version = "4.56.0"
      source  = "hashicorp/aws"
    }
  }
}

# Shared values for this environment.
locals {
  # Naming/tagging components; combined as "<owner>-<project>-<environment>"
  # for the s3-lake module prefix and used as AWS default tags.
  owner       = "caltrans"
  project     = "pems"
  environment = "dev"

  # AWS region for the resources managed by this configuration.
  region = "us-west-2"

  # Value passed as `account` to every Snowflake provider block.
  locator = "NGB13288"
}

# Default AWS provider. The default_tags block applies the Owner / Project /
# Environment tags to resources created through this provider.
provider "aws" {
  default_tags {
    tags = {
      Environment = local.environment
      Owner       = local.owner
      Project     = local.project
    }
  }

  region = local.region
}

# Default (un-aliased) Snowflake provider, deliberately given the
# low-permission PUBLIC role. Snowflake makes the creating role the owner of
# any new object, so ownership is controlled by offering several provider
# blocks with different roles and requiring every Snowflake resource to name
# the aliased provider (and therefore the role) it should be created with.
provider "snowflake" {
  role    = "PUBLIC"
  account = local.locator
}

# SYSADMIN-role Snowflake provider: used for creating databases, warehouses,
# and similar objects.
provider "snowflake" {
  alias   = "sysadmin"
  role    = "SYSADMIN"
  account = local.locator
}

# SECURITYADMIN-role Snowflake provider: used for managing grants to roles.
provider "snowflake" {
  alias   = "securityadmin"
  role    = "SECURITYADMIN"
  account = local.locator
}

# USERADMIN-role Snowflake provider: used for managing user accounts and
# roles.
provider "snowflake" {
  alias   = "useradmin"
  role    = "USERADMIN"
  account = local.locator
}

############################
# AWS Infrastructure #
############################

# S3 lake module from this repository (terraform/modules/s3-lake).
module "s3_lake" {
  source = "../../modules/s3-lake"

  # Resource-name prefix of the form "<owner>-<project>-<environment>".
  prefix = join("-", [local.owner, local.project, local.environment])
  region = local.region

  providers = {
    aws = aws
  }
}

############################
# Snowflake Infrastructure #
############################

# ELT module pulled from cagov/data-infrastructure, pinned to commit 74a522f.
module "elt" {
  source = "github.com/cagov/data-infrastructure.git//terraform/snowflake/modules/elt?ref=74a522f"

  # The module receives the environment name upper-cased.
  environment = upper(local.environment)

  # Hand each role-specific Snowflake provider to the module under the same
  # alias.
  providers = {
    snowflake.useradmin     = snowflake.useradmin
    snowflake.sysadmin      = snowflake.sysadmin
    snowflake.securityadmin = snowflake.securityadmin
  }
}
16 changes: 16 additions & 0 deletions terraform/modules/s3-lake/iam.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
##################################
# IAM Service Users #
##################################

# NOTE: in general, policies and roles are defined close to the resources
# they support.

# Service user that Airflow uses to write to S3. Named
# "<prefix>-airflow-s3-writer".
resource "aws_iam_user" "airflow_s3_writer" {
  name = format("%s-airflow-s3-writer", var.prefix)
}

# Attach the raw-bucket write policy (defined alongside the bucket) to the
# Airflow service user.
resource "aws_iam_user_policy_attachment" "airflow_s3_writer_policy_attachment" {
  policy_arn = aws_iam_policy.pems_raw_write.arn
  user       = aws_iam_user.airflow_s3_writer.name
}
18 changes: 18 additions & 0 deletions terraform/modules/s3-lake/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
##################################
# Terraform Setup #
##################################

# Version constraints for the s3-lake module.
terraform {
  required_version = ">= 1.0"

  # Whenever a provider is added or its version changes, the lockfile has to
  # be regenerated for every platform in use, see
  # https://github.com/hashicorp/terraform/issues/28041
  # Regenerate it with:
  #
  #   terraform providers lock -platform=linux_amd64 -platform=darwin_amd64
  required_providers {
    aws = {
      version = "4.56.0"
      source  = "hashicorp/aws"
    }
  }
}
72 changes: 72 additions & 0 deletions terraform/modules/s3-lake/s3.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
##################################
# Caltrans PeMS Infrastructure #
##################################

# Bucket for raw PeMS data, named "<prefix>-<region>-raw".
resource "aws_s3_bucket" "pems_raw" {
  bucket = join("-", [var.prefix, var.region, "raw"])
}

# Enable object versioning on the raw PeMS bucket.
resource "aws_s3_bucket_versioning" "pems_raw" {
  # Reference the bucket through `.id`, matching the bucket policy and
  # public-access-block resources in this file (for aws_s3_bucket the id is
  # the bucket name, so the value is unchanged).
  bucket = aws_s3_bucket.pems_raw.id

  versioning_configuration {
    status = "Enabled"
  }
}

# Write access to the raw bucket: list the bucket and put objects into it.
data "aws_iam_policy_document" "pems_raw_write" {
  # Bucket-level permission, scoped to the bucket ARN itself.
  statement {
    resources = [aws_s3_bucket.pems_raw.arn]
    actions   = ["s3:ListBucket"]
  }

  # Object-level permission, scoped to every key in the bucket.
  statement {
    resources = ["${aws_s3_bucket.pems_raw.arn}/*"]
    actions   = ["s3:PutObject"]
  }
}

# Managed IAM policy wrapping the write policy document above; named
# "<prefix>-<region>-raw-write".
resource "aws_iam_policy" "pems_raw_write" {
  name        = format("%s-%s-raw-write", var.prefix, var.region)
  policy      = data.aws_iam_policy_document.pems_raw_write.json
  description = "Policy allowing write for s3 pems raw bucket"
}


# Public read access: any principal ("*") may list the raw bucket and fetch
# its objects.
data "aws_iam_policy_document" "pems_raw_read" {
  statement {
    actions = [
      "s3:GetObject",
      "s3:ListBucket",
    ]

    # The bucket ARN is listed for ListBucket and the "/*" object ARN for
    # GetObject.
    resources = [
      aws_s3_bucket.pems_raw.arn,
      "${aws_s3_bucket.pems_raw.arn}/*",
    ]

    principals {
      identifiers = ["*"]
      type        = "AWS"
    }
  }
}

# Attach the public-read policy document to the raw PeMS bucket.
resource "aws_s3_bucket_policy" "pems_raw_read" {
  bucket = aws_s3_bucket.pems_raw.id
  policy = data.aws_iam_policy_document.pems_raw_read.json

  # New S3 buckets block public policies by default. Without an explicit
  # ordering, Terraform may try to put this public policy before the
  # public-access-block resource has relaxed those settings, and the call is
  # rejected with AccessDenied. Force the access block to be applied first.
  depends_on = [aws_s3_bucket_public_access_block.pems_raw]
}

# Turn off all four S3 Block Public Access settings for the raw bucket so the
# public-read bucket policy can be attached and take effect.
resource "aws_s3_bucket_public_access_block" "pems_raw" {
  bucket = aws_s3_bucket.pems_raw.id

  # Policy-related settings.
  block_public_policy     = false
  restrict_public_buckets = false

  # ACL-related settings.
  block_public_acls  = false
  ignore_public_acls = false
}
10 changes: 10 additions & 0 deletions terraform/modules/s3-lake/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Required input: used as the leading part of bucket, policy, and IAM user
# names in this module.
variable "prefix" {
  type        = string
  description = "Prefix for resource names"
}

# Optional input: region name, which this module embeds in the bucket and
# policy names.
variable "region" {
  type        = string
  default     = "us-west-2"
  description = "Region for AWS resources"
}