Skip to content

Commit

Permalink
Merge pull request #20 from cagov/lake-as-external-stage
Browse files Browse the repository at this point in the history
Set up external stage and pipes
  • Loading branch information
ian-r-rose authored Dec 5, 2023
2 parents 0f40975 + 391a20f commit b1814b0
Show file tree
Hide file tree
Showing 18 changed files with 526 additions and 37 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ repos:
- id: terraform_validate
# Exclude modules to work around
# https://github.com/hashicorp/terraform/issues/28490
exclude: "terraform/[^/]+/modules/[^/]+/[^/]+$"
exclude: "terraform/modules/[^/]+/[^/]+$"
- id: terraform_tflint
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.1.6
Expand Down
8 changes: 4 additions & 4 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

77 changes: 77 additions & 0 deletions scripts/setup_clearinghouse.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
-- Schema that holds every object defined in this script (file formats and tables).
-- IF NOT EXISTS makes the statement a no-op when the schema is already present.
CREATE SCHEMA IF NOT EXISTS CLEARINGHOUSE;

-- File format STATION_RAW: gzip-compressed, comma-delimited CSV.
-- No header line is parsed or skipped, so every line of a file is read as data.
-- OR REPLACE means re-running this script overwrites any existing definition.
CREATE OR REPLACE FILE FORMAT CLEARINGHOUSE.STATION_RAW
    TYPE = CSV
    COMPRESSION = 'gzip'
    FIELD_DELIMITER = ','
    PARSE_HEADER = FALSE
    SKIP_HEADER = 0;

-- File format STATION_META: tab-delimited text, declared as CSV with a '\t' delimiter.
-- The first line of each file is skipped (SKIP_HEADER = 1) rather than parsed
-- for column names (PARSE_HEADER = FALSE).
-- OR REPLACE means re-running this script overwrites any existing definition.
CREATE OR REPLACE FILE FORMAT CLEARINGHOUSE.STATION_META
    TYPE = CSV
    FIELD_DELIMITER = '\t'
    SKIP_HEADER = 1
    PARSE_HEADER = FALSE;

-- File format STATION_STATUS: XML documents, loaded with the outer XML element
-- stripped (STRIP_OUTER_ELEMENT = TRUE).
-- OR REPLACE means re-running this script overwrites any existing definition.
CREATE OR REPLACE FILE FORMAT CLEARINGHOUSE.STATION_STATUS
    TYPE = XML
    STRIP_OUTER_ELEMENT = TRUE;

-- Table STATION_RAW: one row per station sample, clustered by SAMPLE_DATE.
-- Created only if absent, so an existing table (and its data) is left untouched.
CREATE TABLE IF NOT EXISTS CLEARINGHOUSE.STATION_RAW (
    -- Provenance and sample identity.
    -- NOTE(review): FILENAME is presumably the name of the staged source file;
    -- confirm against the COPY / pipe definition that populates this table.
    FILENAME         TEXT,
    SAMPLE_TIMESTAMP TIMESTAMP_NTZ,  -- timestamp without time zone
    SAMPLE_DATE      DATE,           -- clustering key (see CLUSTER BY below)
    ID               TEXT,

    -- Eight repeated (flow, occupancy, speed) measurement triples, suffixed 1..8.
    -- NOTE(review): presumably one triple per detector lane; confirm with the data source.
    FLOW_1 INT, OCCUPANCY_1 FLOAT, SPEED_1 FLOAT,
    FLOW_2 INT, OCCUPANCY_2 FLOAT, SPEED_2 FLOAT,
    FLOW_3 INT, OCCUPANCY_3 FLOAT, SPEED_3 FLOAT,
    FLOW_4 INT, OCCUPANCY_4 FLOAT, SPEED_4 FLOAT,
    FLOW_5 INT, OCCUPANCY_5 FLOAT, SPEED_5 FLOAT,
    FLOW_6 INT, OCCUPANCY_6 FLOAT, SPEED_6 FLOAT,
    FLOW_7 INT, OCCUPANCY_7 FLOAT, SPEED_7 FLOAT,
    FLOW_8 INT, OCCUPANCY_8 FLOAT, SPEED_8 FLOAT
)
CLUSTER BY (SAMPLE_DATE);

-- Table STATION_META: descriptive attributes for a station.
-- Created only if absent, so an existing table (and its data) is left untouched.
CREATE TABLE IF NOT EXISTS CLEARINGHOUSE.STATION_META (
    -- NOTE(review): FILENAME is presumably the name of the staged source file;
    -- confirm against the COPY / pipe definition that populates this table.
    FILENAME  TEXT,
    ID        TEXT,

    -- Location attributes, all kept as text except the coordinates.
    FWY       TEXT,
    DIR       TEXT,
    DISTRICT  TEXT,
    COUNTY    TEXT,
    CITY      TEXT,
    STATE_PM  TEXT,
    ABS_PM    TEXT,
    LATITUDE  FLOAT,
    LONGITUDE FLOAT,

    -- Station characteristics.
    LENGTH    FLOAT,
    TYPE      TEXT,
    LANES     INT,
    NAME      TEXT,

    -- Four free-form user identifier fields.
    USER_ID_1 TEXT,
    USER_ID_2 TEXT,
    USER_ID_3 TEXT,
    USER_ID_4 TEXT
);

-- Table STATION_STATUS: semi-structured content stored as VARIANT, together
-- with a FILENAME column.
-- NOTE(review): CONTENT presumably holds the XML loaded through the
-- STATION_STATUS file format; confirm against the COPY / pipe definition.
-- Created only if absent, so an existing table (and its data) is left untouched.
CREATE TABLE IF NOT EXISTS CLEARINGHOUSE.STATION_STATUS (
    FILENAME TEXT,
    CONTENT  VARIANT
);
29 changes: 16 additions & 13 deletions terraform/environments/dev/.terraform.lock.hcl

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

46 changes: 43 additions & 3 deletions terraform/environments/dev/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ terraform {
}
snowflake = {
source = "Snowflake-Labs/snowflake"
version = "0.69"
version = "0.71"
}
}

Expand All @@ -26,6 +26,13 @@ locals {
project = "pems"
region = "us-west-2"
locator = "NGB13288"

# These are circular dependencies on the outputs. Unfortunate, but
# necessary, as we don't know them until we've created the storage
# integration, which itself depends on the assume role policy.
storage_aws_external_id = "NGB13288_SFCRole=2_P94CCaZYR9XFUzpMIGN6HOit/zQ="
storage_aws_iam_user_arn = "arn:aws:iam::946158320428:user/uunc0000-s"
pipe_sqs_queue_arn = "arn:aws:sqs:us-west-2:946158320428:sf-snowpipe-AIDA5YS3OHMWCVTR5XHEE-YZjsweK3loK4rXlOJBWF_g"
}

provider "aws" {
Expand All @@ -49,6 +56,13 @@ provider "snowflake" {
role = "PUBLIC"
}

# Snowflake provider that assumes the ACCOUNTADMIN role on the account given by
# local.locator. Reserve it for account administration, and use it only when
# necessary.
provider "snowflake" {
  alias   = "accountadmin"
  account = local.locator
  role    = "ACCOUNTADMIN"
}

# Snowflake provider for creating databases, warehouses, etc.
provider "snowflake" {
alias = "sysadmin"
Expand Down Expand Up @@ -80,8 +94,11 @@ module "s3_lake" {
aws = aws
}

prefix = "${local.owner}-${local.project}-${local.environment}"
region = local.region
prefix = "${local.owner}-${local.project}-${local.environment}"
region = local.region
snowflake_raw_storage_integration_iam_user_arn = local.storage_aws_iam_user_arn
snowflake_raw_storage_integration_external_id = local.storage_aws_external_id
snowflake_pipe_sqs_queue_arn = local.pipe_sqs_queue_arn
}

data "aws_iam_role" "mwaa_execution_role" {
Expand All @@ -97,6 +114,7 @@ resource "aws_iam_role_policy_attachment" "mwaa_execution_role" {
# Snowflake Infrastructure #
############################

# Main ELT architecture
module "elt" {
source = "github.com/cagov/data-infrastructure.git//terraform/snowflake/modules/elt?ref=74a522f"
providers = {
Expand All @@ -107,3 +125,25 @@ module "elt" {

environment = upper(local.environment)
}

# Instantiates the local snowflake-clearinghouse module for this environment.
module "snowflake_clearinghouse" {
  source = "../../modules/snowflake-clearinghouse"

  # Pass each aliased Snowflake provider through to the module under the same alias.
  providers = {
    snowflake.accountadmin  = snowflake.accountadmin,
    snowflake.securityadmin = snowflake.securityadmin,
    snowflake.sysadmin      = snowflake.sysadmin,
    snowflake.useradmin     = snowflake.useradmin,
  }

  # The environment name is upper-cased before being handed to the module.
  environment = upper(local.environment)

  # Both values come from the s3_lake module: the raw bucket (as an s3:// URL)
  # and the ARN of the storage integration role.
  s3_url               = "s3://${module.s3_lake.pems_raw_bucket.name}"
  storage_aws_role_arn = module.s3_lake.snowflake_storage_integration_role.arn
}

# Re-exports the pems_raw_stage output of the snowflake_clearinghouse module.
output "pems_raw_stage" {
  value = module.snowflake_clearinghouse.pems_raw_stage
}

# Re-exports the notification_channel output of the snowflake_clearinghouse module.
output "notification_channel" {
  value = module.snowflake_clearinghouse.notification_channel
}
29 changes: 16 additions & 13 deletions terraform/environments/prd/.terraform.lock.hcl

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

55 changes: 52 additions & 3 deletions terraform/environments/prd/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ terraform {
}
snowflake = {
source = "Snowflake-Labs/snowflake"
version = "0.69"
version = "0.71"
}
}

Expand All @@ -26,6 +26,13 @@ locals {
project = "pems"
region = "us-west-2"
locator = "NGB13288"

# These are circular dependencies on the outputs. Unfortunate, but
# necessary, as we don't know them until we've created the storage
# integration, which itself depends on the assume role policy.
storage_aws_external_id = "NGB13288_SFCRole=2_GwkZr+HZcrKfUqvsrvBBu6gcqRs="
storage_aws_iam_user_arn = "arn:aws:iam::946158320428:user/uunc0000-s"
pipe_sqs_queue_arn = "arn:aws:sqs:us-west-2:946158320428:sf-snowpipe-AIDA5YS3OHMWCVTR5XHEE-YZjsweK3loK4rXlOJBWF_g"
}

provider "aws" {
Expand All @@ -49,6 +56,13 @@ provider "snowflake" {
role = "PUBLIC"
}

# Snowflake provider that assumes the ACCOUNTADMIN role on the account given by
# local.locator. Reserve it for account administration, and use it only when
# necessary.
provider "snowflake" {
  alias   = "accountadmin"
  account = local.locator
  role    = "ACCOUNTADMIN"
}

# Snowflake provider for creating databases, warehouses, etc.
provider "snowflake" {
alias = "sysadmin"
Expand Down Expand Up @@ -80,14 +94,27 @@ module "s3_lake" {
aws = aws
}

prefix = "${local.owner}-${local.project}-${local.environment}"
region = local.region
prefix = "${local.owner}-${local.project}-${local.environment}"
region = local.region
snowflake_raw_storage_integration_iam_user_arn = local.storage_aws_iam_user_arn
snowflake_raw_storage_integration_external_id = local.storage_aws_external_id
snowflake_pipe_sqs_queue_arn = local.pipe_sqs_queue_arn
}

# Looks up an existing IAM role by name.
# NOTE(review): this file is the prd environment, yet the role name contains
# "dev". Confirm that prd is meant to share the dev MWAA execution role rather
# than reference a prd-specific one.
data "aws_iam_role" "mwaa_execution_role" {
  name = "dse-infra-dev-us-west-2-mwaa-execution-role"
}

# Attaches the s3_lake module's pems_raw_read_write_policy to the MWAA
# execution role looked up by data.aws_iam_role.mwaa_execution_role.
resource "aws_iam_role_policy_attachment" "mwaa_execution_role" {
  role       = data.aws_iam_role.mwaa_execution_role.name
  policy_arn = module.s3_lake.pems_raw_read_write_policy.arn
}

############################
# Snowflake Infrastructure #
############################

# Main ELT architecture
module "elt" {
source = "github.com/cagov/data-infrastructure.git//terraform/snowflake/modules/elt?ref=74a522f"
providers = {
Expand All @@ -98,3 +125,25 @@ module "elt" {

environment = upper(local.environment)
}

# Instantiates the local snowflake-clearinghouse module for this environment.
module "snowflake_clearinghouse" {
  source = "../../modules/snowflake-clearinghouse"

  # Pass each aliased Snowflake provider through to the module under the same alias.
  providers = {
    snowflake.accountadmin  = snowflake.accountadmin,
    snowflake.securityadmin = snowflake.securityadmin,
    snowflake.sysadmin      = snowflake.sysadmin,
    snowflake.useradmin     = snowflake.useradmin,
  }

  # The environment name is upper-cased before being handed to the module.
  environment = upper(local.environment)

  # Both values come from the s3_lake module: the raw bucket (as an s3:// URL)
  # and the ARN of the storage integration role.
  s3_url               = "s3://${module.s3_lake.pems_raw_bucket.name}"
  storage_aws_role_arn = module.s3_lake.snowflake_storage_integration_role.arn
}

# Re-exports the pems_raw_stage output of the snowflake_clearinghouse module.
output "pems_raw_stage" {
  value = module.snowflake_clearinghouse.pems_raw_stage
}

# Re-exports the notification_channel output of the snowflake_clearinghouse module.
output "notification_channel" {
  value = module.snowflake_clearinghouse.notification_channel
}
1 change: 1 addition & 0 deletions terraform/modules/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.terraform.lock.hcl
Loading

0 comments on commit b1814b0

Please sign in to comment.