diff --git a/README.md b/README.md
index 14ec428..e729fed 100644
--- a/README.md
+++ b/README.md
@@ -130,15 +130,30 @@ Once created take the ARN of the certificate and set that ARN in environment_var
 # Secrets
 Secrets can be manually created in the
-[AWS Secrets Manager](https://docs.aws.amazon.com/secretsmanager/latest/userguide/create_secret.html)
-
-To pass secrets to a container set the secrets manager `secret name`
-when creating a ServiceProp objects:
+[AWS Secrets Manager](https://docs.aws.amazon.com/secretsmanager/latest/userguide/create_secret.html).
+When naming your secret, make sure that the name does not end in a pattern that matches
+`-??????`, as this will cause issues with how AWS CDK looks up secrets.
+To pass secrets to a container, set `container_secrets` when creating a `ServiceProps`
+object. You'll be creating a list of `ServiceSecret` objects:

 ```python
+from src.service_props import ServiceProps, ServiceSecret
+
 app_service_props = ServiceProps(
-    "app", 443, 1024, f"ghcr.io/sage-bionetworks/app:v1.0", container_env_vars={},
-    container_secret_name="app/dev/DATABASE"
+    container_name="app",
+    container_port=443,
+    container_memory=1024,
+    container_location="ghcr.io/sage-bionetworks/app:v1.0",
+    container_secrets=[
+        ServiceSecret(
+            secret_name="app/dev/DATABASE",
+            environment_key="NAME_OF_ENVIRONMENT_VARIABLE_SET_FOR_CONTAINER",
+        ),
+        ServiceSecret(
+            secret_name="app/dev/PASSWORD",
+            environment_key="SINGLE_VALUE_SECRET",
+        )
+    ]
 )
 ```

@@ -150,6 +165,26 @@ For example, the KVs for `app/dev/DATABASE` could be:
 }
 ```

+And the value for `app/dev/PASSWORD` could be: `password`
+
+In the application (Python) code, the secrets may be loaded into a dict using code like:
+
+```python
+import json
+import os
+
+all_secrets_dict = json.loads(os.environ["NAME_OF_ENVIRONMENT_VARIABLE_SET_FOR_CONTAINER"])
+```
+
+In the case of a single value, you may load it like:
+
+```python
+import os
+
+my_secret = os.environ.get("SINGLE_VALUE_SECRET", None)
+```
+
 > [!NOTE]
 > Retrieving secrets requires access to the AWS Secrets Manager

@@ -247,3 +282,101 @@ The workflow for continuous integration:
 * CI deploys changes to the staging environment (stage.app.io) in the AWS prod account.
 * Changes are promoted (or merged) to the git prod branch.
 * CI deploys changes to the prod environment (prod.app.io) in the AWS prod account.

# Creation/Forwarding of OpenTelemetry data
Schematic has been instrumented with a mix of
[automatically instrumented libraries](https://github.com/open-telemetry/opentelemetry-python-contrib/tree/main/instrumentation)
and [manual traces](https://opentelemetry-python.readthedocs.io/en/latest/api/trace.html).
In addition, it has been configured at startup to [conditionally turn on trace/log exports](https://github.com/Sage-Bionetworks/schematic/blob/778bf54db9c5b4de0af334c4efe034b3dde0b348/schematic/__init__.py#L82-L139)
depending on how a few environment variables are set. The combination of these items lets
the schematic container running in ECS export telemetry data out of the container to be
ingested somewhere else for long-term storage.
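For orientation, a manual trace is just a span wrapped around a unit of work. The sketch
below uses the OpenTelemetry Python API; the function name and span attribute are
illustrative and are not taken from the Schematic codebase:

```python
from opentelemetry import trace

# Tracer names conventionally identify the instrumented module.
tracer = trace.get_tracer(__name__)


def validate_manifest(manifest_id: str) -> None:  # hypothetical function
    # Each span shows up as one unit of work in the exported trace.
    with tracer.start_as_current_span("validate_manifest") as span:
        span.set_attribute("manifest.id", manifest_id)
        ...  # do the actual work here
```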
Schematic is configured to send its telemetry data to the OpenTelemetry Collector,
which then handles forwarding that data on to its final destination. This is
accomplished by setting a few environment variables on the Schematic container such as:

```python
from src.service_props import ServiceProps

telemetry_environment_variables = {
    "TRACING_EXPORT_FORMAT": "otlp",
    "LOGGING_EXPORT_FORMAT": "otlp",
    "TRACING_SERVICE_NAME": "schematic",
    "LOGGING_SERVICE_NAME": "schematic",
    "DEPLOYMENT_ENVIRONMENT": environment,
    "OTEL_EXPORTER_OTLP_ENDPOINT": "http://otel-collector:4318",
}

app_service_props = ServiceProps(
    container_name="schematic-app",
    container_location="ghcr.io/sage-bionetworks/app:v1.0",
    container_port=443,
    container_memory=1024,
    container_env_vars=telemetry_environment_variables,
)
```

## OpenTelemetry Collector
The OpenTelemetry Collector is deployed into ECS and runs in
[Gateway mode](https://opentelemetry.io/docs/collector/deployment/gateway/). This
configuration allows a single collector to be the central point for all telemetry
data leaving the context of this deployed infrastructure. This central point allows us
to configure where [authorization is attached](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/extension/oauth2clientauthextension),
where [requests are batched](https://github.com/open-telemetry/opentelemetry-collector/blob/main/processor/batchprocessor/README.md),
and where [sensitive data is stripped](https://docs.honeycomb.io/send-data/opentelemetry/collector/handle-sensitive-information/).

The configuration of all of these elements stems from [supplying a `config.yaml` file](https://opentelemetry.io/docs/collector/configuration/) as
an environment variable to the OTel collector container at startup. This config file is
set up to be sourced from AWS Secrets Manager. To accomplish this, a filled-out copy of
the following configuration file is stored in AWS Secrets Manager (as plaintext)
with the name `f"{stack_name_prefix}-DockerFargateStack/{environment}/opentelemetry-collector-configuration"`:

```
extensions:
  health_check:
    endpoint: "0.0.0.0:13133"
    path: "/"
    check_collector_pipeline:
      enabled: true
      interval: "5m"
      exporter_failure_threshold: 5
  oauth2client:
    client_id: FILL_ME_IN
    client_secret: FILL_ME_IN
    endpoint_params:
      audience: FILL_ME_IN
    token_url: FILL_ME_IN
    # timeout for the token client
    timeout: 2s

receivers:
  otlp:
    protocols:
      http:
        endpoint: 0.0.0.0:4318

processors:
  batch:
    send_batch_size: 50

exporters:
  otlphttp/withauth:
    endpoint: FILL_ME_IN
    auth:
      authenticator: oauth2client

service:
  pipelines:
    traces:
      receivers: [otlp]
      processors: [batch]
      exporters: [otlphttp/withauth]
    logs:
      receivers: [otlp]
      processors: [batch]
      exporters: [otlphttp/withauth]
  extensions: [health_check, oauth2client]
```
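If you prefer to script that step instead of pasting the file into the console, a minimal
boto3 sketch is shown below. The local file name `config.yaml` and the literal
`schematic-dev-...` secret name are illustrative; substitute your own stack prefix and
environment:

```python
import boto3

# Illustrative values; match these to your stack prefix and environment.
SECRET_NAME = "schematic-dev-DockerFargateStack/dev/opentelemetry-collector-configuration"

client = boto3.client("secretsmanager")

# Store the filled-out collector config as a plaintext secret.
# Use client.put_secret_value(SecretId=SECRET_NAME, SecretString=...) for later updates.
with open("config.yaml") as f:
    client.create_secret(Name=SECRET_NAME, SecretString=f.read())
```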
diff --git a/app.py b/app.py
index a5b9e3c..55d41a1 100644
--- a/app.py
+++ b/app.py
@@ -1,11 +1,12 @@
+from os import environ
+
 import aws_cdk as cdk
-from os import environ

-from src.network_stack import NetworkStack
 from src.ecs_stack import EcsStack
-from src.service_stack import LoadBalancedServiceStack
 from src.load_balancer_stack import LoadBalancerStack
-from src.service_props import ServiceProps
+from src.network_stack import NetworkStack
+from src.service_props import ServiceProps, ServiceSecret
+from src.service_stack import LoadBalancedServiceStack, ServiceStack

 # get the environment and set environment specific variables
 VALID_ENVIRONMENTS = ["dev", "stage", "prod"]
@@ -35,7 +36,7 @@
     case _:
         valid_envs_str = ",".join(VALID_ENVIRONMENTS)
         raise SystemExit(
-            f"Must set environment variable `ENV` to one of {valid_envs_str}"
+            f"Must set environment variable `ENV` to one of {valid_envs_str}. Currently set to {environment}."
         )

 stack_name_prefix = f"schematic-{environment}"
@@ -68,12 +69,27 @@
     cdk_app, f"{stack_name_prefix}-load-balancer", network_stack.vpc
 )

+telemetry_environment_variables = {
+    "TRACING_EXPORT_FORMAT": "otlp",
+    "LOGGING_EXPORT_FORMAT": "otlp",
+    "TRACING_SERVICE_NAME": "schematic",
+    "LOGGING_SERVICE_NAME": "schematic",
+    "DEPLOYMENT_ENVIRONMENT": environment,
+    "OTEL_EXPORTER_OTLP_ENDPOINT": "http://otel-collector:4318",
+}
+
 app_service_props = ServiceProps(
-    "schematic-app",
-    "ghcr.io/sage-bionetworks/schematic:v0.1.90-beta",
-    443,
+    container_name="schematic-app",
+    container_location="ghcr.io/sage-bionetworks/schematic:v0.1.94-beta",
+    container_port=443,
     container_memory=1024,
-    container_secret_name=f"{stack_name_prefix}-DockerFargateStack/{environment}/ecs",
+    container_env_vars=telemetry_environment_variables,
+    container_secrets=[
+        ServiceSecret(
+            secret_name=f"{stack_name_prefix}-DockerFargateStack/{environment}/ecs",
+            environment_key="SECRETS_MANAGER_SECRETS",
+        )
+    ],
 )

 app_service_stack = LoadBalancedServiceStack(
@@ -88,5 +104,27 @@
     health_check_interval=5,
 )

-# Generate stacks
+app_service_props_otel_collector = ServiceProps(
+    container_name="otel-collector",
+    container_port=4318,
+    container_memory=512,
+    container_location="ghcr.io/sage-bionetworks/sage-otel-collector:0.0.1",
+    container_secrets=[
+        ServiceSecret(
+            secret_name=f"{stack_name_prefix}-DockerFargateStack/{environment}/opentelemetry-collector-configuration",
+            environment_key="CONFIG_CONTENT",
+        )
+    ],
+    container_command=["--config", "env:CONFIG_CONTENT"],
+    container_healthcheck=cdk.aws_ecs.HealthCheck(command=["CMD", "/healthcheck"]),
+)
+
+app_service_stack_otel_collector = ServiceStack(
+    scope=cdk_app,
+    construct_id=f"{stack_name_prefix}-otel-collector",
+    vpc=network_stack.vpc,
+    cluster=ecs_stack.cluster,
+    props=app_service_props_otel_collector,
+)
+
 cdk_app.synth()
diff --git a/src/service_props.py b/src/service_props.py
index 6b6b2f8..75c29cd 100644
--- a/src/service_props.py
+++ b/src/service_props.py
@@ -1,6 +1,28 @@
+from dataclasses import dataclass
+from typing import List, Optional, Sequence
+
+from aws_cdk import aws_ecs as ecs
+
 CONTAINER_LOCATION_PATH_ID = "path://"

+
+@dataclass
+class ServiceSecret:
+    """
+    Holds onto configuration for the secrets to be used in the container.
+
+    Attributes:
+        secret_name: The name of the secret as stored in the AWS Secrets Manager.
+        environment_key: The name of the environment variable to be set within the container.
+    """
+
+    secret_name: str
+    """The name of the secret as stored in the AWS Secrets Manager."""
+
+    environment_key: str
+    """The name of the environment variable to be set within the container."""
+
+
 class ServiceProps:
     """
     ECS service properties
@@ -13,9 +35,11 @@ class ServiceProps:
         container_memory: the container application memory
         container_env_vars: a json dictionary of environment variables to pass into the container
             i.e.
{"EnvA": "EnvValueA", "EnvB": "EnvValueB"} - container_secret_name: the secret's name in the AWS secrets manager + container_secrets: List of `ServiceSecret` resources to pull from AWS secrets manager auto_scale_min_capacity: the fargate auto scaling minimum capacity auto_scale_max_capacity: the fargate auto scaling maximum capacity + container_command: Optional commands to run during the container startup + container_healthcheck: Optional health check configuration for the container """ def __init__( @@ -25,9 +49,11 @@ def __init__( container_port: int, container_memory: int = 512, container_env_vars: dict = None, - container_secret_name: str = None, + container_secrets: List[ServiceSecret] = None, auto_scale_min_capacity: int = 1, auto_scale_max_capacity: int = 1, + container_command: Optional[Sequence[str]] = None, + container_healthcheck: Optional[ecs.HealthCheck] = None, ) -> None: self.container_name = container_name self.container_port = container_port @@ -37,8 +63,18 @@ def __init__( CONTAINER_LOCATION_PATH_ID ) self.container_location = container_location + if container_env_vars is None: self.container_env_vars = {} - self.container_secret_name = container_secret_name + else: + self.container_env_vars = container_env_vars + + if container_secrets is None: + self.container_secrets = [] + else: + self.container_secrets = container_secrets + self.auto_scale_min_capacity = auto_scale_min_capacity self.auto_scale_max_capacity = auto_scale_max_capacity + self.container_command = container_command + self.container_healthcheck = container_healthcheck diff --git a/src/service_stack.py b/src/service_stack.py index 3fa0ee7..eeb3614 100644 --- a/src/service_stack.py +++ b/src/service_stack.py @@ -1,17 +1,14 @@ import aws_cdk as cdk - -from aws_cdk import ( - Duration as duration, - aws_ecs as ecs, - aws_ec2 as ec2, - aws_logs as logs, - aws_elasticloadbalancingv2 as elbv2, - aws_certificatemanager as acm, - aws_iam as iam, - aws_secretsmanager as sm, -) - +from aws_cdk import Duration as duration +from aws_cdk import aws_certificatemanager as acm +from aws_cdk import aws_ec2 as ec2 +from aws_cdk import aws_ecs as ecs +from aws_cdk import aws_elasticloadbalancingv2 as elbv2 +from aws_cdk import aws_iam as iam +from aws_cdk import aws_logs as logs +from aws_cdk import aws_secretsmanager as sm from constructs import Construct + from src.service_props import ServiceProps ALB_HTTP_LISTENER_PORT = 80 @@ -79,16 +76,18 @@ def _get_secret(scope: Construct, id: str, name: str) -> sm.Secret: isecret = sm.Secret.from_secret_name_v2(scope, id, name) return ecs.Secret.from_secrets_manager(isecret) + secrets = {} + for secret in props.container_secrets: + secrets[secret.environment_key] = _get_secret( + self, f"sm-secrets-{secret.environment_key}", secret.secret_name + ) + self.container = self.task_definition.add_container( props.container_name, image=image, memory_limit_mib=props.container_memory, environment=props.container_env_vars, - secrets={ - "SECRETS_MANAGER_SECRETS": _get_secret( - self, "sm-secrets", props.container_secret_name - ) - }, + secrets=secrets, port_mappings=[ ecs.PortMapping( name=props.container_name, @@ -100,6 +99,8 @@ def _get_secret(scope: Construct, id: str, name: str) -> sm.Secret: stream_prefix=f"{construct_id}", log_retention=logs.RetentionDays.FOUR_MONTHS, ), + command=props.container_command, + health_check=props.container_healthcheck, ) self.security_group = ec2.SecurityGroup(self, "SecurityGroup", vpc=vpc)