diff --git a/bin/single-new-eks-cost-monitoring.ts b/bin/single-new-eks-cost-monitoring.ts
new file mode 100644
index 00000000..129038b6
--- /dev/null
+++ b/bin/single-new-eks-cost-monitoring.ts
@@ -0,0 +1,11 @@
+import { configureApp, errorHandler } from '../lib/common/construct-utils';
+import SingleNewEksCostMonitoringPattern from '../lib/single-new-eks-cost-monitoring-pattern';
+
+const app = configureApp();
+
+new SingleNewEksCostMonitoringPattern()
+    .buildAsync(app, 'single-new-eks-cost-monitoring')
+    .catch((e) => {
+        errorHandler(app, "Secure Ingress Cost Monitoring pattern could not be deployed. \
+        See the Secure Ingress Cost Monitoring pattern in the readme for instructions", e);
+    });
\ No newline at end of file
diff --git a/cdk.json b/cdk.json
index c02d52c4..53b9b239 100644
--- a/cdk.json
+++ b/cdk.json
@@ -58,4 +58,4 @@
     "existing.cluster.name": "single-new-eks-observability-accelerator",
     "existing.kubectl.rolename": "YOUR_KUBECTL_ROLE"
   }
-}
+}
\ No newline at end of file
diff --git a/docs/patterns/images/costmonitoring-ingress.png b/docs/patterns/images/costmonitoring-ingress.png
new file mode 100644
index 00000000..d41745e3
Binary files /dev/null and b/docs/patterns/images/costmonitoring-ingress.png differ
diff --git a/docs/patterns/images/kubecost-dashboard.png b/docs/patterns/images/kubecost-dashboard.png
new file mode 100644
index 00000000..053e7976
Binary files /dev/null and b/docs/patterns/images/kubecost-dashboard.png differ
diff --git a/docs/patterns/images/kubecost-namespace-dashboard.png b/docs/patterns/images/kubecost-namespace-dashboard.png
new file mode 100644
index 00000000..0b22eddb
Binary files /dev/null and b/docs/patterns/images/kubecost-namespace-dashboard.png differ
diff --git a/docs/patterns/single-new-eks-observability-accelerators/single-new-eks-cost-monitoring-ingress-observability.md b/docs/patterns/single-new-eks-observability-accelerators/single-new-eks-cost-monitoring-ingress-observability.md
new file mode 100644
index 00000000..490e5c09
--- /dev/null
+++ b/docs/patterns/single-new-eks-observability-accelerators/single-new-eks-cost-monitoring-ingress-observability.md
@@ -0,0 +1,151 @@
# Single Cluster Observability - Kubecost Cost Monitoring with Secure Ingress using Cognito

Kubecost gives granular visibility into the cost of Kubernetes workloads, making it easier to allocate resources and optimize infrastructure spending on Amazon EKS. This pattern integrates Kubecost with Amazon Managed Service for Prometheus (AMP) for metrics storage, and secures access to the Kubecost dashboard with an Application Load Balancer, Amazon Cognito, and Amazon Route 53. Alerts and recording rules in AMP help teams proactively identify and address issues, while Kubecost's dashboards and reporting keep resource usage and spending predictable.

## Architecture

The following figure illustrates the architecture of this pattern: a single EKS cluster running Kubecost behind an Application Load Balancer, with Amazon Cognito and a Transport Layer Security (TLS) certificate from AWS Certificate Manager (ACM) on an Amazon Route 53 hosted zone to authenticate users to Kubecost.

![Architecture](../images/costmonitoring-ingress.png)

## Objective

- Deploy one production-grade Amazon EKS cluster.
- Integrate Kubecost with Amazon Managed Prometheus (AMP).
- Set up [Secure Ingress with AWS Cognito](https://aws.amazon.com/blogs/containers/securing-kubecost-access-with-amazon-cognito/).
- Issue a certificate with AWS Certificate Manager on an Amazon Route 53 hosted zone.
- Configure alerts and recording rules with Amazon Managed Service for Prometheus.

## Prerequisites

An existing hosted zone in Route 53 with the ability to add records.

Ensure that you have installed the following tools on your machine.

1. [aws cli](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html)
2. [kubectl](https://Kubernetes.io/docs/tasks/tools/)
3. [cdk](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html#getting_started_install)
4. [npm](https://docs.npmjs.com/cli/v8/commands/npm-install)

## Configuring domain

The CDK code expects the allowed domain and subdomain names in the CDK context file (cdk.json).

Create two environment variables. The PARENT_HOSTED_ZONE variable contains the name of your Route 53 public hosted zone. The DEV_SUBZONE_NAME will be the address of your Kubecost dashboard.

When users register with Cognito they must provide an email address. Using `allowed.domains.list`, you can restrict sign-up to your enterprise's email domain so that only your employees can register for the service.

Generate the cdk.json file:

```bash
PARENT_HOSTED_ZONE=mycompany.a2z.com
DEV_SUBZONE_NAME=kubecost.mycompany.a2z.com
ALLOWED_DOMAIN_LIST=amazon.com
cat << EOF > cdk.json
{
    "app": "npx ts-node dist/lib/common/default-main.js",
    "context": {
        "parent.hostedzone.name": "${PARENT_HOSTED_ZONE}",
        "dev.subzone.name": "${DEV_SUBZONE_NAME}",
        "allowed.domains.list": "${ALLOWED_DOMAIN_LIST}"
    }
}
EOF
```

## Deploying

Please follow the _Deploying_ instructions of the [New EKS Cluster Open Source Observability Accelerator](./single-new-eks-opensource-observability.md) pattern up to step 7. At step 8, execute the following:

```bash
make build
make pattern single-new-eks-cost-monitoring deploy
```

## Verifying the A record in Route 53

Once the deployment is complete, open the AWS console, navigate to Route 53, and select the hosted zone you used for the deployment. Verify that an A record matching the DEV_SUBZONE_NAME value has been created.

## Verify the resources

Run the update-kubeconfig command. You can copy the exact command, including the account ID, region, and role ARN, from the CDK output:

```bash
aws eks update-kubeconfig --name single-new-eks-cost-monitoring-observability-accelerator --region <your-region> --role-arn arn:aws:iam::xxxxxxxxx:role/single-new-eks-cost-monit-xxxxxxxx
```
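
As a quick sanity check before inspecting workloads, you can confirm that kubectl now points at the new cluster (a minimal check; the context is set by the update-kubeconfig command above):

```bash
# Show the active context and confirm the managed nodes registered and are Ready
kubectl config current-context
kubectl get nodes
```

Let's verify the resources created by the steps above.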
+ +```bash +kubectl get pods -o wide -A +``` +``` +Output: +NAMESPACE NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +amazon-guardduty aws-guardduty-agent-5lblf 1/1 Running 0 3h43m 10.0.184.135 ip-10-0-184-135.us-west-2.compute.internal +amazon-guardduty aws-guardduty-agent-qzm4j 1/1 Running 0 3h43m 10.0.153.58 ip-10-0-153-58.us-west-2.compute.internal +argocd blueprints-addon-argocd-application-controller-0 1/1 Running 0 3h40m 10.0.128.187 ip-10-0-153-58.us-west-2.compute.internal +argocd blueprints-addon-argocd-applicationset-controller-7d77d5cdjjhm8 1/1 Running 0 3h40m 10.0.148.136 ip-10-0-153-58.us-west-2.compute.internal +argocd blueprints-addon-argocd-dex-server-84dc54844f-lwgss 1/1 Running 0 3h40m 10.0.178.159 ip-10-0-184-135.us-west-2.compute.internal +argocd blueprints-addon-argocd-notifications-controller-597477df8q4btr 1/1 Running 0 3h40m 10.0.166.196 ip-10-0-184-135.us-west-2.compute.internal +argocd blueprints-addon-argocd-redis-79cb6b87dc-tddlm 1/1 Running 0 3h40m 10.0.160.149 ip-10-0-184-135.us-west-2.compute.internal +argocd blueprints-addon-argocd-repo-server-584549c456-5gfs8 1/1 Running 0 3h40m 10.0.146.88 ip-10-0-153-58.us-west-2.compute.internal +argocd blueprints-addon-argocd-server-7b7b488dd4-686tx 1/1 Running 0 3h40m 10.0.175.70 ip-10-0-184-135.us-west-2.compute.internal +aws-for-fluent-bit blueprints-addon-aws-fluent-bit-for-cw-aws-for-fluent-bit-lr99l 1/1 Running 0 3h40m 10.0.160.194 ip-10-0-184-135.us-west-2.compute.internal +aws-for-fluent-bit blueprints-addon-aws-fluent-bit-for-cw-aws-for-fluent-bit-z2pm7 1/1 Running 0 3h40m 10.0.146.233 ip-10-0-153-58.us-west-2.compute.internal +cert-manager cert-manager-6d988558d6-wm746 1/1 Running 0 3h40m 10.0.188.100 ip-10-0-184-135.us-west-2.compute.internal +cert-manager cert-manager-cainjector-6976895488-mk9sw 1/1 Running 0 3h40m 10.0.173.79 ip-10-0-184-135.us-west-2.compute.internal +cert-manager cert-manager-webhook-fcf48cc54-92wqm 1/1 Running 0 3h40m 10.0.133.37 ip-10-0-153-58.us-west-2.compute.internal +default otel-collector-amp-collector-6d768bcbf5-vbmqr 1/1 Running 0 3h39m 10.0.171.253 ip-10-0-184-135.us-west-2.compute.internal +external-dns blueprints-addon-external-dns-78bcd6c7c5-df74q 1/1 Running 0 3h40m 10.0.180.87 ip-10-0-184-135.us-west-2.compute.internal +external-secrets blueprints-addon-external-secrets-675f847b97-kbn98 1/1 Running 0 3h40m 10.0.178.180 ip-10-0-184-135.us-west-2.compute.internal +external-secrets blueprints-addon-external-secrets-cert-controller-68cbb65dspf8c 1/1 Running 0 3h40m 10.0.154.4 ip-10-0-153-58.us-west-2.compute.internal +external-secrets blueprints-addon-external-secrets-webhook-6cfdbdf896-j9ng7 1/1 Running 0 3h40m 10.0.142.78 ip-10-0-153-58.us-west-2.compute.internal +kube-system aws-load-balancer-controller-7cd4b895d4-gvxtv 1/1 Running 0 3h40m 10.0.131.188 ip-10-0-153-58.us-west-2.compute.internal +kube-system aws-load-balancer-controller-7cd4b895d4-m2dh5 1/1 Running 0 3h40m 10.0.173.13 ip-10-0-184-135.us-west-2.compute.internal +kube-system aws-node-7l22p 2/2 Running 0 3h43m 10.0.184.135 ip-10-0-184-135.us-west-2.compute.internal +kube-system aws-node-rfc76 2/2 Running 0 3h43m 10.0.153.58 ip-10-0-153-58.us-west-2.compute.internal +kube-system blueprints-addon-metrics-server-7cb6564d98-jhwmj 1/1 Running 0 3h40m 10.0.182.218 ip-10-0-184-135.us-west-2.compute.internal +kube-system blueprints-addon-secret-store-csi-driver-secrets-store-csi5fbjj 3/3 Running 0 3h40m 10.0.190.108 ip-10-0-184-135.us-west-2.compute.internal +kube-system 
blueprints-addon-secret-store-csi-driver-secrets-store-csigdgfd 3/3 Running 0 3h40m 10.0.148.234 ip-10-0-153-58.us-west-2.compute.internal
kube-system coredns-5b8cc885bc-t9dpp 1/1 Running 0 3h47m 10.0.132.167 ip-10-0-153-58.us-west-2.compute.internal
kube-system coredns-5b8cc885bc-tkq6g 1/1 Running 0 3h47m 10.0.152.126 ip-10-0-153-58.us-west-2.compute.internal
kube-system csi-secrets-store-provider-aws-ktklg 1/1 Running 0 3h40m 10.0.190.207 ip-10-0-184-135.us-west-2.compute.internal
kube-system csi-secrets-store-provider-aws-qmg44 1/1 Running 0 3h40m 10.0.142.192 ip-10-0-153-58.us-west-2.compute.internal
kube-system ebs-csi-controller-5c4b7b9549-cvv8b 6/6 Running 0 3h40m 10.0.163.2 ip-10-0-184-135.us-west-2.compute.internal
kube-system ebs-csi-controller-5c4b7b9549-d9wfc 6/6 Running 0 3h40m 10.0.146.91 ip-10-0-153-58.us-west-2.compute.internal
kube-system ebs-csi-node-9sxtr 3/3 Running 0 3h40m 10.0.155.48 ip-10-0-153-58.us-west-2.compute.internal
kube-system ebs-csi-node-bcsjk 3/3 Running 0 3h40m 10.0.187.96 ip-10-0-184-135.us-west-2.compute.internal
kube-system kube-proxy-djbgh 1/1 Running 0 3h43m 10.0.153.58 ip-10-0-153-58.us-west-2.compute.internal
kube-system kube-proxy-mck62 1/1 Running 0 3h43m 10.0.184.135 ip-10-0-184-135.us-west-2.compute.internal
kube-system kube-state-metrics-6cf6f65cf7-nzqkb 1/1 Running 0 3h40m 10.0.186.50 ip-10-0-184-135.us-west-2.compute.internal
kube-system ssm-installer-fsfjn 1/1 Running 0 3h41m 10.0.189.79 ip-10-0-184-135.us-west-2.compute.internal
kube-system ssm-installer-vbqqm 1/1 Running 0 3h41m 10.0.154.228 ip-10-0-153-58.us-west-2.compute.internal
kubecost kubecost-cost-analyzer-5769d5f47f-fjwkz 3/3 Running 0 3h40m 10.0.137.248 ip-10-0-153-58.us-west-2.compute.internal
kubecost kubecost-cost-analyzer-prometheus-server-6f48bdc56c-d6789 2/2 Running 0 3h40m 10.0.187.76 ip-10-0-184-135.us-west-2.compute.internal
opentelemetry-operator-system opentelemetry-operator-98f5b9c89-7kp6x 2/2 Running 0 3h39m 10.0.148.126 ip-10-0-153-58.us-west-2.compute.internal
prometheus-node-exporter prometheus-node-exporter-czsx8 1/1 Running 0 3h40m 10.0.184.135 ip-10-0-184-135.us-west-2.compute.internal
prometheus-node-exporter prometheus-node-exporter-jg9tw 1/1 Running 0 3h40m 10.0.153.58 ip-10-0-153-58.us-west-2.compute.internal
```

Now, let's navigate to the URL configured as dev.subzone.name in the cdk.json file and sign up with a new Cognito user profile.
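
If the page does not come up right away, you can check that the ALB ingress for Kubecost has been provisioned and that the DNS record resolves. The exact ingress name and namespace depend on the ArgoCD workload repository, so the commands below are illustrative:

```bash
# List ingresses across all namespaces and look for the Kubecost entry with an ALB address
kubectl get ingress -A
# Confirm the subdomain resolves to the load balancer (use your DEV_SUBZONE_NAME)
nslookup kubecost.mycompany.a2z.com
```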

- **Kubecost Dashboards**

![kubecost-dashboard](../images/kubecost-dashboard.png)

- **Kubecost Namespace Dashboards**

![kubecost-namespace-dashboard](../images/kubecost-namespace-dashboard.png)

## Teardown

You can teardown the whole CDK stack with the following command:

```bash
make pattern single-new-eks-cost-monitoring destroy
```
\ No newline at end of file
diff --git a/lib/common/cognito/cognito-idp-stack.ts b/lib/common/cognito/cognito-idp-stack.ts
new file mode 100644
index 00000000..5c568b7d
--- /dev/null
+++ b/lib/common/cognito/cognito-idp-stack.ts
@@ -0,0 +1,129 @@
+import * as cdk from 'aws-cdk-lib';
+import * as blueprints from '@aws-quickstart/eks-blueprints';
+import { Construct } from 'constructs';
+import * as cognito from 'aws-cdk-lib/aws-cognito';
+import * as lambda from 'aws-cdk-lib/aws-lambda';
+import * as iam from 'aws-cdk-lib/aws-iam';
+
+export default class CognitoIdpStack extends cdk.Stack {
+
+    public readonly userPoolOut: cognito.UserPool;
+    public readonly userPoolClientOut: cognito.UserPoolClient;
+    public readonly userPoolDomainOut: cognito.UserPoolDomain;
+
+    constructor(scope: Construct, id: string, subDomain: string, props?: cdk.StackProps) {
+        super(scope, id, props);
+
+        const lambdaExecutionRole = new iam.Role(this, 'Lambda Execution Role', {
+            assumedBy: new iam.ServicePrincipal('lambda.amazonaws.com'),
+        });
+
+        lambdaExecutionRole.addManagedPolicy(iam.ManagedPolicy.fromAwsManagedPolicyName("service-role/AWSLambdaBasicExecutionRole"));
+        lambdaExecutionRole.addManagedPolicy(iam.ManagedPolicy.fromAwsManagedPolicyName("AmazonSSMReadOnlyAccess"));
+
+        // Pre-sign-up/pre-authentication trigger that validates the user's email domain
+        const authChallengeFn = new lambda.Function(this, 'authChallengeFn', {
+            runtime: lambda.Runtime.PYTHON_3_12,
+            code: lambda.Code.fromAsset('./lib/common/cognito/lambda'),
+            handler: 'lambda_function.lambda_handler',
+            role: lambdaExecutionRole,
+            environment: {
+                "ALLOWED_DOMAINS_LIST": blueprints.utils.valueFromContext(scope, "allowed.domains.list", "amazon.com")
+            }
+        });
+
+        // Cognito User Pool
+        const userPool = new cognito.UserPool(this, 'CognitoIDPUserPool', {
+            userPoolName: 'CognitoIDPUserPool',
+            selfSignUpEnabled: true,
+            signInAliases: {
+                email: true,
+                username: true
+            },
+            standardAttributes: {
+                email: {
+                    mutable: true,
+                    required: true
+                },
+                givenName: {
+                    mutable: true,
+                    required: true
+                },
+                familyName: {
+                    mutable: true,
+                    required: true
+                }
+            },
+            lambdaTriggers: {
+                preSignUp: authChallengeFn,
+                preAuthentication: authChallengeFn,
+            },
+        });
+
+        // Output the User Pool ID
+        this.userPoolOut = userPool;
+
+        new cdk.CfnOutput(this, 'CognitoIDPUserPoolOut', {
+            value: userPool.userPoolId,
+            exportName: 'CognitoIDPUserPoolId'
+        });
+
+        new cdk.CfnOutput(this, 'CognitoIDPUserPoolArnOut', {
+            value: userPool.userPoolArn,
+            exportName: 'CognitoIDPUserPoolArn'
+        });
+
+        // The IDP redirects back to the ALB's OAuth callback endpoint on our subdomain
+        const redirectUri = `https://${subDomain}/oauth2/idpresponse`;
+
+        // Configure the user pool client application
+        const userPoolClient = new cognito.UserPoolClient(this, 'CognitoAppClient', {
+            userPool,
+            authFlows: {
+                userPassword: true
+            },
+            oAuth: {
+                flows: {
+                    authorizationCodeGrant: true
+                },
+                scopes: [
+                    cognito.OAuthScope.OPENID
+                ],
+                callbackUrls: [redirectUri]
+                // TODO - What about logoutUrls?
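+                // A sign-out redirect could be registered here as well, for example
+                // logoutUrls: [`https://${subDomain}`] (hypothetical; not wired up in this pattern)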
+            },
+            generateSecret: true,
+            userPoolClientName: 'Web',
+            supportedIdentityProviders: [cognito.UserPoolClientIdentityProvider.COGNITO]
+        });
+
+        // Output the User Pool App Client ID
+        this.userPoolClientOut = userPoolClient;
+
+        new cdk.CfnOutput(this, 'CognitoIDPUserPoolClientOut', {
+            value: userPoolClient.userPoolClientId,
+            exportName: 'CognitoIDPUserPoolClientId'
+        });
+
+        // Add a domain to the user pool, with a random suffix to keep the prefix globally unique
+        const randomText = (Math.random() + 1).toString(36).substring(7);
+        const userPoolDomain = userPool.addDomain('CognitoDomain', {
+            cognitoDomain: {
+                domainPrefix: `my-cdk-blueprint-${randomText}`,
+            },
+        });
+
+        // Output the User Pool Domain
+        this.userPoolDomainOut = userPoolDomain;
+
+        new cdk.CfnOutput(this, 'CognitoIDPUserPoolDomainOut', {
+            value: userPoolDomain.domainName,
+            exportName: 'CognitoIDPUserPoolDomain'
+        });
+    }
+}
\ No newline at end of file
diff --git a/lib/common/cognito/lambda/lambda_function.py b/lib/common/cognito/lambda/lambda_function.py
new file mode 100644
index 00000000..9e1ec394
--- /dev/null
+++ b/lib/common/cognito/lambda/lambda_function.py
@@ -0,0 +1,38 @@
+import json
+import os
+
+def lambda_handler(event, context):
+    print("Received event: " + json.dumps(event, indent=2))
+
+    # Comma-separated list of email domains allowed to sign up, injected by the CDK stack
+    allowed_domains_list = [d.strip() for d in os.environ.get("ALLOWED_DOMAINS_LIST", "example.com").split(",")]
+
+    triggerSource = event['triggerSource']
+
+    # Split the email address so we can compare domains
+    emailId = event['request']['userAttributes']['email']
+    address = emailId.split('@')
+    emailDomain = address[1]
+
+    print("Running the validation for the {} flow".format(triggerSource))
+
+    if triggerSource == 'PreSignUp_SignUp':
+        # Leave autoConfirmUser off; users still confirm their email after domain validation
+        event['response']['autoConfirmUser'] = False
+
+        if emailDomain not in allowed_domains_list:
+            raise Exception("Cannot register users with email domains other than allowed domains list={}".format(allowed_domains_list))
+    else:
+        print("No validation configured for triggerSource={}".format(triggerSource))
+
+    return event
\ No newline at end of file
diff --git a/lib/common/resources/otel-collector-config.yml b/lib/common/resources/otel-collector-config.yml
index 179b49d5..d9f385aa 100644
--- a/lib/common/resources/otel-collector-config.yml
+++ b/lib/common/resources/otel-collector-config.yml
@@ -44,6 +44,19 @@ spec:
           external_labels:
             cluster: "{{clusterName}}"
         scrape_configs:
+        {{ start kubecostJob }}
+        - job_name: kubecost
+          honor_labels: true
+          scrape_interval: 1m
+          scrape_timeout: 10s
+          metrics_path: /metrics
+          scheme: http
+          dns_sd_configs:
+          - names:
+            - "{{subdomain}}"
+            type: 'A'
+            port: 9003
+        {{ stop kubecostJob }}
         {{ start enableAdotMetricsCollectionJob }}
         - job_name: otel-collector-metrics
           scrape_interval: 10s
diff --git a/lib/single-new-eks-cost-monitoring-pattern/index.ts b/lib/single-new-eks-cost-monitoring-pattern/index.ts
new file mode 100644
index 00000000..ba78077b
--- /dev/null
+++ b/lib/single-new-eks-cost-monitoring-pattern/index.ts
@@ -0,0 +1,255 @@
+import { Construct } from 'constructs';
+import * as cdk from "aws-cdk-lib";
+import * as blueprints from '@aws-quickstart/eks-blueprints';
+import { GlobalResources, LookupHostedZoneProvider, ObservabilityBuilder } from '@aws-quickstart/eks-blueprints';
+import { KubecostServiceAccountsAddon } from './kubecostserviceaccountsaddon';
+import { KubecostAddOn, KubecostAddOnProps } from '@kubecost/kubecost-eks-blueprints-addon';
+import * as amp from 'aws-cdk-lib/aws-aps';
+import * as eks from 'aws-cdk-lib/aws-eks';
+import * as ec2 from 'aws-cdk-lib/aws-ec2';
+import { ICertificate } from 'aws-cdk-lib/aws-certificatemanager';
+import { setPath } from '@aws-quickstart/eks-blueprints/dist/utils';
+import CognitoIdpStack from '../common/cognito/cognito-idp-stack';
+import * as fs from 'fs';
+
+const gitUrl = 'https://github.com/aws-samples/eks-blueprints-workloads.git';
+
+export default class SingleNewEksCostMonitoringPattern extends cdk.Stack {
+    async buildAsync(scope: Construct, id: string) {
+
+        const subdomain: string = blueprints.utils.valueFromContext(scope, "dev.subzone.name", "dev.mycompany.a2z.com");
+        const parentDomain = blueprints.utils.valueFromContext(scope, "parent.hostedzone.name", "mycompany.a2z.com");
+        const certificate: ICertificate = blueprints.getNamedResource(GlobalResources.Certificate);
+
+        const cognitoIdpStackOut = new CognitoIdpStack(scope, 'cognito-idp-stack', subdomain,
+            {
+                env: {
+                    account: process.env.CDK_DEFAULT_ACCOUNT,
+                    region: process.env.CDK_DEFAULT_REGION,
+                },
+            });
+
+        const stackId = `${id}-observability-accelerator`;
+        const account = process.env.COA_ACCOUNT_ID! || process.env.CDK_DEFAULT_ACCOUNT!;
+        const region = process.env.COA_AWS_REGION! || process.env.CDK_DEFAULT_REGION!;
+
+        const ampWorkspaceName = process.env.COA_AMP_WORKSPACE_NAME! || 'observability-amp-workspace';
+        const ampWorkspace = blueprints.getNamedResource(ampWorkspaceName) as unknown as amp.CfnWorkspace;
+        const ampEndpoint = ampWorkspace.attrPrometheusEndpoint;
+        const ampWorkspaceArn = ampWorkspace.attrArn;
+
+        const ampAddOnProps: blueprints.AmpAddOnProps = {
+            ampPrometheusEndpoint: ampEndpoint,
+            ampRules: {
+                ampWorkspaceArn: ampWorkspaceArn,
+                ruleFilePaths: [
+                    __dirname + '/../common/resources/amp-config/alerting-rules.yml',
+                    __dirname + '/../common/resources/amp-config/recording-rules.yml'
+                ]
+            }
+        };
+
+        // Toggle the scrape jobs in the collector template: enable the Kubecost job,
+        // disable the jobs this pattern does not use
+        let doc = blueprints.utils.readYamlDocument(__dirname + '/../common/resources/otel-collector-config.yml');
+        doc = blueprints.utils.changeTextBetweenTokens(
+            doc,
+            "{{ start kubecostJob }}",
+            "{{ stop kubecostJob }}",
+            true
+        );
+        doc = blueprints.utils.changeTextBetweenTokens(
+            doc,
+            "{{ start enableJavaMonJob }}",
+            "{{ stop enableJavaMonJob }}",
+            false
+        );
+        doc = blueprints.utils.changeTextBetweenTokens(
+            doc,
+            "{{ start enableNginxMonJob }}",
+            "{{ stop enableNginxMonJob }}",
+            false
+        );
+        doc = blueprints.utils.changeTextBetweenTokens(
+            doc,
+            "{{ start enableIstioMonJob }}",
+            "{{ stop enableIstioMonJob }}",
+            false
+        );
+        doc = blueprints.utils.changeTextBetweenTokens(
+            doc,
+            "{{ start enableAPIserverJob }}",
+            "{{ stop enableAPIserverJob }}",
+            false
+        );
+        doc = blueprints.utils.changeTextBetweenTokens(
+            doc,
+            "{{ start enableAdotMetricsCollectionJob }}",
+            "{{ stop enableAdotMetricsCollectionJob }}",
+            false
+        );
+        doc = blueprints.utils.changeTextBetweenTokens(
+            doc,
+            "{{ start enableAdotMetricsCollectionTelemetry }}",
+            "{{ stop enableAdotMetricsCollectionTelemetry }}",
+            true
+        );
+        doc = blueprints.utils.changeTextBetweenTokens(
+            doc,
+            "{{ start enableAdotContainerLogsReceiver }}",
+            "{{ stop enableAdotContainerLogsReceiver }}",
+            true
+        );
+        doc = blueprints.utils.changeTextBetweenTokens(
+            doc,
+            "{{ start enableAdotContainerLogsExporter }}",
+            "{{ stop enableAdotContainerLogsExporter }}",
+            true
+        );
+
+        // Write the rendered collector manifest next to the template and point the AMP add-on at it
+        fs.writeFileSync(__dirname + '/../common/resources/otel-collector-config-new.yml', doc);
+
+        ampAddOnProps.openTelemetryCollector = {
+            manifestPath: __dirname + '/../common/resources/otel-collector-config-new.yml',
+            manifestParameterMap: {
+                subdomain: subdomain,
+                logGroupName: `/aws/eks/costmonitoring/${ampWorkspaceName}`,
+                logStreamName: `$NODE_NAME`,
+                logRetentionDays: 30,
+                awsRegion: region
+            }
+        };
+
+        const addOns: Array<blueprints.ClusterAddOn> = [
+            new blueprints.addons.CloudWatchLogsAddon({
+                logGroupPrefix: `/aws/eks/${stackId}`,
+                logRetentionDays: 30
+            }),
+            new blueprints.addons.AwsLoadBalancerControllerAddOn(),
+            new blueprints.addons.CertManagerAddOn(),
+            new blueprints.addons.AdotCollectorAddOn(),
+            new blueprints.addons.CoreDnsAddOn(),
+            new blueprints.addons.ExternalDnsAddOn({
+                hostedZoneResources: [GlobalResources.HostedZone]
+            }),
+            new blueprints.addons.ExternalsSecretsAddOn(),
+            new blueprints.addons.KubeProxyAddOn(),
+            new blueprints.addons.KubeStateMetricsAddOn(),
+            new blueprints.addons.MetricsServerAddOn(),
+            new blueprints.addons.EbsCsiDriverAddOn(),
+            new blueprints.addons.AmpAddOn(ampAddOnProps),
+            new blueprints.addons.PrometheusNodeExporterAddOn(),
+            new blueprints.SecretsStoreAddOn({ rotationPollInterval: "120s" }),
+            new blueprints.SSMAgentAddOn(),
+            new KubeCostExtensionAddon({
+                namespace: "kubecost",
+                version: "1.108.1",
+                values: {
+                    global: {
+                        amp: {
+                            // Carries the workspace ID; expanded into full endpoints in deploy() below
+                            prometheusServerEndpoint: ampWorkspace.attrWorkspaceId,
+                            enabled: true,
+                            sigv4: {
+                                region: region
+                            }
+                        },
+                        grafana: {
+                            enabled: false,
+                            proxy: false
+                        }
+                    },
+                    kubecostProductConfigs: {
+                        clusterName: stackId,
+                        projectID: account
+                    },
+                    prometheus: {
+                        nodeExporter: {
+                            enabled: false
+                        },
+                        serviceAccounts: {
+                            nodeExporter: {
+                                create: false
+                            }
+                        },
+                        server: {
+                            global: {
+                                external_labels: {
+                                    cluster_id: stackId
+                                }
+                            }
+                        }
+                    },
+                    serviceAccount: {
+                        name: "kubecost-cost-analyzer-amp",
+                        create: false,
+                        server: {
+                            create: false,
+                            name: "kubecost-prometheus-server-amp"
+                        }
+                    }
+                }
+            }),
+            new blueprints.ArgoCDAddOn({
+                bootstrapRepo: {
+                    repoUrl: gitUrl,
+                    targetRevision: "main",
+                    path: 'secure-ingress-cognito/envs/dev'
+                },
+                bootstrapValues: {
+                    spec: {
+                        ingress: {
+                            host: subdomain,
+                            cognitoUserPoolArn: cognitoIdpStackOut.userPoolOut.userPoolArn,
+                            cognitoUserPoolAppId: cognitoIdpStackOut.userPoolClientOut.userPoolClientId,
+                            cognitoDomainName: cognitoIdpStackOut.userPoolDomainOut.domainName,
+                            certificateArn: certificate.certificateArn,
+                            region: process.env.CDK_DEFAULT_REGION,
+                        }
+                    },
+                }
+            }),
+            new KubecostServiceAccountsAddon()
+        ];
+
+        const mngProps: blueprints.MngClusterProviderProps = {
+            version: eks.KubernetesVersion.of("1.28"),
+            instanceTypes: [new ec2.InstanceType("m5.2xlarge")],
+            amiType: eks.NodegroupAmiType.AL2_X86_64,
+            desiredSize: 2,
+            maxSize: 3,
+        };
+
+        await ObservabilityBuilder.builder()
+            .account(account)
+            .region(region)
+            .clusterProvider(new blueprints.MngClusterProvider(mngProps))
+            .version('auto')
+            .resourceProvider(GlobalResources.HostedZone, new LookupHostedZoneProvider(parentDomain))
+            .resourceProvider(GlobalResources.Certificate, new blueprints.CreateCertificateProvider('secure-ingress-cert', `${subdomain}`, GlobalResources.HostedZone))
+            .resourceProvider(ampWorkspaceName, new blueprints.CreateAmpProvider(ampWorkspaceName, ampWorkspaceName))
+            .addOns(...addOns)
+            .buildAsync(scope, stackId);
+    }
+}
+
+class KubeCostExtensionAddon extends KubecostAddOn {
+    constructor(props?: KubecostAddOnProps) {
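+        // Thin wrapper: construction just forwards the Helm values; deploy() below swaps the
+        // stashed AMP workspace ID for concrete sigv4-proxy, query, and remote-write endpoints.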
+        super(props);
+    }
+
+    deploy(clusterInfo: blueprints.ClusterInfo): Promise<Construct> {
+        const region = process.env.COA_AWS_REGION! || process.env.CDK_DEFAULT_REGION!;
+        const ampWorkspaceId = this.options.values!.global.amp.prometheusServerEndpoint;
+        const prometheusServerEndpoint = 'http://localhost:8005/workspaces/' + ampWorkspaceId;
+        const remoteWriteEndpoint = `https://aps-workspaces.${region}.amazonaws.com/workspaces/${ampWorkspaceId}/api/v1/remote_write`;
+        const sigV4ProxyHost = `aps-workspaces.${region}.amazonaws.com`;
+        setPath(this.options!.values, "global.amp.prometheusServerEndpoint", prometheusServerEndpoint);
+        setPath(this.options!.values, "global.amp.remoteWriteService", remoteWriteEndpoint);
+        setPath(this.options!.values, "global.amp.sigv4.region", region);
+        setPath(this.options!.values, "global.prometheus.fqdn", remoteWriteEndpoint);
+        setPath(this.options!.values, "sigV4Proxy.region", region);
+        setPath(this.options!.values, "sigV4Proxy.host", sigV4ProxyHost);
+        return super.deploy(clusterInfo);
+    }
+}
\ No newline at end of file
diff --git a/lib/single-new-eks-cost-monitoring-pattern/kubecostserviceaccountsaddon.ts b/lib/single-new-eks-cost-monitoring-pattern/kubecostserviceaccountsaddon.ts
new file mode 100644
index 00000000..301dc361
--- /dev/null
+++ b/lib/single-new-eks-cost-monitoring-pattern/kubecostserviceaccountsaddon.ts
@@ -0,0 +1,74 @@
+import 'source-map-support/register';
+import * as blueprints from '@aws-quickstart/eks-blueprints';
+import * as eks from "aws-cdk-lib/aws-eks";
+import { ManagedPolicy } from "aws-cdk-lib/aws-iam";
+import { Construct } from 'constructs';
+
+export class KubecostServiceAccountsAddon implements blueprints.ClusterAddOn {
+    id?: string | undefined;
+    @blueprints.utils.dependable(blueprints.addons.ExternalsSecretsAddOn.name)
+    deploy(clusterInfo: blueprints.ClusterInfo): void | Promise<Construct> {
+        const cluster = clusterInfo.cluster;
+
+        const policyRead = ManagedPolicy.fromAwsManagedPolicyName("AmazonPrometheusQueryAccess");
+        const policyWrite = ManagedPolicy.fromAwsManagedPolicyName("AmazonPrometheusRemoteWriteAccess");
+        const policyEC2 = ManagedPolicy.fromAwsManagedPolicyName("AmazonEC2ReadOnlyAccess"); // Needed since the Kubecost cost analyzer calls ec2:DescribeVolumes
+
+        const serviceAccount1 = cluster.addServiceAccount("kubecost-cost-analyzer-amp", {
+            name: "kubecost-cost-analyzer-amp",
+            namespace: "kubecost"
+        });
+
+        serviceAccount1.role.addManagedPolicy(policyRead);
+        serviceAccount1.role.addManagedPolicy(policyWrite);
+        serviceAccount1.role.addManagedPolicy(policyEC2);
+
+        const serviceAccount2 = cluster.addServiceAccount("kubecost-prometheus-server-amp", {
+            name: "kubecost-prometheus-server-amp",
+            namespace: "kubecost"
+        });
+
+        serviceAccount2.role.addManagedPolicy(policyRead);
+        serviceAccount2.role.addManagedPolicy(policyWrite);
+
+        const namespace = blueprints.utils.createNamespace("kubecost", cluster);
+
+        serviceAccount1.node.addDependency(namespace);
+        serviceAccount2.node.addDependency(namespace);
+
+        // ClusterSecretStore that lets External Secrets read from SSM Parameter Store
+        // using the operator's own service account
+        const secretStore = new eks.KubernetesManifest(clusterInfo.cluster.stack, "ClusterSecretStore", {
+            cluster: cluster,
+            manifest: [
+                {
+                    apiVersion: "external-secrets.io/v1beta1",
+                    kind: "ClusterSecretStore",
+                    metadata: {
+                        name: "ssm-parameter-store",
+                        namespace: "default"
+                    },
+                    spec: {
+                        provider: {
+                            aws: {
+                                service: "ParameterStore",
+                                region: clusterInfo.cluster.stack.region,
+                                auth: {
+                                    jwt: {
+                                        serviceAccountRef: {
+                                            name: "external-secrets-sa",
+                                            namespace: "external-secrets",
+                                        },
+                                    },
+                                },
+                            },
+                        },
+                    },
+                },
+            ],
+        });
+
+        return Promise.resolve(secretStore);
+    }
+}
\ No newline at end of file
diff --git a/lib/single-new-eks-fargate-opensource-observability-pattern/fluentbitconfigmap.ts b/lib/single-new-eks-fargate-opensource-observability-pattern/fluentbitconfigmap.ts
index cb3f10ca..3946c0c0 100644
--- a/lib/single-new-eks-fargate-opensource-observability-pattern/fluentbitconfigmap.ts
+++ b/lib/single-new-eks-fargate-opensource-observability-pattern/fluentbitconfigmap.ts
@@ -53,7 +53,6 @@ export class FluentBitConfigMap implements blueprints.ClusterAddOn {
     }
 
     deploy(clusterInfo: blueprints.ClusterInfo): void {
-        const cluster = clusterInfo.cluster;
         const doc = readYamlDocument(__dirname + '/../common/resources/fluent-bit/fluent-bit-fargate-config.ytpl');
 
         const manifest = doc.split("---").map(e => loadYaml(e));
diff --git a/lib/single-new-eks-fargate-opensource-observability-pattern/index.ts b/lib/single-new-eks-fargate-opensource-observability-pattern/index.ts
index 4e57461a..ffaefd80 100644
--- a/lib/single-new-eks-fargate-opensource-observability-pattern/index.ts
+++ b/lib/single-new-eks-fargate-opensource-observability-pattern/index.ts
@@ -119,12 +119,14 @@ export default class SingleNewEksFargateOpenSourceObservabilityConstruct {
             logStreamPrefix: "from-fluent-bit-",
         } as FluentBitConfigMapProps;
 
+        const coreDnsAddOnProps: blueprints.CoreDnsAddOnProps = {
+            configurationValues: { computeType: "Fargate" }
+        };
+
         Reflect.defineMetadata("ordered", true, blueprints.addons.GrafanaOperatorAddon);
         const addOns: Array<blueprints.ClusterAddOn> = [
             new blueprints.addons.VpcCniAddOn(),
-            new blueprints.addons.CoreDnsAddOn("auto", {
-                configurationValues: { computeType: "Fargate" }
-            }),
+            new blueprints.addons.CoreDnsAddOn("v1.10.1-eksbuild.6", coreDnsAddOnProps),
             new blueprints.addons.KubeProxyAddOn(),
             new blueprints.addons.AwsLoadBalancerControllerAddOn(),
             new blueprints.addons.CertManagerAddOn({
diff --git a/mkdocs.yml b/mkdocs.yml
index 7b839441..8097bf94 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -48,8 +48,9 @@ nav:
       - OSS ADOT Collector Mon: patterns/single-new-eks-observability-accelerators/single-new-eks-adotmetrics-collection-opensource-observability.md
       - OSS Istio Mon: patterns/single-new-eks-observability-accelerators/single-new-eks-istio-opensource-observability.md
       - OSS ADOT Container Logs: patterns/single-new-eks-observability-accelerators/single-new-eks-container-logs-opensource-observability.md
+      - Cost Monitoring: patterns/single-new-eks-observability-accelerators/single-new-eks-cost-monitoring-ingress-observability.md
      - OSS Neuron with Inferentia: patterns/single-new-eks-observability-accelerators/single-new-eks-inferentia-opensource-observability.md
-      - Logs: logs.md 
+      - Logs: logs.md
       - Tracing: tracing.md
   - Supporting Examples:
       - EKS Cluster: patterns/single-new-eks-observability-accelerators/single-new-eks-cluster.md
diff --git a/package.json b/package.json
index f1f29c5d..c14b1685 100644
--- a/package.json
+++ b/package.json
@@ -24,6 +24,7 @@
         "typescript": "^5.3.3"
     },
     "dependencies": {
+        "@kubecost/kubecost-eks-blueprints-addon": "^0.1.8",
         "@aws-quickstart/eks-blueprints": "1.14.1",
         "aws-cdk": "2.133.0",
         "aws-sdk": "^2.1455.0",