From 4fc8c157920bf4867cec5764f29d209a3926d31e Mon Sep 17 00:00:00 2001 From: abuabraham-ttd Date: Mon, 18 Nov 2024 11:44:13 -0800 Subject: [PATCH] Adding pre-init validation, switching to python and interfaces --- scripts/aws/Dockerfile | 2 +- scripts/aws/config-server/requirements.txt | 2 + scripts/aws/ec2.py | 179 ++++++++++++++++++ scripts/aws/start.sh | 124 ------------ scripts/aws/stop.sh | 31 --- .../uid2-operator-ami/ansible/playbook.yml | 17 +- scripts/aws/uid2operator.service | 4 +- scripts/confidential_compute.py | 73 +++++++ 8 files changed, 260 insertions(+), 172 deletions(-) create mode 100755 scripts/aws/ec2.py delete mode 100644 scripts/aws/start.sh delete mode 100644 scripts/aws/stop.sh create mode 100644 scripts/confidential_compute.py diff --git a/scripts/aws/Dockerfile b/scripts/aws/Dockerfile index 6007437b1..e210001c3 100644 --- a/scripts/aws/Dockerfile +++ b/scripts/aws/Dockerfile @@ -45,4 +45,4 @@ COPY ./syslog-ng-client.conf /etc/syslog-ng/syslog-ng.conf RUN chmod +x /app/vsockpx && chmod +x /app/entrypoint.sh -CMD ["/app/ec2.py"] +CMD ["/app/entrypoint.sh"] diff --git a/scripts/aws/config-server/requirements.txt b/scripts/aws/config-server/requirements.txt index 57652a258..bd64bfe35 100644 --- a/scripts/aws/config-server/requirements.txt +++ b/scripts/aws/config-server/requirements.txt @@ -1,3 +1,5 @@ Flask==2.3.2 Werkzeug==3.0.3 setuptools==70.0.0 +requests==2.32.3 +boto3==1.35.59 \ No newline at end of file diff --git a/scripts/aws/ec2.py b/scripts/aws/ec2.py new file mode 100755 index 000000000..27a6cb15f --- /dev/null +++ b/scripts/aws/ec2.py @@ -0,0 +1,179 @@ +#!/usr/bin/env python3 + +import boto3 +import json +import os +import subprocess +import re +import multiprocessing +import requests +import signal +import argparse +from botocore.exceptions import ClientError +import sys +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from confidential_compute import ConfidentialCompute + +class EC2(ConfidentialCompute): + + def __init__(self): + super().__init__() + self.config = {} + + def _get_secret(self, secret_identifier): + client = boto3.client("secretsmanager", region_name=self.__get_current_region()) + try: + secret = client.get_secret_value(SecretId=secret_identifier) + return json.loads(secret["SecretString"]) + except ClientError as e: + raise Exception("Unable to access secret store") + + def __add_defaults(self, configs): + configs.setdefault("enclave_memory_mb", 24576) + configs.setdefault("enclave_cpu_count", 6) + configs.setdefault("debug_mode", False) + return configs + + def __setup_vsockproxy(self, log_level): + thread_count = int((multiprocessing.cpu_count() + 1) // 2) + log_level = log_level + try: + subprocess.Popen(["/usr/bin/vsockpx", "-c", "/etc/uid2operator/proxy.yaml", "--workers", str(thread_count), "--log-level", log_level, "--daemon"]) + print("VSOCK proxy is now running in the background") + except FileNotFoundError: + print("Error: vsockpx not found. Please ensure the path is correct") + except Exception as e: + print("Failed to start VSOCK proxy") + + def __run_config_server(self, log_level): + os.makedirs("/etc/secret/secret-value", exist_ok=True) + with open('/etc/secret/secret-value/config', 'w') as fp: + json.dump(self.configs, fp) + os.chdir("/opt/uid2operator/config-server") + # TODO: Add --log-level to flask. + try: + subprocess.Popen(["./bin/flask", "run", "--host", "127.0.0.1", "--port", "27015"]) + print("Config server is now running in the background.") + except Exception as e: + print(f"Failed to start config server: {e}") + + def __run_socks_proxy(self, log_level): + subprocess.Popen(["sockd", "-d"]) + + def _validate_auxilaries(self): + proxy = "socks5h://127.0.0.1:3305" + url = "http://127.0.0.1:27015/getConfig" + response = requests.get(url) + if response.status_code != 200: + raise Exception("Config server unreachable") + proxies = { + "http": proxy, + "https": proxy, + } + try: + response = requests.get(url, proxies=proxies) + response.raise_for_status() + except Exception as e: + raise Exception(f"Cannot conect to config server through socks5: {e}") + pass + + def __get_aws_token(self): + try: + token_url = "http://169.254.169.254/latest/api/token" + token_response = requests.put(token_url, headers={"X-aws-ec2-metadata-token-ttl-seconds": "3600"}, timeout=2) + return token_response.text + except Exception as e: + return "blank" + + def __get_current_region(self): + token = self.__get_aws_token() + metadata_url = "http://169.254.169.254/latest/dynamic/instance-identity/document" + headers = {"X-aws-ec2-metadata-token": token} + try: + response = requests.get(metadata_url, headers=headers,timeout=2) + if response.status_code == 200: + return response.json().get("region") + else: + print(f"Failed to fetch region, status code: {response.status_code}") + except Exception as e: + raise Exception(f"Region not found, are you running in EC2 environment. {e}") + + def __get_secret_name_from_userdata(self): + token = self.__get_aws_token() + user_data_url = "http://169.254.169.254/latest/user-data" + user_data_response = requests.get(user_data_url, headers={"X-aws-ec2-metadata-token": token}) + user_data = user_data_response.text + identity_scope = open("/opt/uid2operator/identity_scope.txt").read().strip() + default_name = "{}-operator-config-key".format(identity_scope.lower()) + hardcoded_value = "{}_CONFIG_SECRET_KEY".format(identity_scope.upper()) + match = re.search(rf'^export {hardcoded_value}="(.+?)"$', user_data, re.MULTILINE) + return match.group(1) if match else default_name + + def _setup_auxilaries(self): + hostname = os.getenv("HOSTNAME", default=os.uname()[1]) + file_path = "HOSTNAME" + try: + with open(file_path, "w") as file: + file.write(hostname) + print(f"Hostname '{hostname}' written to {file_path}") + except Exception as e: + print(f"An error occurred : {e}") + config = self._get_secret(self.__get_secret_name_from_userdata()) + self.configs = self.__add_defaults(config) + log_level = 3 if self.configs['debug_mode'] else 1 + self.__setup_vsockproxy(log_level) + self.__run_config_server(log_level) + self.__run_socks_proxy(log_level) + + def run_compute(self): + self._setup_auxilaries() + self._validate_auxilaries() + command = [ + "nitro-cli", "run-enclave", + "--eif-path", "/opt/uid2operator/uid2operator.eif", + "--memory", self.config['enclave_memory_mb'], + "--cpu-count", self.config['enclave_cpu_count'], + "--enclave-cid", 42, + "--enclave-name", "uid2operator" + ] + if self.config['debug']: + command+=["--debug-mode", "--attach-console"] + subprocess.run(command, check=True) + + def cleanup(self): + describe_output = subprocess.check_output(["nitro-cli", "describe-enclaves"], text=True) + enclaves = json.loads(describe_output) + enclave_id = enclaves[0].get("EnclaveID") if enclaves else None + if enclave_id: + subprocess.run(["nitro-cli", "terminate-enclave", "--enclave-id", enclave_id]) + print(f"Enclave with ID {enclave_id} has been terminated.") + else: + print("No enclave found or EnclaveID is null.") + + def kill_process(self, process_name): + try: + result = subprocess.run( + ["pgrep", "-f", process_name], + stdout=subprocess.PIPE, + text=True, + check=False + ) + if result.stdout.strip(): + for pid in result.stdout.strip().split("\n"): + os.kill(int(pid), signal.SIGKILL) + print(f"{process_name} exited") + else: + print(f"Process {process_name} not found") + except Exception as e: + print(f"Failed to shut down {process_name}: {e}") + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("-o", "--operation", required=False) + args = parser.parse_args() + ec2 = EC2() + if args.operation and args.operation == "stop": + ec2.cleanup() + [ec2.kill_process(process) for process in ["vsockpx", "sockd", "vsock-proxy", "nohup"]] + else: + ec2.run_compute() diff --git a/scripts/aws/start.sh b/scripts/aws/start.sh deleted file mode 100644 index 429826928..000000000 --- a/scripts/aws/start.sh +++ /dev/null @@ -1,124 +0,0 @@ -#!/bin/bash - -echo "$HOSTNAME" > /etc/uid2operator/HOSTNAME -EIF_PATH=${EIF_PATH:-/opt/uid2operator/uid2operator.eif} -IDENTITY_SCOPE=${IDENTITY_SCOPE:-$(cat /opt/uid2operator/identity_scope.txt)} -CID=${CID:-42} -TOKEN=$(curl --request PUT "http://169.254.169.254/latest/api/token" --header "X-aws-ec2-metadata-token-ttl-seconds: 3600") -USER_DATA=$(curl -s http://169.254.169.254/latest/user-data --header "X-aws-ec2-metadata-token: $TOKEN") -AWS_REGION_NAME=$(curl -s http://169.254.169.254/latest/dynamic/instance-identity/document/ --header "X-aws-ec2-metadata-token: $TOKEN" | jq -r '.region') -if [ "$IDENTITY_SCOPE" = 'UID2' ]; then - UID2_CONFIG_SECRET_KEY=$([[ "$(echo "${USER_DATA}" | grep UID2_CONFIG_SECRET_KEY=)" =~ ^export\ UID2_CONFIG_SECRET_KEY=\"(.*)\"$ ]] && echo "${BASH_REMATCH[1]}" || echo "uid2-operator-config-key") -elif [ "$IDENTITY_SCOPE" = 'EUID' ]; then - UID2_CONFIG_SECRET_KEY=$([[ "$(echo "${USER_DATA}" | grep EUID_CONFIG_SECRET_KEY=)" =~ ^export\ EUID_CONFIG_SECRET_KEY=\"(.*)\"$ ]] && echo "${BASH_REMATCH[1]}" || echo "euid-operator-config-key") -else - echo "Unrecognized IDENTITY_SCOPE $IDENTITY_SCOPE" - exit 1 -fi -CORE_BASE_URL=$([[ "$(echo "${USER_DATA}" | grep CORE_BASE_URL=)" =~ ^export\ CORE_BASE_URL=\"(.*)\"$ ]] && echo "${BASH_REMATCH[1]}" || echo "") -OPTOUT_BASE_URL=$([[ "$(echo "${USER_DATA}" | grep OPTOUT_BASE_URL=)" =~ ^export\ OPTOUT_BASE_URL=\"(.*)\"$ ]] && echo "${BASH_REMATCH[1]}" || echo "") - -echo "UID2_CONFIG_SECRET_KEY=${UID2_CONFIG_SECRET_KEY}" -echo "CORE_BASE_URL=${CORE_BASE_URL}" -echo "OPTOUT_BASE_URL=${OPTOUT_BASE_URL}" -echo "AWS_REGION_NAME=${AWS_REGION_NAME}" - -function terminate_old_enclave() { - ENCLAVE_ID=$(nitro-cli describe-enclaves | jq -r ".[0].EnclaveID") - [ "$ENCLAVE_ID" != "null" ] && nitro-cli terminate-enclave --enclave-id ${ENCLAVE_ID} -} - -function config_aws() { - aws configure set default.region $AWS_REGION_NAME -} - -function default_cpu() { - target=$(( $(nproc) * 3 / 4 )) - if [ $target -lt 2 ]; then - target="2" - fi - echo $target -} - -function default_mem() { - target=$(( $(grep MemTotal /proc/meminfo | awk '{print $2}') * 3 / 4000 )) - if [ $target -lt 24576 ]; then - target="24576" - fi - echo $target -} - -function read_allocation() { - USER_CUSTOMIZED=$(aws secretsmanager get-secret-value --secret-id "$UID2_CONFIG_SECRET_KEY" | jq -r '.SecretString' | jq -r '.customize_enclave') - shopt -s nocasematch - if [ "$USER_CUSTOMIZED" = "true" ]; then - echo "Applying user customized CPU/Mem allocation..." - CPU_COUNT=${CPU_COUNT:-$(aws secretsmanager get-secret-value --secret-id "$UID2_CONFIG_SECRET_KEY" | jq -r '.SecretString' | jq -r '.enclave_cpu_count')} - MEMORY_MB=${MEMORY_MB:-$(aws secretsmanager get-secret-value --secret-id "$UID2_CONFIG_SECRET_KEY" | jq -r '.SecretString' | jq -r '.enclave_memory_mb')} - else - echo "Applying default CPU/Mem allocation..." - CPU_COUNT=6 - MEMORY_MB=24576 - fi - shopt -u nocasematch -} - - -function update_allocation() { - ALLOCATOR_YAML=/etc/nitro_enclaves/allocator.yaml - if [ -z "$CPU_COUNT" ] || [ -z "$MEMORY_MB" ]; then - echo 'No CPU_COUNT or MEMORY_MB set, cannot start enclave' - exit 1 - fi - echo "updating allocator: CPU_COUNT=$CPU_COUNT, MEMORY_MB=$MEMORY_MB..." - systemctl stop nitro-enclaves-allocator.service - sed -r "s/^(\s*memory_mib\s*:\s*).*/\1$MEMORY_MB/" -i $ALLOCATOR_YAML - sed -r "s/^(\s*cpu_count\s*:\s*).*/\1$CPU_COUNT/" -i $ALLOCATOR_YAML - systemctl start nitro-enclaves-allocator.service && systemctl enable nitro-enclaves-allocator.service - echo "nitro-enclaves-allocator restarted" -} - -function setup_vsockproxy() { - VSOCK_PROXY=${VSOCK_PROXY:-/usr/bin/vsockpx} - VSOCK_CONFIG=${VSOCK_CONFIG:-/etc/uid2operator/proxy.yaml} - VSOCK_THREADS=${VSOCK_THREADS:-$(( ( $(nproc) + 1 ) / 2 )) } - VSOCK_LOG_LEVEL=${VSOCK_LOG_LEVEL:-3} - echo "starting vsock proxy at $VSOCK_PROXY with $VSOCK_THREADS worker threads..." - $VSOCK_PROXY -c $VSOCK_CONFIG --workers $VSOCK_THREADS --log-level $VSOCK_LOG_LEVEL --daemon - echo "vsock proxy now running in background." -} - -function setup_dante() { - sockd -D -} - -function run_config_server() { - mkdir -p /etc/secret/secret-value - { - set +x; # Disable tracing within this block - 2>/dev/null; - SECRET_JSON=$(aws secretsmanager get-secret-value --secret-id "$UID2_CONFIG_SECRET_KEY" | jq -r '.SecretString') - echo "${SECRET_JSON}" > /etc/secret/secret-value/config; - } - echo $(jq ".core_base_url = \"$CORE_BASE_URL\"" /etc/secret/secret-value/config) > /etc/secret/secret-value/config - echo $(jq ".optout_base_url = \"$OPTOUT_BASE_URL\"" /etc/secret/secret-value/config) > /etc/secret/secret-value/config - echo "run_config_server" - cd /opt/uid2operator/config-server - ./bin/flask run --host 127.0.0.1 --port 27015 & -} - -function run_enclave() { - echo "starting enclave..." - nitro-cli run-enclave --eif-path $EIF_PATH --memory $MEMORY_MB --cpu-count $CPU_COUNT --enclave-cid $CID --enclave-name uid2operator -} - -terminate_old_enclave -config_aws -read_allocation -# update_allocation -setup_vsockproxy -setup_dante -run_config_server -run_enclave - -echo "Done!" diff --git a/scripts/aws/stop.sh b/scripts/aws/stop.sh deleted file mode 100644 index c37bdc729..000000000 --- a/scripts/aws/stop.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash - -function terminate_old_enclave() { - echo "Terminating Enclave..." - ENCLAVE_ID=$(nitro-cli describe-enclaves | jq -r ".[0].EnclaveID") - if [ "$ENCLAVE_ID" != "null" ]; then - nitro-cli terminate-enclave --enclave-id $ENCLAVE_ID - else - echo "no running enclaves to terminate" - fi -} - -function kill_process() { - echo "Shutting down $1..." - pid=$(pidof $1) - if [ -z "$pid" ]; then - echo "process $1 not found" - else - kill -9 $pid - echo "$1 exited" - fi -} - -terminate_old_enclave -kill_process vsockpx -kill_process sockd -# we start aws vsock-proxy via nohup -kill_process vsock-proxy -kill_process nohup - -echo "Done!" diff --git a/scripts/aws/uid2-operator-ami/ansible/playbook.yml b/scripts/aws/uid2-operator-ami/ansible/playbook.yml index 84c6c6f14..3629b34e3 100644 --- a/scripts/aws/uid2-operator-ami/ansible/playbook.yml +++ b/scripts/aws/uid2-operator-ami/ansible/playbook.yml @@ -72,24 +72,13 @@ - name: Install starter script ansible.builtin.copy: - src: /tmp/artifacts/start.sh - dest: /opt/uid2operator/start.sh + src: /tmp/artifacts/ec2.py + dest: /opt/uid2operator/ec2.py remote_src: yes - name: Make starter script executable ansible.builtin.file: - path: /opt/uid2operator/start.sh - mode: '0755' - - - name: Install stopper script - ansible.builtin.copy: - src: /tmp/artifacts/stop.sh - dest: /opt/uid2operator/stop.sh - remote_src: yes - - - name: Make starter script executable - ansible.builtin.file: - path: /opt/uid2operator/stop.sh + path: /opt/uid2operator/ec2.py mode: '0755' - name: Install Operator EIF diff --git a/scripts/aws/uid2operator.service b/scripts/aws/uid2operator.service index 1d36b7a91..e92f5d401 100644 --- a/scripts/aws/uid2operator.service +++ b/scripts/aws/uid2operator.service @@ -8,8 +8,8 @@ RemainAfterExit=true StandardOutput=journal StandardError=journal SyslogIdentifier=uid2operator -ExecStart=/opt/uid2operator/start.sh -ExecStop=/opt/uid2operator/stop.sh +ExecStart=/opt/uid2operator/ec2.py +ExecStop=/opt/uid2operator/ec2.py -o stop [Install] WantedBy=multi-user.target \ No newline at end of file diff --git a/scripts/confidential_compute.py b/scripts/confidential_compute.py new file mode 100644 index 000000000..a60220bcd --- /dev/null +++ b/scripts/confidential_compute.py @@ -0,0 +1,73 @@ +import requests +import re +import socket +from urllib.parse import urlparse +from abc import ABC, abstractmethod + +class ConfidentialCompute(ABC): + + @abstractmethod + def _get_secret(self, secret_identifier): + """ + Gets the secret from secret store + + Raises: + SecretNotFoundException: Points to public documentation + """ + pass + + def validate_operator_key(self, secrets): + """ + Validates operator key if following new pattern. Ignores otherwise + """ + api_token = secrets.get('api_token', None) + pattern = r"^(UID2|EUID)-.\-(I|P)-\d+-\*$" + if bool(re.match(pattern, api_token)): + if secrets.get('debug_mode', False) or secrets.get('environment') == 'integ': + if api_token.split('-')[2] != 'I': + raise Exception("Operator key does not match the environment") + else: + if api_token.split('-')[2] != 'P': + raise Exception("Operator key does not match the environment") + return True + + def validate_connectivity(self, config): + """ + Validates core/optout is accessible. + """ + try: + core_ip = socket.gethostbyname(urlparse(config['core_base_url']).netloc) + requests.get(config['core_base_url'], timeout=5) + optout_ip = socket.gethostbyname(urlparse(config['optout_base_url']).netloc) + requests.get(config['optout_base_url'], timeout=5) + except (requests.ConnectionError, requests.Timeout) as e : + raise Exception("Failed to reach the URL. -- ERROR CODE, enable IPs? {} {}".format(core_ip, optout_ip), e) + except Exception as e: + raise Exception("Failed to reach the URL. ") + """ + s3 does not have static IP, and the range returned for s3 is huge to validate. + r = requests.get('https://ip-ranges.amazonaws.com/ip-ranges.json') + ips = list(map(lambda x: x['ip_prefix'], filter(lambda x: x['region']=='us-east-1' and x['service'] == 'S3', r.json()['prefixes']))) + """ + return + + @abstractmethod + def _setup_auxilaries(self, secrets): + """ + Sets up auxilary processes required to confidential compute + """ + pass + + @abstractmethod + def _validate_auxilaries(self, secrets): + """ + Validates auxilary services are running + """ + pass + + @abstractmethod + def run_compute(self): + """ + Runs compute. + """ + pass \ No newline at end of file