diff --git a/README.md b/README.md index 0a1bd4b..2a01e2f 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,21 @@ # Trivy Operator -Trivy Operator is an operator that default every 5 minutes execute a scan script. It will get image list from all namespaces with the label `trivy-scan=true`, and then scan this images with trivy, finally we will get metrics on `http://[pod-ip]:9115/metrics` - Built with [kopf](https://github.com/nolar/kopf) +Main functions: + +* Scheduled Image scans on running pods +* Trivy Image Validator Admission controller + +Inspired by [knqyf263](https://github.com/knqyf263)'s [trivy-enforcer](https://github.com/aquasecurity/trivy-enforcer) and [fleeto](https://github.com/fleeto)'s [trivy-scanner](https://github.com/fleeto/trivy-scanner). + +### Scheduled Image scans +By default, a scan script runs every 5 minutes. It gets the image list from all namespaces with the label `trivy-scan=true`, scans these images with trivy, and finally exposes the metrics on `http://[pod-ip]:9115/metrics` + +### Trivy Image Validator +The admission controller function can be configured as a ValidatingWebhook in a k8s cluster. Kubernetes will send requests to the admission server when a Pod creation is initiated. The admission controller checks the image using trivy. + + ## Usage ```bash @@ -54,3 +66,51 @@ kubectl logs [2021-10-02 09:45:52,227] kopf.objects [INFO ] [trivytest/main-config] Scanning Image: docker.io/library/nginx:1.18 [2021-10-02 09:45:55,556] kopf.objects [INFO ] [trivytest/main-config] Scanning Image: docker.io/library/nginx:latest ~~~ + +### Example Deploy: +You can define policy to the Admission Controller, by adding annotation to the pod through the deployment: + +```yaml +spec: + ... + template: + metadata: + annotations: + trivy.security.devopstales.io/medium: "5" + trivy.security.devopstales.io/low: "10" + trivy.security.devopstales.io/critical: "2" +... 
+``` + +### Development + +Install trivy: + +```bash +nano /etc/yum.repos.d/trivy.repo +[trivy] +name=Trivy repository +baseurl=https://aquasecurity.github.io/trivy-repo/rpm/releases/$releasever/$basearch/ +gpgcheck=0 +enabled=1 + +sudo yum -y install trivy +``` + +To run kopf development you need to install the following packages on the k3s host: + +```bash +yum install -y python3.8 +pip3 install --no-cache-dir kopf kubernetes asyncio pycron prometheus_client certvalidator certbuilder +pip3 install --no-cache-dir kopf[devel] +``` + +The admission webhook tries to call the host with the domain name `host.k3d.internal`, so I added it to the host's `/etc/hosts` file. + +```bash +echo "172.17.12.10 host.k3d.internal" >> /etc/hosts +``` + +```bash +kopf run -A ./trivy-operator.py +``` diff --git a/deploy/10_demo.yaml b/deploy/10_demo.yaml index 4532543..3623c22 100644 --- a/deploy/10_demo.yaml +++ b/deploy/10_demo.yaml @@ -12,6 +12,13 @@ metadata: name: nginx namespace: trivytest spec: + initContainers: + - name: init + image: nginxinc/nginx-unprivileged:latest + command: ['sh', '-c', 'echo The app is running! && sleep 10'] + - name: init2 + image: nginxinc/nginx-unprivileged:latest + command: ['sh', '-c', 'echo The app is running! 
&& sleep 10'] containers: - image: nginx:1.18 imagePullPolicy: IfNotPresent @@ -26,4 +33,7 @@ spec: containers: - image: nginx imagePullPolicy: IfNotPresent - name: nginx \ No newline at end of file + name: nginx + - image: nginx + imagePullPolicy: IfNotPresent + name: nginx2 diff --git a/docker/Dockerfile b/docker/Dockerfile index 7446e81..abe214c 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,18 +1,16 @@ -FROM python:3.8-alpine +FROM python:3.8.12-slim-buster ENV TRIVY_CACHE_DIR=/home/trivy-operator/trivy-cache \ TRIVY_QUIET=true \ IN_CLUSTER=true -RUN apk add --no-cache gcc musl-dev - -RUN pip3 install --no-cache-dir kopf kubernetes asyncio pycron prometheus_client +RUN pip3 install --no-cache-dir kopf[dev] kubernetes asyncio pycron prometheus_client oscrypto certvalidator certbuilder validators COPY trivy-operator.py /trivy-operator.py COPY trivy /usr/local/bin -RUN addgroup -S -g 10001 trivy-operator && \ - adduser -S -u 10001 trivy-operator -G trivy-operator && \ +RUN addgroup --gid 10001 trivy-operator && \ + adduser --uid 10001 trivy-operator --ingroup trivy-operator && \ mkdir /home/trivy-operator/trivy-cache && \ chown -R trivy-operator:trivy-operator /home/trivy-operator/trivy-cache diff --git a/trivy-operator.py b/trivy-operator.py index a65d81f..9c2df13 100644 --- a/trivy-operator.py +++ b/trivy-operator.py @@ -8,6 +8,8 @@ import sys import subprocess import json +import validators +from typing import AsyncIterator """ apiVersion: trivy-operator.devopstales.io/v1 @@ -24,6 +26,26 @@ password: "password" """ +############################################################################# +# ToDo +############################################################################# +# OP +# AC +## namespace selector for admission controller webhook +# cache scanned images ??? 
+ + +############################################################################# +# Global Variables +############################################################################# +CONTAINER_VULN = prometheus_client.Gauge('so_vulnerabilities', 'Container vulnerabilities', ['exported_namespace', 'image', 'severity']) +AC_VULN = prometheus_client.Gauge('ac_vulnerabilities', 'Admission Controller vulnerabilities', ['exported_namespace', 'image', 'severity']) +IN_CLUSTER = os.getenv("IN_CLUSTER", False) + +############################################################################# +# Pretasks +############################################################################# + """Deploy CRDs""" @kopf.on.startup() async def startup_fn_crd(logger, **kwargs): @@ -90,7 +112,6 @@ async def startup_fn_crd(logger, **kwargs): ) ) - IN_CLUSTER = os.getenv("IN_CLUSTER", False) if IN_CLUSTER: k8s_config.load_incluster_config() else: @@ -116,14 +137,20 @@ async def startup_fn_trivy_cache(logger, **kwargs): ) logger.info("trivy cache created...") +#"""Start Prometheus Exporter""" +@kopf.on.startup() +async def startup_fn_prometheus_client(logger, **kwargs): + prometheus_client.start_http_server(9115) + logger.info("Prometheus Exporter started...") + +############################################################################# +# Operator +############################################################################# + """Scanner Creation""" @kopf.on.create('trivy-operator.devopstales.io', 'v1', 'namespace-scanners') async def create_fn(logger, spec, **kwargs): - CONTAINER_VULN = prometheus_client.Gauge('so_vulnerabilities', 'Container vulnerabilities', ['exported_namespace', 'image', 'severity']) - - """Start Prometheus Exporter""" - prometheus_client.start_http_server(9115) - logger.info("Prometheus Exporter started...") + logger.info("NamespaceScanner Created") try: crontab = spec['crontab'] @@ -140,7 +167,6 @@ async def create_fn(logger, spec, **kwargs): while True: if 
pycron.is_now(crontab): """Find Namespaces""" - IN_CLUSTER = os.getenv("IN_CLUSTER", False) image_list = {} vul_list = {} tagged_ns_list = [] @@ -167,17 +193,34 @@ async def create_fn(logger, spec, **kwargs): pod_list = k8s_client.CoreV1Api().list_namespaced_pod(tagged_ns) """Find images in pods""" for pod in pod_list.items: - pod_name = pod.metadata.name - images = pod.status.container_statuses - for image in images: + Containers = pod.status.container_statuses + for image in Containers: + pod_name = pod.metadata.name + pod_name += '_' + pod_name += image.name + image_list[pod_name] = list() image_name = image.image image_id = image.image_id - image_list[pod_name] = list() image_list[pod_name].append(image_name) image_list[pod_name].append(image_id) image_list[pod_name].append(tagged_ns) + try: + initContainers = pod.status.init_container_statuses + for image in initContainers: + pod_name = pod.metadata.name + pod_name += '_' + pod_name += image.name + image_list[pod_name] = list() + image_name = image.image + image_id = image.image_id + image_list[pod_name].append(image_name) + image_list[pod_name].append(image_id) + image_list[pod_name].append(tagged_ns) + except: + continue """Scan images""" + logger.info("%s" % (image_list)) # debug for image in image_list: logger.info("Scanning Image: %s" % (image_list[image][0])) image_name = image_list[image][0] @@ -190,49 +233,228 @@ async def create_fn(logger, spec, **kwargs): for reg in registry_list: if reg['name'] == registry: + os.environ['DOCKER_REGISTRY']=reg['name'] os.environ['TRIVY_USERNAME']=reg['user'] os.environ['TRIVY_PASSWORD']=reg['password'] + elif not validators.domain(registry): + """If registry is not an url""" + if reg['name'] == "docker.io": + os.environ['DOCKER_REGISTRY']=reg['name'] + os.environ['TRIVY_USERNAME']=reg['user'] + os.environ['TRIVY_PASSWORD']=reg['password'] except: - logger.info("no registry auth config is defined") + logger.info("No registry auth config is defined.") + ACTIVE_REGISTRY 
= os.getenv("DOCKER_REGISTRY") + logger.info("Active Registry: %s" % (ACTIVE_REGISTRY)) # Debug TRIVY = ["trivy", "-q", "i", "-f", "json", image_name] # --ignore-policy trivy.rego res = subprocess.Popen(TRIVY,stdout=subprocess.PIPE,stderr=subprocess.PIPE); output,error = res.communicate() - if output: - trivy_result = json.loads(output.decode("UTF-8")) - item_list = trivy_result['Results'][0]["Vulnerabilities"] - vuls = { - "UNKNOWN": 0,"LOW": 0, - "MEDIUM": 0,"HIGH": 0, - "CRITICAL": 0 - } - for item in item_list: - vuls[item["Severity"]] += 1 - vul_list[image_name] = [vuls, ns_name] - - """Generate Metricfile""" - for image_name in vul_list.keys(): - for severity in vul_list[image_name][0].keys(): - CONTAINER_VULN.labels(vul_list[image_name][1], image_name, severity).set(int(vul_list[image_name][0][severity])) - + if error: + """Error Logging""" logger.error("TRIVY ERROR: return %s" % (res.returncode)) if b"401" in error.strip(): logger.error("Repository: Unauthorized authentication required") - if b"UNAUTHORIZED" in error.strip(): + elif b"UNAUTHORIZED" in error.strip(): logger.error("Repository: Unauthorized authentication required") - if b"You have reached your pull rate limit." in error.strip(): + elif b"You have reached your pull rate limit." 
in error.strip(): logger.error("You have reached your pull rate limit.") + elif b"unsupported MediaType" in error.strip(): + logger.error("Unsupported MediaType: see https://github.com/google/go-containerregistry/issues/377") + else: + logger.error("%s" % (error.strip())) + """Error action""" + vuls = { "scanning_error": 1 } + vul_list[image_name] = [vuls, ns_name] + elif output: + trivy_result = json.loads(output.decode("UTF-8")) + item_list = trivy_result['Results'][0]["Vulnerabilities"] + vuls = { "UNKNOWN": 0,"LOW": 0,"MEDIUM": 0,"HIGH": 0,"CRITICAL": 0 } + for item in item_list: + vuls[item["Severity"]] += 1 + vul_list[image_name] = [vuls, ns_name] + + """Generate Metricfile""" + for image_name in vul_list.keys(): + for severity in vul_list[image_name][0].keys(): + CONTAINER_VULN.labels(vul_list[image_name][1], image_name, severity).set(int(vul_list[image_name][0][severity])) await asyncio.sleep(15) else: await asyncio.sleep(15) +############################################################################# +# Admission Controller +############################################################################# +# https://github.com/nolar/kopf/issues/785#issuecomment-859931945 +if IN_CLUSTER: + class ServiceTunnel: + async def __call__( + self, fn: kopf.WebhookFn + ) -> AsyncIterator[kopf.WebhookClientConfig]: + # https://github.com/kubernetes-client/python/issues/363 + # Use field reference to environment variable instad + namespace = os.environ.get("POD_NAMESPACE", "trivy-operator") + name = "trivy-image-validator" + service_port = int(443) + container_port = int(8443) + server = kopf.WebhookServer(port=container_port, host=f"{name}.{namespace}.svc") + async for client_config in server(fn): + client_config["url"] = None + client_config["service"] = kopf.WebhookClientConfigService( + name=name, namespace=namespace, port=service_port + ) + yield client_config + +@kopf.on.startup() +def configure(settings: kopf.OperatorSettings, **_): + # Auto-detect the best 
server (K3d/Minikube/simple): + if IN_CLUSTER: +# settings.admission.server = kopf.WebhookServer(addr='0.0.0.0', port=8443, host="trivy-image-validator.trivy-operator.svc") + settings.admission.server = ServiceTunnel() + else: + settings.admission.server = kopf.WebhookAutoServer(port=443) + settings.admission.managed = 'trivy-image-validator.devopstales.io' + +@kopf.on.validate('pod', operation='CREATE') +def validate1(logger, namespace, name, annotations, spec, **_): + logger.info("Admission Controller is working") + image_list = [] + vul_list = {} + registry_list = {} + + """Try to get Registry auth values""" + if IN_CLUSTER: + k8s_config.load_incluster_config() + else: + k8s_config.load_kube_config() + try: + # if no namespace-scanners created + nsScans = k8s_client.CustomObjectsApi().list_cluster_custom_object( + group="trivy-operator.devopstales.io", + version="v1", + plural="namespace-scanners", + ) + for nss in nsScans["items"]: + registry_list = nss["spec"]["registry"] + except: + logger.info("No ns-scan object created yet.") + + """Get conainers""" + containers = spec.get('containers') + initContainers = spec.get('initContainers') + + try: + for icn in initContainers: + initContainers_array = json.dumps(icn) + initContainer = json.loads(initContainers_array) + image_name = initContainer["image"] + image_list.append(image_name) + except: + print("") + + for cn in containers: + container_array = json.dumps(cn) + container = json.loads(container_array) + image_name = container["image"] + image_list.append(image_name) + + """Get Images""" + for image_name in image_list: + registry = image_name.split('/')[0] + logger.info("Scanning Image: %s" % (image_name)) + + """Login to registry""" + try: + for reg in registry_list: + if reg['name'] == registry: + os.environ['DOCKER_REGISTRY']=reg['name'] + os.environ['TRIVY_USERNAME']=reg['user'] + os.environ['TRIVY_PASSWORD']=reg['password'] + elif not validators.domain(registry): + """If registry is not an url""" + if 
reg['name'] == "docker.io": + os.environ['DOCKER_REGISTRY']=reg['name'] + os.environ['TRIVY_USERNAME']=reg['user'] + os.environ['TRIVY_PASSWORD']=reg['password'] + except: + logger.info("No registry auth config is defined.") + ACTIVE_REGISTRY = os.getenv("DOCKER_REGISTRY") +# logger.info("Active Registry: %s" % (ACTIVE_REGISTRY)) # Debug + + """Scan Images""" + TRIVY = ["trivy", "-q", "i", "-f", "json", image_name] + # --ignore-policy trivy.rego + + res = subprocess.Popen(TRIVY,stdout=subprocess.PIPE,stderr=subprocess.PIPE); + output,error = res.communicate() + if error: + """Error Logging""" + logger.error("TRIVY ERROR: return %s" % (res.returncode)) + if b"401" in error.strip(): + logger.error("Repository: Unauthorized authentication required") + elif b"UNAUTHORIZED" in error.strip(): + logger.error("Repository: Unauthorized authentication required") + elif b"You have reached your pull rate limit." in error.strip(): + logger.error("You have reached your pull rate limit.") + elif b"unsupported MediaType" in error.strip(): + logger.error("Unsupported MediaType: see https://github.com/google/go-containerregistry/issues/377") + else: + logger.error("%s" % (error.strip())) + """Error action""" + se = { "scanning_error": 1 } + vul_list[image_name] = [se, namespace] + + elif output: + trivy_result = json.loads(output.decode("UTF-8")) + item_list = trivy_result['Results'][0]["Vulnerabilities"] + vuls = { "UNKNOWN": 0,"LOW": 0,"MEDIUM": 0,"HIGH": 0,"CRITICAL": 0 } + for item in item_list: + vuls[item["Severity"]] += 1 + vul_list[image_name] = [vuls, namespace] + + """Generate log""" + logger.info("severity: %s" % (vul_list[image_name][0])) # Logging + + """Generate Metricfile""" + for image_name in vul_list.keys(): + for severity in vul_list[image_name][0].keys(): + AC_VULN.labels(vul_list[image_name][1], image_name, severity).set(int(vul_list[image_name][0][severity])) + # logger.info("Prometheus Done") # Debug + + # Get vulnerabilities from annotations + 
vul_annotations= { "UNKNOWN": 0,"LOW": 0,"MEDIUM": 0,"HIGH": 0,"CRITICAL": 0 } + for sev in vul_annotations: + try: +# logger.info("%s: %s" % (sev, annotations['trivy.security.devopstales.io/' + sev.lower()])) # Debug + vul_annotations[sev] = annotations['trivy.security.devopstales.io/' + sev.lower()] + except: + continue + + # Check vulnerabilities + # logger.info("Check vulnerabilities:") # Debug + if "scanning_error" in vul_list[image_name][0]: + logger.error("Trivy can't scann the image") + raise kopf.AdmissionError(f"Trivy can't scann the image: %s" % (image_name)) + else: + for sev in vul_annotations: + an_vul_num = vul_annotations[sev] + vul_num = vul_list[image_name][0][sev] + if int(vul_num) > int(an_vul_num): +# logger.error("%s is bigger" % (sev)) # Debug + raise kopf.AdmissionError(f"Too much vulnerability in the image: %s" % (image_name)) + else: +# logger.info("%s is ok" % (sev)) # Debug + continue + +############################################################################# ## print to operator log # print(f"And here we are! Creating: %s" % (ns_name), file=sys.stderr) # debug ## message to CR # return {'message': 'hello world'} # will be the new status ## events to CR describe -# kopf.event(body, type="SomeType", reason="SomeReason", message="Some message") \ No newline at end of file +# kopf.event(body, type="SomeType", reason="SomeReason", message="Some message") +