Skip to content

Commit

Permalink
feat: implemented pod deletion cronjob
Browse files Browse the repository at this point in the history
  • Loading branch information
tyriis committed Mar 16, 2023
1 parent 576b391 commit 4e5372d
Show file tree
Hide file tree
Showing 13 changed files with 342 additions and 1 deletion.
16 changes: 16 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# EditorConfig settings (reference: https://editorconfig.org)
root = true

# Defaults applied to every file
[*]
indent_style = space
indent_size = 2
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true

# Markdown: no line-length limit, trailing whitespace is left untouched
[*.md]
trim_trailing_whitespace = false
max_line_length = off

# Makefiles are indented with tabs
[Makefile]
indent_style = tab
14 changes: 14 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# IDE / editor settings
.idea/
.vscode/

# Operating-system artifacts
Thumbs.db
.DS_Store

# Decrypted files (vscode-sops)
.decrypted~*.yaml

# Environment files
*.env
*.envrc
23 changes: 23 additions & 0 deletions .markdownlint.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
---
default: true

# MD013 (line-length) settings
MD013:
  # Character limits: regular lines, headings, code blocks
  line_length: 240
  heading_line_length: 80
  code_block_line_length: 120

  # Content types included in the check
  headings: true
  # NOTE(review): `headers` is described the same way as `headings` ("Include headings");
  # presumably an older spelling of the same option - confirm it is still needed.
  headers: true
  code_blocks: true
  tables: true

  # Strict and stern length checking are both switched off
  strict: false
  stern: false
104 changes: 104 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
---
# pre-commit configuration: the hooks below are grouped by the repository that provides them.
fail_fast: false
# Stages used for hooks that do not declare their own.
default_stages:
- commit
- push

repos:
# doctoc hook, invoked with --update-only, --maxlevel 3, --github and --notitle.
- repo: https://github.com/thlorenz/doctoc
rev: v2.2.0
hooks:
- id: doctoc
args:
- --update-only
- --maxlevel
- "3"
- --github
- --notitle

# Terraform hooks: docs, fmt and tflint (the tfsec hook is commented out).
# NOTE(review): these hooks reference ./infra/terraform, .terraform-docs.yaml and .tflint.hcl,
# none of which appear among the files visible here - confirm they exist.
- repo: https://github.com/antonbabenko/pre-commit-terraform
rev: v1.77.1
hooks:
- id: terraform_docs
args:
- --hook-config=--path-to-file=README.md
- --args=--config=.terraform-docs.yaml
- ./infra/terraform
- id: terraform_fmt
- id: terraform_tflint
args:
- --args=--config=__GIT_WORKING_DIR__/.tflint.hcl
# - id: terraform_tfsec
# args:
# - --args=--config-file=__GIT_WORKING_DIR__/.tfsec.yaml

# Generic file checks; added files are capped at 100 kB (--maxkb=100).
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
hooks:
- id: check-merge-conflict
- id: check-added-large-files
args:
- --maxkb=100
- id: check-case-conflict
- id: check-executables-have-shebangs
- id: check-json
- id: check-symlinks
- id: check-xml
- id: detect-private-key
- id: end-of-file-fixer
- id: fix-byte-order-marker
- id: mixed-line-ending
args:
- --fix=auto
- id: trailing-whitespace
args:
- --markdown-linebreak-ext=md

# yamllint, configured through .yamllint.yaml.
- repo: https://github.com/adrienverge/yamllint
rev: v1.29.0
hooks:
- id: yamllint
args:
- --config-file
- .yamllint.yaml

# CRLF and tab removal hooks.
- repo: https://github.com/Lucas-C/pre-commit-hooks
rev: v1.4.2
hooks:
- id: remove-crlf
- id: remove-tabs

# Text hygiene hooks: smart quotes, ligatures, bidi control characters.
- repo: https://github.com/sirosen/texthooks
rev: 0.5.0
hooks:
- id: fix-smartquotes
- id: fix-ligatures
- id: forbid-bidi-controls

# markdownlint in fix mode, configured through .markdownlint.yaml.
- repo: https://github.com/igorshubovych/markdownlint-cli
rev: v0.33.0
hooks:
- id: markdownlint-fix
args:
- --config
- .markdownlint.yaml

# Prettier, using .prettierignore and .prettierrc.yaml.
- repo: https://github.com/pre-commit/mirrors-prettier
rev: v3.0.0-alpha.4
hooks:
- id: prettier
args:
- --ignore-path
- .prettierignore
- --config
- .prettierrc.yaml

# Secret-detection hooks: forbid-secrets and gitleaks.
- repo: https://github.com/k8s-at-home/sops-pre-commit
rev: v2.1.1
hooks:
- id: forbid-secrets

- repo: https://github.com/zricethezav/gitleaks
rev: v8.12.0
hooks:
- id: gitleaks
Empty file added .prettierignore
Empty file.
5 changes: 5 additions & 0 deletions .prettierrc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
# Prettier formatting options
tabWidth: 2
singleQuote: false
semi: false
trailingComma: "es5"
30 changes: 30 additions & 0 deletions .yamllint.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
---
# yamllint rule overrides on top of the default preset.
# Option reference: https://yamllint.readthedocs.io/en/stable/index.html
extends: default
rules:
  document-start:
    level: error
    present: true

  indentation:
    check-multi-line-strings: false
    indent-sequences: consistent
    spaces: 2

  line-length:
    level: warning
    max: 120

  comments:
    min-spaces-from-content: 1

  braces:
    max-spaces-inside: 1
    min-spaces-inside: 0

  brackets:
    max-spaces-inside: 0
    min-spaces-inside: 0

  truthy:
    allowed-values:
      - "true"
      - "false"
      - "on"
      - "yes"
33 changes: 32 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,33 @@
# i-see-dead-pods
# I see dead Pods

Get rid of `Pod was terminated in response to imminent node shutdown.` Pods forever.

## Story

In Kubernetes with graceful node shutdown enabled, Pods can remain in a `broken` state for long periods of time. This state causes alerts to be fired by kube-prometheus-stack.

Most of the `solutions` on the internet describe an uncontrolled deletion of all Pods in the `Error` or `Terminated` state,
which I consider a bad idea, because you will no longer see whether there are real `Error` Pods in your system.

These manifests provide a Kubernetes `CronJob` that continuously deletes all Pods matching the given criteria.

## Setup

### kubectl

You can apply the manifests manually:

```console
kubectl apply -f https://raw.githubusercontent.com/tyriis/i-see-dead-pods/main/manifests/service-account.yaml
kubectl apply -f https://raw.githubusercontent.com/tyriis/i-see-dead-pods/main/manifests/cluster-role.yaml
kubectl apply -f https://raw.githubusercontent.com/tyriis/i-see-dead-pods/main/manifests/cluster-role-binding.yaml
kubectl apply -f https://raw.githubusercontent.com/tyriis/i-see-dead-pods/main/manifests/cronjob.yaml
```

### kustomize

or with kustomize

### flux helmrelease

or with flux
13 changes: 13 additions & 0 deletions manifests/cluster-role-binding.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
---
# Grants the ClusterRole system:i-see-dead-pods to the i-see-dead-pods
# ServiceAccount in kube-system.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: system:i-see-dead-pods
subjects:
  - kind: ServiceAccount
    namespace: kube-system
    name: i-see-dead-pods
roleRef:
  kind: ClusterRole
  apiGroup: rbac.authorization.k8s.io
  name: system:i-see-dead-pods
11 changes: 11 additions & 0 deletions manifests/cluster-role.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
# Cluster-wide permission to get, list and delete Pods (core API group).
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: system:i-see-dead-pods
  annotations:
    rbac.authorization.kubernetes.io/autoupdate: "true"
rules:
  - apiGroups:
      - ""
    resources:
      - pods
    verbs:
      - get
      - list
      - delete
30 changes: 30 additions & 0 deletions manifests/cronjob.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
---
# CronJob that runs every minute and deletes Pods whose status message says they
# were terminated in response to an imminent node shutdown.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: i-see-dead-pods
  namespace: kube-system
spec:
  schedule: "* * * * *"
  # Do not start a new run while a previous one is still active.
  concurrencyPolicy: Forbid
  jobTemplate:
    spec:
      backoffLimit: 0
      template:
        spec:
          serviceAccountName: i-see-dead-pods
          containers:
            - name: kubectl
              image: ghcr.io/k8s-at-home/kubectl:v1.25.4
              # Pipeline: print "<namespace> <name> <status.message>" for every Pod,
              # keep the lines carrying the node-shutdown message, then pass each
              # namespace/name pair to `kubectl delete pod -n <namespace> <name>`.
              #   grep -F   match the message literally (its trailing "." is not a wildcard)
              #   xargs -r  skip kubectl entirely when nothing matched; without it
              #             `kubectl delete pod -n` is run once with no arguments
              #   || true   deletion stays best-effort, the Job is never marked failed
              command:
                - /bin/sh
                - -ec
                - |
                  kubectl get pods \
                    --all-namespaces \
                    -o go-template \
                    --template='{{range .items}}{{printf "%s %s %s\n" .metadata.namespace .metadata.name .status.message}}{{end}}' \
                    | grep -F "Pod was terminated in response to imminent node shutdown." \
                    | awk '{print $1, $2}' \
                    | xargs -r -n2 kubectl delete pod -n || true
          restartPolicy: OnFailure
58 changes: 58 additions & 0 deletions manifests/flux-helmrelease.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
---
# Flux HelmRelease that deploys the same Pod-cleanup job through the
# app-template chart from the bjw-s-charts HelmRepository.
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
name: i-see-dead-pods
namespace: kube-system
spec:
interval: 15m
chart:
spec:
chart: app-template
version: 1.3.2
interval: 15m
sourceRef:
kind: HelmRepository
name: bjw-s-charts
namespace: flux-system
maxHistory: 15
install:
createNamespace: true
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
retries: 3
uninstall:
keepHistory: false
# Chart values: run the workload as a CronJob scheduled every minute.
# NOTE(review): no service account is configured here, whereas manifests/cronjob.yaml
# sets serviceAccountName: i-see-dead-pods - confirm the Pod is allowed to list/delete Pods.
values:
controller:
type: cronjob
cronjob:
schedule: "* * * * *"
ttlSecondsAfterFinished: 60
# NOTE(review): verify the key names below (restartPolicy placement, *JobsHistoryLimit)
# against the values schema of app-template 1.3.2.
restartPolicy: OnFailure
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 1
image:
repository: ghcr.io/k8s-at-home/kubectl
tag: v1.25.4
# Pipeline: print "<namespace> <name> <status.message>" for every Pod, keep the lines
# carrying the node-shutdown message, then delete each namespace/name pair.
#   grep -F   match the message literally (its trailing "." is not a wildcard)
#   xargs -r  skip kubectl when nothing matched; without it `kubectl delete pod -n`
#             is run once with no arguments
#   || true   deletion stays best-effort, the Job is never marked failed
command:
- /bin/sh
- -ec
- |
kubectl get pods \
--all-namespaces \
-o go-template \
--template='{{range .items}}{{printf "%s %s %s\n" .metadata.namespace .metadata.name .status.message}}{{end}}' \
| grep -F "Pod was terminated in response to imminent node shutdown." \
| awk '{print $1, $2}' \
| xargs -r -n2 kubectl delete pod -n || true
# NOTE(review): a 10Mi memory limit looks tight for kubectl - confirm the job is not OOM-killed.
resources:
requests:
cpu: 10m
memory: 10Mi
limits:
memory: 10Mi
6 changes: 6 additions & 0 deletions manifests/service-account.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
# ServiceAccount referenced by the i-see-dead-pods CronJob and ClusterRoleBinding.
kind: ServiceAccount
apiVersion: v1
metadata:
  namespace: kube-system
  name: i-see-dead-pods

0 comments on commit 4e5372d

Please sign in to comment.