From b4b2ec7167bfe844ccb776776746fdcc5c6dc666 Mon Sep 17 00:00:00 2001
From: Andrew Durbin
Date: Mon, 30 Oct 2023 13:34:28 -0600
Subject: [PATCH] Kubevirt build starts k3s containerd in kube

The existing containerd-user instance in pillar won't be started.
Use the k3s-supplied containerd for compatibility.
Start containerd outside k3s to allow us to change the containerd root.
Add missing findutils package for Longhorn.
Raise I/O priority for the embedded etcd.
Add Pramodh's containerd.go change to skip starting containerd-user in pillar.

Signed-off-by: Andrew Durbin
---
 pkg/kube/Dockerfile                 |  4 +-
 pkg/kube/cluster-init.sh            | 62 +++++++++++++++++++++++++++--
 pkg/kube/config-k3s.toml            | 35 ++++++++++++++++
 pkg/kube/config.yaml                |  4 ++
 pkg/pillar/containerd/containerd.go | 25 ++++++++++--
 5 files changed, 122 insertions(+), 8 deletions(-)
 create mode 100644 pkg/kube/config-k3s.toml

diff --git a/pkg/kube/Dockerfile b/pkg/kube/Dockerfile
index 66f1017ff2..f3cff007f2 100644
--- a/pkg/kube/Dockerfile
+++ b/pkg/kube/Dockerfile
@@ -2,7 +2,7 @@ FROM lfedge/eve-alpine:12487b9900ba40f3ecdadfec2c84799fa34e5014 as build
 ENV PKGS alpine-baselayout musl-utils iproute2 iptables curl openrc \
-    open-iscsi libvirt libvirt-client util-linux grep
+    open-iscsi libvirt libvirt-client util-linux grep findutils jq
 RUN eve-alpine-deploy.sh

 FROM scratch
@@ -11,6 +11,8 @@ COPY cluster-init.sh /usr/bin/
 COPY cgconfig.conf /etc
 # kubevirt yaml files are patched files and will be removed later, look at cluster-init.sh
 COPY kubevirt-operator.yaml /etc
+RUN mkdir -p /etc/containerd
+COPY config-k3s.toml /etc/containerd/
 RUN mkdir -p /etc/rancher/k3s
 COPY config.yaml /etc/rancher/k3s
 WORKDIR /
diff --git a/pkg/kube/cluster-init.sh b/pkg/kube/cluster-init.sh
index 4a7cdf3594..88406a038c 100755
--- a/pkg/kube/cluster-init.sh
+++ b/pkg/kube/cluster-init.sh
@@ -9,6 +9,8 @@
 LONGHORN_VERSION=v1.4.2
 CDI_VERSION=v1.56.0
 INSTALL_LOG=/var/lib/install.log
+CTRD_LOG=/var/lib/containerd.log
+LOG_SIZE=$((5*1024*1024))

 logmsg() {
     local MSG
@@ -68,6 +70,42 @@ setup_prereqs () {
     check_network_connection
 }

+check_start_containerd() {
+    # Needed to get the pods to start
+    if [ ! -L /usr/bin/runc ]; then
+        ln -s /var/lib/rancher/k3s/data/current/bin/runc /usr/bin/runc
+    fi
+    if [ ! -L /usr/bin/containerd-shim-runc-v2 ]; then
+        ln -s /var/lib/rancher/k3s/data/current/bin/containerd-shim-runc-v2 /usr/bin/containerd-shim-runc-v2
+    fi
+
+    if pgrep -f "containerd --config" >> $INSTALL_LOG 2>&1; then
+        logmsg "k3s-containerd is alive"
+    else
+        logmsg "Starting k3s-containerd"
+        mkdir -p /run/containerd-user
+        nohup /var/lib/rancher/k3s/data/current/bin/containerd --config /etc/containerd/config-k3s.toml > $CTRD_LOG 2>&1 &
+    fi
+}
+trigger_k3s_selfextraction() {
+    # Analysis of the k3s source shows nearly any CLI command will first self-extract a series of binaries.
+    # In our case we're looking for the containerd binary.
+    # k3s check-config appears to be the only CLI cmd which doesn't:
+    # - start a long-running process/server
+    # - time out connecting to a socket
+    # - manipulate config/certs
+
+    # When run on the shell this does throw some config errors; it's unclear if we need these issues fixed:
+    # - links: aux/ip6tables should link to iptables-detect.sh (fail)
+    # - links: aux/ip6tables-restore should link to iptables-detect.sh (fail)
+    # - links: aux/ip6tables-save should link to iptables-detect.sh (fail)
+    # - links: aux/iptables should link to iptables-detect.sh (fail)
+    # - links: aux/iptables-restore should link to iptables-detect.sh (fail)
+    # - links: aux/iptables-save should link to iptables-detect.sh (fail)
+    # - apparmor: enabled, but apparmor_parser missing (fail)
+    /usr/bin/k3s check-config >> $INSTALL_LOG 2>&1
+}
+
 # NOTE: We only support zfs storage in production systems because data is persisted on zvol.
 # If ZFS is not available we still go ahead and provide the service but the data is lost on reboot
 # because /var/lib will be on overlayfs. The only reason to allow that is to provide a quick debugging env for developers.
@@ -82,7 +120,7 @@ fi
 setup_prereqs
 date >> $INSTALL_LOG
-HOSTNAME=$(/bin/hostname)
+
 #Forever loop every 15 secs
 while true;
 do
@@ -94,10 +132,17 @@ if [ ! -f /var/lib/all_components_initialized ]; then
     /usr/bin/curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION=${K3S_VERSION} INSTALL_K3S_SKIP_ENABLE=true INSTALL_K3S_BIN_DIR=/var/lib/k3s/bin sh -
     ln -s /var/lib/k3s/bin/* /usr/bin
     logmsg "Initializing K3S version $K3S_VERSION"
+    trigger_k3s_selfextraction
+    check_start_containerd
     nohup /usr/bin/k3s server --config /etc/rancher/k3s/config.yaml &
     #wait until k3s is ready
     logmsg "Looping until k3s is ready"
-    until kubectl get node | grep "$HOSTNAME" | awk '{print $2}' | grep 'Ready'; do sleep 5; done
+    while [ "$(kubectl get node "$(/bin/hostname)" -o json | jq '.status.conditions[] | select(.reason=="KubeletReady") | .status=="True"')" != "true" ];
+    do
+        sleep 5;
+    done
+    # Give the embedded etcd in k3s I/O priority, as its fsync latencies are critical
+    ionice -c2 -n0 -p "$(pgrep -f "k3s server")"
     logmsg "k3s is ready on this node"
     # Default location where clients will look for config
     ln -s /etc/rancher/k3s/k3s.yaml ~/.kube/config
@@ -129,6 +174,7 @@ if [ ! -f /var/lib/all_components_initialized ]; then
         touch /var/lib/all_components_initialized
     fi
 else
+    check_start_containerd
     if pgrep k3s >> $INSTALL_LOG 2>&1; then
         logmsg "k3s is alive "
     else
@@ -137,11 +183,21 @@ else
         logmsg "Starting k3s server after reboot"
         nohup /usr/bin/k3s server --config /etc/rancher/k3s/config.yaml &
         logmsg "Looping until k3s is ready"
-        until kubectl get node | grep "$HOSTNAME" | awk '{print $2}' | grep 'Ready'; do sleep 5; done
+        while [ "$(kubectl get node "$(/bin/hostname)" -o json | jq '.status.conditions[] | select(.reason=="KubeletReady") | .status=="True"')" != "true" ];
+        do
+            sleep 5;
+        done
+        # Give the embedded etcd in k3s I/O priority, as its fsync latencies are critical
+        ionice -c2 -n0 -p "$(pgrep -f "k3s server")"
         logmsg "k3s is ready on this node"
         # Default location where clients will look for config
         ln -s /etc/rancher/k3s/k3s.yaml ~/.kube/config
     fi
 fi
+currentSize=$(wc -c <"$CTRD_LOG")
+if [ "$currentSize" -gt "$LOG_SIZE" ]; then
+    cp "$CTRD_LOG" "${CTRD_LOG}.1"
+    truncate -s 0 "$CTRD_LOG"
+fi
 sleep 15
 done
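A note on check_start_containerd above: it only checks liveness by process pattern. For manual debugging one could also probe the daemon over its socket. The sketch below is not part of the change; it assumes the socket path from config-k3s.toml (next file) and that a ctr binary is present in the self-extracted k3s data dir (if not, "k3s ctr --address ..." should behave the same):

    #!/bin/sh
    # Probe the k3s-supplied containerd over the user socket.
    CTR=/var/lib/rancher/k3s/data/current/bin/ctr
    SOCK=/run/containerd-user/containerd.sock
    if "$CTR" --address "$SOCK" version >/dev/null 2>&1; then
        echo "k3s-containerd is serving on $SOCK"
    else
        echo "k3s-containerd is not responding on $SOCK" >&2
    fi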
diff --git a/pkg/kube/config-k3s.toml b/pkg/kube/config-k3s.toml
new file mode 100644
index 0000000000..6f4b65e099
--- /dev/null
+++ b/pkg/kube/config-k3s.toml
@@ -0,0 +1,35 @@
+
+# File generated by k3s. DO NOT EDIT. Use config.toml.tmpl instead.
+version = 2
+
+state = "/run/containerd-user"
+
+[plugins."io.containerd.internal.v1.opt"]
+  path = "/var/lib/rancher/k3s/agent/containerd/"
+[plugins."io.containerd.grpc.v1.cri"]
+  stream_server_address = "127.0.0.1"
+  stream_server_port = "10010"
+  enable_selinux = false
+  enable_unprivileged_ports = true
+  enable_unprivileged_icmp = true
+  sandbox_image = "rancher/mirrored-pause:3.6"
+
+[plugins."io.containerd.grpc.v1.cri".containerd]
+  snapshotter = "overlayfs"
+  disable_snapshot_annotations = true
+
+[grpc]
+  address = "/run/containerd-user/containerd.sock"
+
+[plugins."io.containerd.grpc.v1.cri".cni]
+  bin_dir = "/var/lib/rancher/k3s/data/c26e7571d760c5f199d18efd197114f1ca4ab1e6ffe494f96feb65c87fcb8cf0/bin"
+  conf_dir = "/var/lib/rancher/k3s/agent/etc/cni/net.d"
+
+[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
+  runtime_type = "io.containerd.runc.v2"
+
+[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
+  SystemdCgroup = false
+
+
+
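The cni bin_dir above pins a content hash under /var/lib/rancher/k3s/data that is specific to the extracted k3s release; bumping K3S_VERSION would re-extract under a new hash and invalidate this path. A small guard one could run, a sketch using only paths taken from this file:

    #!/bin/sh
    # Warn when the pinned CNI bin_dir no longer matches the extracted k3s data dir.
    BIN_DIR=/var/lib/rancher/k3s/data/c26e7571d760c5f199d18efd197114f1ca4ab1e6ffe494f96feb65c87fcb8cf0/bin
    if [ ! -d "$BIN_DIR" ]; then
        echo "pinned CNI bin_dir missing: $BIN_DIR" >&2
        echo "available data dirs:" >&2
        find /var/lib/rancher/k3s/data -maxdepth 1 -type d >&2
    fi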
diff --git a/pkg/kube/config.yaml b/pkg/kube/config.yaml
index 2e36fc76d5..9eba246c6a 100644
--- a/pkg/kube/config.yaml
+++ b/pkg/kube/config.yaml
@@ -4,3 +4,7 @@
 write-kubeconfig-mode: "0644"
 cluster-init: true
 log: "/var/lib/rancher/k3s/k3s.log"
+# Remove debug flag before release to avoid overlogging
+debug: true
+etcd-expose-metrics: true
+container-runtime-endpoint: "/run/containerd-user/containerd.sock"
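With container-runtime-endpoint set, k3s is pointed at the externally started containerd instead of spawning its embedded one. A rough way to confirm this on a running node, as a sketch; the embedded config path shown in the comment is the usual k3s default and is an assumption here:

    #!/bin/sh
    # List containerd processes with their arguments; on a kube image only the
    # external instance started by check_start_containerd should appear.
    pgrep -af containerd
    # expected:     .../k3s/data/current/bin/containerd --config /etc/containerd/config-k3s.toml
    # not expected: containerd -c /var/lib/rancher/k3s/agent/etc/containerd/config.toml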
diff --git a/pkg/pillar/containerd/containerd.go b/pkg/pillar/containerd/containerd.go
index 5d2d4df342..d12ba0b147 100644
--- a/pkg/pillar/containerd/containerd.go
+++ b/pkg/pillar/containerd/containerd.go
@@ -28,6 +28,7 @@ import (
     "github.com/containerd/containerd/snapshots"
     "github.com/containerd/typeurl"
     "github.com/lf-edge/edge-containers/pkg/resolver"
+    "github.com/lf-edge/eve/pkg/pillar/base"
     "github.com/lf-edge/eve/pkg/pillar/types"
     "github.com/lf-edge/eve/pkg/pillar/vault"
     "github.com/opencontainers/go-digest"
@@ -49,6 +50,8 @@ const (
     ctrdSystemServicesNamespace = "services.linuxkit"
     // ctrdServicesNamespace containerd namespace for running user containers
     ctrdServicesNamespace = "eve-user-apps"
+    // ctrdKubeServicesNamespace containerd namespace for running user containers in kube-containerd
+    ctrdKubeServicesNamespace = "k8s.io"
     // containerdRunTime - default runtime of containerd
     containerdRunTime = "io.containerd.runc.v2"
     // container config file name
@@ -73,6 +76,10 @@ const (
 var (
     // default snapshotter used by containerd
     defaultSnapshotter = "overlayfs"
+    // default services namespace
+    servicesNamespace = ctrdServicesNamespace
+    // shouldStartUserContainerd tracks if we are starting user-app containerd
+    shouldStartUserContainerd = true
 )

 // Client is the handle we return to the caller
@@ -81,10 +88,10 @@ type Client struct {
     contentStore content.Store
 }

-// GetServicesNamespace returns ctrdServicesNamespace
+// GetServicesNamespace returns servicesNamespace
 // The value is used to define the cgroups path of the EVE services
 func GetServicesNamespace() string {
-    return ctrdServicesNamespace
+    return servicesNamespace
 }

 func init() {
@@ -93,6 +100,13 @@
     if vault.ReadPersistType() == types.PersistZFS {
         defaultSnapshotter = types.ZFSSnapshotter
     }
+
+    if base.IsHVTypeKube() {
+        defaultSnapshotter = "overlayfs"
+        servicesNamespace = ctrdKubeServicesNamespace
+        // kubevirt image starts its own containerd in the kube container
+        shouldStartUserContainerd = false
+    }
 }

 // NewContainerdClient returns a *Client
@@ -635,7 +649,7 @@ func (client *Client) Resolver(ctx context.Context) (resolver.ResolverCloser, error) {
 // CtrNewUserServicesCtx returns a new user service containerd context
 // and a done func to cancel the context after use.
 func (client *Client) CtrNewUserServicesCtx() (context.Context, context.CancelFunc) {
-    return newServiceCtx(ctrdServicesNamespace)
+    return newServiceCtx(servicesNamespace)
 }

 // CtrNewSystemServicesCtx returns a new system service containerd context
@@ -647,7 +661,7 @@
 // CtrNewUserServicesCtxWithLease returns a new user service containerd context with a 24 hrs lease
 // and a done func to delete the lease and cancel the context after use.
 func (client *Client) CtrNewUserServicesCtxWithLease() (context.Context, context.CancelFunc, error) {
-    return newServiceCtxWithLease(client.ctrdClient, ctrdServicesNamespace)
+    return newServiceCtxWithLease(client.ctrdClient, servicesNamespace)
 }

 // CtrNewSystemServicesCtxWithLease returns a new system service containerd context with a 24 hrs lease
@@ -885,6 +899,9 @@ func (client *Client) UnpackClientImage(clientImage containerd.Image) error {

 // StartUserContainerdInstance execute user containerd instance in goroutine
 func StartUserContainerdInstance() error {
+    if !shouldStartUserContainerd {
+        return nil
+    }
     name := "/usr/bin/containerd"
     args := []string{"--config", "/etc/containerd/user.toml"}
     cmd := exec.Command(name, args...)
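Taken together with the cluster-init.sh changes, the shouldStartUserContainerd gate means that on a kube image pillar's StartUserContainerdInstance returns early, and user apps run in the k8s.io namespace of the k3s-supplied containerd. A sketch of how one might confirm the namespace wiring, reusing the assumed ctr path from the earlier snippet:

    #!/bin/sh
    # On a kube image, user-app containers should appear under the k8s.io
    # namespace of the containerd behind /run/containerd-user/containerd.sock.
    CTR=/var/lib/rancher/k3s/data/current/bin/ctr
    SOCK=/run/containerd-user/containerd.sock
    "$CTR" --address "$SOCK" namespaces list
    "$CTR" --address "$SOCK" --namespace k8s.io containers list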