Commit 09b8361

Switch from the embedded k3s containerd process to one we manage, but still use the k3s-supplied binary so we can change the containerd root. Add the missing findutils package for Longhorn. Raise the I/O priority of the embedded etcd. Add Pramodh's containerd.go change to skip starting containerd-user in pillar.

Signed-off-by: Andrew Durbin <136004284+andrewd-zededa@users.noreply.github.com>
andrewd-zededa committed Oct 30, 2023
1 parent fe3146d commit 09b8361
Showing 4 changed files with 73 additions and 4 deletions.
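The new /etc/containerd/config-k3s.toml added below is copied into the image, but its contents are not part of this diff. As a rough sketch only, a config that relocates the containerd root and serves the socket referenced in config.yaml could be generated like this — the root and state paths are assumptions, not taken from this commit; only the socket address is confirmed by the config.yaml hunk below:

    # Hypothetical sketch -- the actual config-k3s.toml contents are not shown in this diff.
    # The root and state paths are assumptions; the socket address matches config.yaml below.
    cat <<'EOF' > /etc/containerd/config-k3s.toml
    root = "/persist/containerd"
    state = "/run/containerd-user"
    [grpc]
      address = "/run/containerd-user/containerd.sock"
    EOF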
4 changes: 3 additions & 1 deletion pkg/kube/Dockerfile
@@ -2,7 +2,7 @@

FROM lfedge/eve-alpine:12487b9900ba40f3ecdadfec2c84799fa34e5014 as build
ENV PKGS alpine-baselayout musl-utils iproute2 iptables curl openrc \
open-iscsi libvirt libvirt-client util-linux grep
open-iscsi libvirt libvirt-client util-linux grep findutils
RUN eve-alpine-deploy.sh

FROM scratch
@@ -11,6 +11,8 @@ COPY cluster-init.sh /usr/bin/
COPY cgconfig.conf /etc
# kubevirt yaml files are patched files and will be removed later, look at cluster-init.sh
COPY kubevirt-operator.yaml /etc
RUN mkdir -p /etc/containerd
COPY config-k3s.toml /etc/containerd/
RUN mkdir -p /etc/rancher/k3s
COPY config.yaml /etc/rancher/k3s
WORKDIR /
50 changes: 47 additions & 3 deletions pkg/kube/cluster-init.sh
@@ -68,6 +68,43 @@ setup_prereqs () {
check_network_connection
}

check_start_containerd() {
# Needed to get the pods to start
if [ ! -L /usr/bin/runc ]; then
ln -s /var/lib/rancher/k3s/data/current/bin/runc /usr/bin/runc
fi
if [ ! -L /usr/bin/containerd-shim-runc-v2 ]; then
ln -s /var/lib/rancher/k3s/data/current/bin/containerd-shim-runc-v2 /usr/bin/containerd-shim-runc-v2
fi

if pgrep -f "containerd --config" >> $INSTALL_LOG 2>&1; then
logmsg "k3s-containerd is alive"
else
logmsg "Starting k3s-containerd"
mkdir -p /run/containerd-user
nohup /var/lib/rancher/k3s/data/current/bin/containerd --config /etc/containerd/config-k3s.toml &
fi
}
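# Sketch: one way to verify the managed instance is serving, assuming the
# k3s-bundled ctr binary is available (not part of this change):
#   /var/lib/rancher/k3s/data/current/bin/ctr \
#       --address /run/containerd-user/containerd.sock \
#       --namespace k8s.io containers list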
trigger_k3s_selfextraction() {
# Analysis of the k3s source shows nearly any CLI command will first self-extract a series of binaries.
# In our case we're looking for the containerd binary.
# k3s check-config appears to be the only CLI command which doesn't:
# - start a long running process/server
# - timeout connecting to a socket
# - manipulate config/certs

# When run in a shell this does emit some config errors; it's unclear whether these issues need fixing:
# - links: aux/ip6tables should link to iptables-detect.sh (fail)
# - links: aux/ip6tables-restore should link to iptables-detect.sh (fail)
# - links: aux/ip6tables-save should link to iptables-detect.sh (fail)
# - links: aux/iptables should link to iptables-detect.sh (fail)
# - links: aux/iptables-restore should link to iptables-detect.sh (fail)
# - links: aux/iptables-save should link to iptables-detect.sh (fail)
# - apparmor: enabled, but apparmor_parser missing (fail)
# - CONFIG_INET_XFRM_MODE_TRANSPORT: missing
/usr/bin/k3s check-config >> $INSTALL_LOG 2>&1
}
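# Sketch: after check-config has run once, the binaries symlinked by
# check_start_containerd should have been extracted, e.g.:
#   ls -l /var/lib/rancher/k3s/data/current/bin/containerd \
#         /var/lib/rancher/k3s/data/current/bin/runc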

# NOTE: We only support zfs storage in production systems because data is persisted on zvol.
# If ZFS is not available we still go ahead and provide the service but the data is lost on reboot
# because /var/lib will be on overlayfs. The only reason to allow that is to provide a quick debugging env for developers.
@@ -82,7 +119,7 @@ fi
setup_prereqs

date >> $INSTALL_LOG
HOSTNAME=$(/bin/hostname)

#Forever loop every 15 secs
while true;
do
@@ -94,10 +131,14 @@ if [ ! -f /var/lib/all_components_initialized ]; then
/usr/bin/curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION=${K3S_VERSION} INSTALL_K3S_SKIP_ENABLE=true INSTALL_K3S_BIN_DIR=/var/lib/k3s/bin sh -
ln -s /var/lib/k3s/bin/* /usr/bin
logmsg "Initializing K3S version $K3S_VERSION"
trigger_k3s_selfextraction
check_start_containerd
nohup /usr/bin/k3s server --config /etc/rancher/k3s/config.yaml &
# Wait until k3s is ready
logmsg "Looping until k3s is ready"
until kubectl get node | grep "$HOSTNAME" | awk '{print $2}' | grep 'Ready'; do sleep 5; done
until kubectl get node | grep "$(/bin/hostname)" | awk '{print $2}' | grep 'Ready'; do sleep 5; done
# Give the embedded etcd in k3s I/O priority, as its fsync latencies are critical
ionice -c2 -n0 -p "$(pgrep -f "k3s server")"
logmsg "k3s is ready on this node"
# Default location where clients will look for config
ln -s /etc/rancher/k3s/k3s.yaml ~/.kube/config
@@ -129,6 +170,7 @@ if [ ! -f /var/lib/all_components_initialized ]; then
touch /var/lib/all_components_initialized
fi
else
check_start_containerd
if pgrep k3s >> $INSTALL_LOG 2>&1; then
logmsg "k3s is alive"
else
@@ -137,7 +179,9 @@ else
logmsg "Starting k3s server after reboot"
nohup /usr/bin/k3s server --config /etc/rancher/k3s/config.yaml &
logmsg "Looping until k3s is ready"
until kubectl get node | grep "$HOSTNAME" | awk '{print $2}' | grep 'Ready'; do sleep 5; done
until kubectl get node | grep "$(/bin/hostname)" | awk '{print $2}' | grep 'Ready'; do sleep 5; done
# Give the embedded etcd in k3s I/O priority, as its fsync latencies are critical
ionice -c2 -n0 -p "$(pgrep -f "k3s server")"
logmsg "k3s is ready on this node"
# Default location where clients will look for config
ln -s /etc/rancher/k3s/k3s.yaml ~/.kube/config
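In both branches, ionice -c2 -n0 places the k3s server process, and with it the embedded etcd, in the best-effort scheduling class at its highest priority (0). A quick way to confirm the class took effect after startup (a sketch; the pgrep pattern mirrors the script above):

    ionice -p "$(pgrep -f 'k3s server' | head -n1)"
    # expected output along the lines of: best-effort: prio 0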
3 changes: 3 additions & 0 deletions pkg/kube/config.yaml
@@ -4,3 +4,6 @@
write-kubeconfig-mode: "0644"
cluster-init: true
log: "/var/lib/rancher/k3s/k3s.log"
debug: true
etcd-expose-metrics: true
container-runtime-endpoint: "/run/containerd-user/containerd.sock"
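Each key in config.yaml maps to a k3s server flag, so the additions above are equivalent to passing the flags directly; shown here purely as an illustration:

    /usr/bin/k3s server \
        --debug \
        --etcd-expose-metrics \
        --container-runtime-endpoint /run/containerd-user/containerd.sock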
20 changes: 20 additions & 0 deletions pkg/pillar/containerd/containerd.go
@@ -28,6 +28,7 @@ import (
"github.com/containerd/containerd/snapshots"
"github.com/containerd/typeurl"
"github.com/lf-edge/edge-containers/pkg/resolver"
"github.com/lf-edge/eve/pkg/pillar/base"
"github.com/lf-edge/eve/pkg/pillar/types"
"github.com/lf-edge/eve/pkg/pillar/vault"
"github.com/opencontainers/go-digest"
@@ -49,6 +50,8 @@ const (
ctrdSystemServicesNamespace = "services.linuxkit"
// ctrdServicesNamespace containerd namespace for running user containers
ctrdServicesNamespace = "eve-user-apps"
// ctrdKubeServicesNamespace containerd namespace for running user containers in kube-containerd
ctrdKubeServicesNamespace = "k8s.io"
//containerdRunTime - default runtime of containerd
containerdRunTime = "io.containerd.runc.v2"
// container config file name
@@ -84,6 +87,9 @@ type Client struct {
// GetServicesNamespace returns ctrdServicesNamespace
// The value is used to define the cgroups path of the EVE services
func GetServicesNamespace() string {
if base.IsHVTypeKube() {
return ctrdKubeServicesNamespace
}

return ctrdServicesNamespace
}

@@ -93,6 +99,10 @@ func init() {
if vault.ReadPersistType() == types.PersistZFS {
defaultSnapshotter = types.ZFSSnapshotter
}

if base.IsHVTypeKube() {
defaultSnapshotter = "overlayfs"
}

}

// NewContainerdClient returns a *Client
@@ -635,6 +645,9 @@ func (client *Client) Resolver(ctx context.Context) (resolver.ResolverCloser, error) {
// CtrNewUserServicesCtx returns a new user service containerd context
// and a done func to cancel the context after use.
func (client *Client) CtrNewUserServicesCtx() (context.Context, context.CancelFunc) {
if base.IsHVTypeKube() {
return newServiceCtx(ctrdKubeServicesNamespace)
}

return newServiceCtx(ctrdServicesNamespace)
}

@@ -647,6 +660,9 @@ func (client *Client) CtrNewSystemServicesCtx() (context.Context, context.CancelFunc) {
// CtrNewUserServicesCtxWithLease returns a new user service containerd context with a 24 hrs lease
// and a done func to delete the lease and cancel the context after use.
func (client *Client) CtrNewUserServicesCtxWithLease() (context.Context, context.CancelFunc, error) {
if base.IsHVTypeKube() {
return newServiceCtxWithLease(client.ctrdClient, ctrdKubeServicesNamespace)
}

return newServiceCtxWithLease(client.ctrdClient, ctrdServicesNamespace)
}

@@ -885,6 +901,10 @@ func (client *Client) UnpackClientImage(clientImage containerd.Image) error {

// StartUserContainerdInstance execute user containerd instance in goroutine
func StartUserContainerdInstance() error {
// In the kubevirt env we do not start the eve user containerd; we just use the one that comes with k3s.
if base.IsHVTypeKube() {
return nil
}

name := "/usr/bin/containerd"
args := []string{"--config", "/etc/containerd/user.toml"}
cmd := exec.Command(name, args...)
