Skip to content

Commit 2e3c81c

Browse files
authored
Merge pull request #389 from vdice/feat/distro-specific-restarters
feat(*): add K8s distro-specific restarters; update default restarter
2 parents 4a81b1d + 7ca534a commit 2e3c81c

File tree

5 files changed

+137
-25
lines changed

5 files changed

+137
-25
lines changed

.github/workflows/helm-chart-smoketest.yml

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -158,13 +158,11 @@ jobs:
158158
- name: label nodes
159159
run: kubectl label node --all spin=true
160160

161-
# MicroK8s runs directly on the host, so both the host's containerd process and MicroK8s' would
162-
# otherwise be detected by runtime-class-manager. As of writing, rcm will fail if more than one
163-
# containerd process is detected when attempting to restart. So, we stop the host process until
164-
# the shim has been installed and the test app has been confirmed to run.
165-
- name: stop system containerd
166-
if: matrix.config.type == 'microk8s'
167-
run: sudo systemctl stop containerd
161+
- name: verify only one installer pod with Succeeded status
162+
# TODO: provisioning on k3d still leads to the first installer pod finishing with provisioner status Unknown and phase Failed
163+
if: matrix.config.type != 'k3d'
164+
run: |
165+
timeout 60s bash -c 'until [[ "$(kubectl -n rcm get $(kubectl get pods -n rcm --no-headers -o name | grep install | head -n1) -o jsonpath="{.status.phase}" 2>/dev/null)" == "Succeeded" ]]; do sleep 2; done'
168166
169167
- name: run Spin App
170168
run: |
@@ -186,7 +184,7 @@ jobs:
186184
kubectl describe runtimeclass wasmtime-spin-v2
187185
188186
# Get install pod logs
189-
# Note: there may be multiple pods pending fix in https://github.com/spinkube/runtime-class-manager/issues/140
187+
# Note: there may be multiple install pods until the k3d fix for https://github.com/spinkube/runtime-class-manager/issues/140 lands
190188
install_pod=$(kubectl get pods -n rcm --no-headers -o name | awk '{if ($1 ~ "-spin-v2-install") print $0}' | tail -n 1)
191189
kubectl describe -n rcm $install_pod || true
192190
kubectl logs -n rcm -c downloader $install_pod || true

images/installer/Dockerfile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ COPY . .
1010
RUN CGO_ENABLED=0 go build -o rcm-node-installer ./cmd/node-installer
1111
RUN /app/rcm-node-installer -h
1212

13-
FROM scratch
13+
# Using busybox instead of scratch so that the nsenter utility is present, as used in restarter logic
14+
FROM busybox:1.37
1415
COPY --from=builder /app/rcm-node-installer /rcm-node-installer
1516

1617
ENTRYPOINT ["/rcm-node-installer"]

internal/containerd/restart_unix.go

Lines changed: 116 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,35 +22,139 @@ package containerd
2222
import (
2323
"fmt"
2424
"log/slog"
25+
"os"
26+
"os/exec"
27+
"regexp"
2528
"syscall"
2629

2730
"github.com/mitchellh/go-ps"
2831
)
2932

3033
var psProcesses = ps.Processes
3134

32-
type restarter struct{}
35+
type defaultRestarter struct{}
3336

34-
func NewRestarter() Restarter {
35-
return restarter{}
37+
func NewDefaultRestarter() Restarter {
38+
return defaultRestarter{}
3639
}
3740

38-
func (c restarter) Restart() error {
39-
pid, err := getPid()
41+
func (c defaultRestarter) Restart() error {
42+
// If listing systemd units succeeds, prefer systemctl restart; otherwise kill pid
43+
if _, err := listSystemdUnits(); err == nil {
44+
out, err := nsenterCmd("systemctl", "restart", "containerd").CombinedOutput()
45+
slog.Debug(string(out))
46+
if err != nil {
47+
return fmt.Errorf("unable to restart containerd: %w", err)
48+
}
49+
} else {
50+
pid, err := getPid("containerd")
51+
if err != nil {
52+
return err
53+
}
54+
slog.Debug("found containerd process", "pid", pid)
55+
56+
err = syscall.Kill(pid, syscall.SIGHUP)
57+
if err != nil {
58+
return fmt.Errorf("failed to send SIGHUP to containerd: %w", err)
59+
}
60+
}
61+
62+
return nil
63+
}
64+
65+
type K0sRestarter struct{}
66+
67+
func (c K0sRestarter) Restart() error {
68+
// First, collect systemd units to determine which mode k0s is running in, eg
69+
// k0sworker or k0scontroller
70+
units, err := listSystemdUnits()
71+
if err != nil {
72+
return fmt.Errorf("unable to list systemd units: %w", err)
73+
}
74+
service := regexp.MustCompile("k0sworker|k0scontroller").FindString(string(units))
75+
76+
out, err := nsenterCmd("systemctl", "restart", service).CombinedOutput()
77+
slog.Debug(string(out))
78+
if err != nil {
79+
return fmt.Errorf("unable to restart %s: %w", service, err)
80+
}
81+
82+
return nil
83+
}
84+
85+
type K3sRestarter struct{}
86+
87+
func (c K3sRestarter) Restart() error {
88+
// This restarter will be used both for stock K3s distros, which use systemd as well as K3d, which does not.
89+
90+
// If listing systemd units succeeds, prefer systemctl restart; otherwise kill pid
91+
if _, err := listSystemdUnits(); err == nil {
92+
out, err := nsenterCmd("systemctl", "restart", "k3s").CombinedOutput()
93+
slog.Debug(string(out))
94+
if err != nil {
95+
return fmt.Errorf("unable to restart k3s: %w", err)
96+
}
97+
} else {
98+
// TODO: this approach still leads to the behavior mentioned in https://github.com/spinframework/runtime-class-manager/issues/140:
99+
// The first pod's provisioner container exits with code 255, leading to pod status Unknown,
100+
// followed by the subsequent pod's provisioner container no-op-ing and finishing with status Completed.
101+
pid, err := getPid("k3s")
102+
if err != nil {
103+
return err
104+
}
105+
slog.Debug("found k3s process", "pid", pid)
106+
107+
err = syscall.Kill(pid, syscall.SIGHUP)
108+
if err != nil {
109+
return fmt.Errorf("failed to send SIGHUP to k3s: %w", err)
110+
}
111+
}
112+
113+
return nil
114+
}
115+
116+
type MicroK8sRestarter struct{}
117+
118+
func (c MicroK8sRestarter) Restart() error {
119+
out, err := nsenterCmd("systemctl", "restart", "snap.microk8s.daemon-containerd").CombinedOutput()
120+
slog.Debug(string(out))
40121
if err != nil {
41-
return err
122+
return fmt.Errorf("unable to restart snap.microk8s.daemon-containerd: %w", err)
42123
}
43-
slog.Debug("found containerd process", "pid", pid)
44124

45-
err = syscall.Kill(pid, syscall.SIGHUP)
125+
return nil
126+
}
127+
128+
type RKE2Restarter struct{}
129+
130+
func (c RKE2Restarter) Restart() error {
131+
// First, collect systemd units to determine which mode rke2 is running in, eg
132+
// rke2-agent or rke2-server
133+
units, err := listSystemdUnits()
134+
if err != nil {
135+
return fmt.Errorf("unable to list systemd units: %w", err)
136+
}
137+
service := regexp.MustCompile("rke2-agent|rke2-server").FindString(string(units))
46138

139+
out, err := nsenterCmd("systemctl", "restart", service).CombinedOutput()
140+
slog.Debug(string(out))
47141
if err != nil {
48-
return fmt.Errorf("failed to send SIGHUP to containerd: %w", err)
142+
return fmt.Errorf("unable to restart %s: %w", service, err)
49143
}
144+
50145
return nil
51146
}
52147

53-
func getPid() (int, error) {
148+
func listSystemdUnits() ([]byte, error) {
149+
return nsenterCmd("systemctl", "list-units", "--type", "service").CombinedOutput()
150+
}
151+
152+
// nsenterCmd builds (but does not start) an exec.Cmd that runs cmd under
// nsenter. The -m flag points at /<HOST_ROOT>/proc/1/ns/mnt, where HOST_ROOT
// is read from the environment; "--" separates nsenter's flags from cmd.
// NOTE(review): presumably that path is the host's PID 1 mount namespace as
// mounted into the container — confirm against the deployment manifest.
func nsenterCmd(cmd ...string) *exec.Cmd {
	mountNSFlag := fmt.Sprintf("-m/%s/proc/1/ns/mnt", os.Getenv("HOST_ROOT"))

	args := make([]string, 0, len(cmd)+2)
	args = append(args, mountNSFlag, "--")
	args = append(args, cmd...)

	return exec.Command("nsenter", args...) // #nosec G204
}
156+
157+
func getPid(executable string) (int, error) {
54158
processes, err := psProcesses()
55159
if err != nil {
56160
return 0, fmt.Errorf("could not get processes: %w", err)
@@ -59,13 +163,13 @@ func getPid() (int, error) {
59163
var containerdProcesses = []ps.Process{}
60164

61165
for _, process := range processes {
62-
if process.Executable() == "containerd" {
166+
if process.Executable() == executable {
63167
containerdProcesses = append(containerdProcesses, process)
64168
}
65169
}
66170

67171
if len(containerdProcesses) != 1 {
68-
return 0, fmt.Errorf("need exactly one containerd process, found: %d", len(containerdProcesses))
172+
return 0, fmt.Errorf("need exactly one %s process, found: %d", executable, len(containerdProcesses))
69173
}
70174

71175
return containerdProcesses[0].Pid(), nil

internal/containerd/restart_unix_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ func Test_getPid(t *testing.T) {
5757
for _, tt := range tests {
5858
t.Run(tt.name, func(t *testing.T) {
5959
psProcesses = tt.psProccessesMock
60-
got, err := getPid()
60+
got, err := getPid("containerd")
6161

6262
if tt.wantErr {
6363
require.Error(t, err)

internal/preset/preset.go

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ type Env struct {
2424
var Default = Settings{
2525
ConfigPath: "/etc/containerd/config.toml",
2626
Setup: func(_ Env) error { return nil },
27-
Restarter: containerd.NewRestarter(),
27+
Restarter: containerd.NewDefaultRestarter(),
2828
}
2929

3030
func (s Settings) WithConfigPath(path string) Settings {
@@ -37,9 +37,16 @@ func (s Settings) WithSetup(setup func(env Env) error) Settings {
3737
return s
3838
}
3939

40-
var MicroK8s = Default.WithConfigPath("/var/snap/microk8s/current/args/containerd-template.toml")
40+
func (s Settings) WithRestarter(restarter containerd.Restarter) Settings {
41+
s.Restarter = restarter
42+
return s
43+
}
44+
45+
var MicroK8s = Default.WithConfigPath("/var/snap/microk8s/current/args/containerd-template.toml").
46+
WithRestarter(containerd.MicroK8sRestarter{})
4147

4248
var RKE2 = Default.WithConfigPath("/var/lib/rancher/rke2/agent/etc/containerd/config.toml.tmpl").
49+
WithRestarter(containerd.RKE2Restarter{}).
4350
WithSetup(func(env Env) error {
4451
_, err := env.HostFs.Stat(env.ConfigPath)
4552
if err == nil {
@@ -75,9 +82,11 @@ var RKE2 = Default.WithConfigPath("/var/lib/rancher/rke2/agent/etc/containerd/co
7582
return err
7683
})
7784

78-
var K3s = RKE2.WithConfigPath("/var/lib/rancher/k3s/agent/etc/containerd/config.toml.tmpl")
85+
var K3s = RKE2.WithConfigPath("/var/lib/rancher/k3s/agent/etc/containerd/config.toml.tmpl").
86+
WithRestarter(containerd.K3sRestarter{})
7987

8088
var K0s = Default.WithConfigPath("/etc/k0s/containerd.d/config.toml").
89+
WithRestarter(containerd.K0sRestarter{}).
8190
WithSetup(func(env Env) error {
8291
_, err := env.HostFs.Stat(env.ConfigPath)
8392
if err == nil {

0 commit comments

Comments
 (0)