neonvm: hacking on adding virtio-fs support #963

Draft: wants to merge 4 commits into main. Changes shown from all commits.
9 changes: 9 additions & 0 deletions neonvm/apis/neonvm/v1/virtualmachine_types.go
@@ -424,6 +424,10 @@ type Disk struct {
type DiskSource struct {
// EmptyDisk represents a temporary empty qcow2 disk that shares a vm's lifetime.
EmptyDisk *EmptyDiskSource `json:"emptyDisk,omitempty"`
// Virtiofs represents a virtiofs-backed device created in an empty directory alongside the VM,
// sharing the VM's lifetime.
// +optional
Virtiofs *VirtiofsSource `json:"virtiofs,omitempty"`
// configMap represents a configMap that should populate this disk
// +optional
ConfigMap *corev1.ConfigMapVolumeSource `json:"configMap,omitempty"`
@@ -441,6 +445,11 @@ type EmptyDiskSource struct {
Discard bool `json:"discard,omitempty"`
}

type VirtiofsSource struct {
// SizeLimit sets the maximum size of the volume mount containing the virtiofs directory
SizeLimit resource.Quantity `json:"sizeLimit"`
}

type TmpfsDiskSource struct {
Size resource.Quantity `json:"size"`
}
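
For illustration, here is a sketch (not part of this diff) of how a Disk using the new virtiofs source could be constructed. It assumes the DiskSource fields are inlined into Disk, as the CRD's required name/mountPath fields suggest, and that the module path is github.com/neondatabase/autoscaling:

package example

import (
	vmv1 "github.com/neondatabase/autoscaling/neonvm/apis/neonvm/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// exampleVirtiofsDisk is a hypothetical helper showing the new field in use.
func exampleVirtiofsDisk() vmv1.Disk {
	return vmv1.Disk{
		Name:      "cache",
		MountPath: "/cache",
		DiskSource: vmv1.DiskSource{
			Virtiofs: &vmv1.VirtiofsSource{
				// SizeLimit caps the EmptyDir volume backing the shared directory.
				SizeLimit: resource.MustParse("1Gi"),
			},
		},
	}
}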
21 changes: 21 additions & 0 deletions neonvm/apis/neonvm/v1/zz_generated.deepcopy.go

(Generated file; diff not rendered.)

2 changes: 1 addition & 1 deletion neonvm/config/controller/deployment.yaml
@@ -15,7 +15,7 @@ spec:
selector:
matchLabels:
control-plane: controller
replicas: 3
replicas: 1 # temporary, to make debugging easier
template:
metadata:
annotations:
16 changes: 16 additions & 0 deletions neonvm/config/crd/bases/vm.neon.tech_virtualmachines.yaml
@@ -1068,6 +1068,22 @@ spec:
required:
- size
type: object
virtiofs:
description: Virtiofs represents a virtiofs-backed device created
in an empty directory alongside the VM, sharing the VM's
lifetime.
properties:
sizeLimit:
anyOf:
- type: integer
- type: string
description: SizeLimit sets the maximum size of the volume
mount containing the virtiofs directory
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
required:
- sizeLimit
type: object
required:
- mountPath
- name
4 changes: 2 additions & 2 deletions neonvm/controllers/catch_panic.go
@@ -23,8 +23,8 @@ func (r *catchPanicReconciler) Reconcile(ctx context.Context, req ctrl.Request)

defer func() {
if v := recover(); v != nil {
err = fmt.Errorf("Reconcile panicked: %v", v)
log.Error(err, "stack", string(debug.Stack()))
err = fmt.Errorf("panicked with: %v", v)
log.Error(err, "Reconcile panicked", "stack", string(debug.Stack()))
}
}()

58 changes: 43 additions & 15 deletions neonvm/controllers/vm_controller.go
@@ -30,6 +30,7 @@ import (
"os"
"reflect"
"strconv"
"strings"
"time"

nadapiv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1"
@@ -429,7 +430,8 @@ func (r *VMReconciler) doReconcile(ctx context.Context, vm *vmv1.VirtualMachine)
return err
}
// runner pod found, check phase
switch runnerStatus(vmRunner) {
status, reason := runnerStatus(vmRunner)
switch status {
case runnerRunning:
vm.Status.PodIP = vmRunner.Status.PodIP
vm.Status.Phase = vmv1.VmRunning
Expand Down Expand Up @@ -458,6 +460,7 @@ func (r *VMReconciler) doReconcile(ctx context.Context, vm *vmv1.VirtualMachine)
Message: fmt.Sprintf("Pod (%s) for VirtualMachine (%s) succeeded", vm.Status.PodName, vm.Name)})
case runnerFailed:
vm.Status.Phase = vmv1.VmFailed
r.Recorder.Event(vm, "Warning", "Failed", fmt.Sprintf("Runner pod %s failed because: %s", vm.Status.PodName, reason))
meta.SetStatusCondition(&vm.Status.Conditions,
metav1.Condition{Type: typeDegradedVirtualMachine,
Status: metav1.ConditionTrue,
@@ -500,7 +503,8 @@ func (r *VMReconciler) doReconcile(ctx context.Context, vm *vmv1.VirtualMachine)
}

// runner pod found, check/update phase now
switch runnerStatus(vmRunner) {
status, reason := runnerStatus(vmRunner)
switch status {
case runnerRunning:
// update status by IP of runner pod
vm.Status.PodIP = vmRunner.Status.PodIP
@@ -578,6 +582,7 @@ func (r *VMReconciler) doReconcile(ctx context.Context, vm *vmv1.VirtualMachine)
Message: fmt.Sprintf("Pod (%s) for VirtualMachine (%s) succeeded", vm.Status.PodName, vm.Name)})
case runnerFailed:
vm.Status.Phase = vmv1.VmFailed
r.Recorder.Event(vm, "Warning", "Failed", fmt.Sprintf("Runner pod %s failed because: %s", vm.Status.PodName, reason))
meta.SetStatusCondition(&vm.Status.Conditions,
metav1.Condition{Type: typeDegradedVirtualMachine,
Status: metav1.ConditionTrue,
@@ -621,7 +626,8 @@ func (r *VMReconciler) doReconcile(ctx context.Context, vm *vmv1.VirtualMachine)
}

// runner pod found, check that it's still up:
switch runnerStatus(vmRunner) {
status, reason := runnerStatus(vmRunner)
switch status {
case runnerSucceeded:
vm.Status.Phase = vmv1.VmSucceeded
meta.SetStatusCondition(&vm.Status.Conditions,
@@ -632,6 +638,7 @@ func (r *VMReconciler) doReconcile(ctx context.Context, vm *vmv1.VirtualMachine)
return nil
case runnerFailed:
vm.Status.Phase = vmv1.VmFailed
r.Recorder.Event(vm, "Warning", "Failed", fmt.Sprintf("Runner pod %s failed because: %s", vm.Status.PodName, reason))
meta.SetStatusCondition(&vm.Status.Conditions,
metav1.Condition{Type: typeDegradedVirtualMachine,
Status: metav1.ConditionTrue,
@@ -838,20 +845,20 @@ const (
// container other than neonvm-runner has exited
// - runnerSucceeded, if pod.Status.Phase is Succeeded, or if neonvm-runner has exited
// successfully
func runnerStatus(pod *corev1.Pod) runnerStatusKind {
func runnerStatus(pod *corev1.Pod) (_ runnerStatusKind, reason string) {
switch pod.Status.Phase {
case "", corev1.PodPending:
return runnerPending
return runnerPending, "Pod Pending"
case corev1.PodSucceeded:
return runnerSucceeded
return runnerSucceeded, "Pod Succeeded"
case corev1.PodFailed:
return runnerFailed
return runnerFailed, "Pod Failed"
case corev1.PodUnknown:
return runnerUnknown
return runnerUnknown, "Pod Unknown"

// See comment above for context on this logic
case corev1.PodRunning:
nonRunnerContainerSucceeded := false
nonRunnerContainersSucceeded := []string{}
runnerContainerSucceeded := false

for _, stat := range pod.Status.ContainerStatuses {
@@ -861,7 +868,11 @@ func runnerStatus(pod *corev1.Pod) runnerStatusKind {

if failed {
// return that the "runner" has failed if any container has.
return runnerFailed
return runnerFailed, fmt.Sprintf(
"Container %s ExitCode = %d",
stat.Name,
stat.State.Terminated.ExitCode,
)
} else /* succeeded */ {
if isRunner {
// neonvm-runner succeeded. We'll return runnerSucceeded if no other
@@ -871,18 +882,24 @@ func runnerStatus(pod *corev1.Pod) runnerStatusKind {
// Other container has succeeded. We'll return runnerSucceeded if
// neonvm-runner has succeeded, but runnerFailed if this exited while
// neonvm-runner is still going.
nonRunnerContainerSucceeded = true
nonRunnerContainersSucceeded = append(nonRunnerContainersSucceeded, stat.Name)
}
}
}
}

if runnerContainerSucceeded {
return runnerSucceeded
} else if nonRunnerContainerSucceeded {
return runnerFailed
return runnerSucceeded, "Pod Running, neonvm-runner succeeded"
} else if len(nonRunnerContainersSucceeded) > 0 {
var msgStart string
if len(nonRunnerContainersSucceeded) == 1 {
msgStart = fmt.Sprintf("Container %s", nonRunnerContainersSucceeded[0])
} else {
msgStart = fmt.Sprintf("Containers %s", strings.Join(nonRunnerContainersSucceeded, ","))
}
return runnerFailed, fmt.Sprintf("%s succeeded, but not neonvm-runner", msgStart)
} else {
return runnerRunning
return runnerRunning, "Pod and all containers Running"
}

default:
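
To make the new two-value return concrete, here is a sketch (not from this PR) of what a caller sees for a pod whose container exited nonzero. The container name and exit code are illustrative, and it assumes a nonzero exit code is what marks a container as failed, as the message format above suggests:

pod := &corev1.Pod{
	Status: corev1.PodStatus{
		Phase: corev1.PodRunning,
		ContainerStatuses: []corev1.ContainerStatus{{
			Name: "neonvm-runner",
			State: corev1.ContainerState{
				Terminated: &corev1.ContainerStateTerminated{ExitCode: 1},
			},
		}},
	},
}
status, reason := runnerStatus(pod)
// status == runnerFailed
// reason == "Container neonvm-runner ExitCode = 1"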
Expand Down Expand Up @@ -1679,6 +1696,17 @@ func podSpec(vm *vmv1.VirtualMachine, sshSecret *corev1.Secret, config *Reconcil
},
},
})
case disk.Virtiofs != nil:
mnt.MountPath = fmt.Sprintf("/vm/mounts/%s", disk.Name) // TODO: cloud#14473
pod.Spec.Containers[0].VolumeMounts = append(pod.Spec.Containers[0].VolumeMounts, mnt)
pod.Spec.Volumes = append(pod.Spec.Volumes, corev1.Volume{
Name: disk.Name,
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
SizeLimit: &disk.Virtiofs.SizeLimit,
},
},
})
default:
// do nothing
}
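
Note that the hunk above only mounts the EmptyDir into the runner pod at /vm/mounts/<name>; a virtiofsd process still has to be pointed at that directory and attached to QEMU. A rough sketch of that wiring, purely as an assumption about how the runner might do it (the socket path is invented; the flags follow the upstream Rust virtiofsd CLI; fmt and os/exec imports are assumed):

// Hypothetical: one virtiofsd per virtiofs disk, sharing the EmptyDir
// that podSpec mounted at /vm/mounts/<name>.
sock := fmt.Sprintf("/var/run/virtiofsd-%s.sock", disk.Name)
cmd := exec.Command(
	"/usr/bin/virtiofsd",
	"--socket-path", sock,
	"--shared-dir", fmt.Sprintf("/vm/mounts/%s", disk.Name),
)
if err := cmd.Start(); err != nil {
	return err
}
// QEMU would then attach the share with flags along the lines of:
//   -chardev socket,id=fs0,path=<socket>
//   -device vhost-user-fs-pci,chardev=fs0,tag=<disk name>
// plus a shared memory backend (e.g. memory-backend-memfd with share=on).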
5 changes: 3 additions & 2 deletions neonvm/controllers/vmmigration_controller.go
@@ -300,7 +300,8 @@ func (r *VirtualMachineMigrationReconciler) Reconcile(ctx context.Context, req c
}

// now inspect target pod status and update migration
switch runnerStatus(targetRunner) {
status, reason := runnerStatus(targetRunner)
switch status {
case runnerRunning:
// update migration status
migration.Status.SourcePodName = vm.Status.PodName
@@ -359,7 +360,7 @@ func (r *VirtualMachineMigrationReconciler) Reconcile(ctx context.Context, req c
migration.Status.Phase = vmv1.VmmFailed
return r.updateMigrationStatus(ctx, migration)
case runnerFailed:
message := fmt.Sprintf("Target Pod (%s) failed", targetRunner.Name)
message := fmt.Sprintf("Target Pod (%s) failed because: %s", targetRunner.Name, reason)
log.Info(message)
r.Recorder.Event(migration, "Warning", "Failed", message)
meta.SetStatusCondition(&migration.Status.Conditions,
28 changes: 22 additions & 6 deletions neonvm/runner/Dockerfile
@@ -27,7 +27,7 @@ COPY pkg/util pkg/util
RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o /runner neonvm/runner/main.go
RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o /container-mgr neonvm/runner/container-mgr/*.go

FROM alpine:3.16 as crictl
FROM alpine:3.18 as crictl

RUN apk add --no-cache \
curl
@@ -38,6 +38,23 @@ ENV VERSION="v1.25.0"
RUN curl -L "https://github.com/kubernetes-sigs/cri-tools/releases/download/$VERSION/crictl-$VERSION-linux-amd64.tar.gz" -o crictl.tar.gz \
&& tar zxvf crictl.tar.gz -C /

# Also build virtiofsd
FROM rust:1.78-alpine as virtiofsd
WORKDIR /workspace

RUN apk add musl-dev git libcap-ng-static libseccomp-static

# == 83057321d7920ab04c324981318ad62ee1a6d986
# Latest release (2024-06-06) as of 2024-06-15. First release including live migration.
ENV BRANCH v1.11.0

RUN git clone --depth 1 --branch $BRANCH https://gitlab.com/virtio-fs/virtiofsd
RUN env LIBCAPNG_LINK_TYPE=static LIBCAPNG_LIB_PATH=/usr/lib/ \
LIBSECCOMP_LINK_TYPE=static LIBSECCOMP_LIB_PATH=/usr/lib \
cargo install --path virtiofsd \
&& mv /usr/local/cargo/bin/virtiofsd /virtiofsd

# Put it together
FROM alpine:3.16

RUN apk add --no-cache \
@@ -54,14 +71,13 @@ RUN apk add --no-cache \
e2fsprogs \
qemu-system-x86_64 \
qemu-img \
parted \
sfdisk \
cgroup-tools \
openssh

COPY --from=builder /runner /usr/bin/runner
COPY --from=builder /container-mgr /usr/bin/container-mgr
COPY --from=crictl /crictl /usr/bin/crictl
COPY --from=virtiofsd /virtiofsd /usr/bin/virtiofsd
COPY neonvm/hack/kernel/vmlinuz /vm/kernel/vmlinuz
COPY neonvm/runner/ssh_config /etc/ssh/ssh_config
