Skip to content

Commit 92ea6ad

Browse files
committed
Switch to virtiofs
Closes: #1812 The key benefits here are: - This also works for RHEL (this is a *big deal* for my dev workflow parity) - It correctly handles symlinks - It's more maintained (e.g. used for kata containers) and hopefully we won't hit obscure bugs like the 9p OOM ones.
1 parent 77e3d12 commit 92ea6ad

12 files changed

+72
-81
lines changed

docs/working.md

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -164,9 +164,6 @@ allowing you to directly execute binaries from there. You can also use e.g.
164164
`rpm-ostree usroverlay` and then copy binaries from your host `/run/workdir` into
165165
the VM's rootfs.
166166

167-
(This currently only works on Fedora CoreOS which ships `9p`, not RHCOS. A future version
168-
will use https://virtio-fs.gitlab.io/ )
169-
170167
## Using host binaries
171168

172169
Another related trick is:

mantle/cmd/kola/qemuexec.go

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ import (
2020
"context"
2121
"fmt"
2222
"os"
23-
"path/filepath"
2423
"strconv"
2524
"strings"
2625

@@ -91,7 +90,7 @@ func init() {
9190
cmdQemuExec.Flags().BoolVarP(&devshellConsole, "devshell-console", "c", false, "Connect directly to serial console in devshell mode")
9291
cmdQemuExec.Flags().StringVarP(&ignition, "ignition", "i", "", "Path to Ignition config")
9392
cmdQemuExec.Flags().StringVarP(&butane, "butane", "B", "", "Path to Butane config")
94-
cmdQemuExec.Flags().StringArrayVar(&bindro, "bind-ro", nil, "Mount readonly via 9pfs a host directory (use --bind-ro=/path/to/host,/var/mnt/guest")
93+
cmdQemuExec.Flags().StringArrayVar(&bindro, "bind-ro", nil, "Mount readonly via virtiofs a host directory (use --bind-ro=/path/to/host,/var/mnt/guest")
9594
cmdQemuExec.Flags().StringArrayVar(&bindrw, "bind-rw", nil, "Same as above, but writable")
9695
cmdQemuExec.Flags().BoolVarP(&forceConfigInjection, "inject-ignition", "", false, "Force injecting Ignition config using guestfs")
9796
cmdQemuExec.Flags().BoolVar(&propagateInitramfsFailure, "propagate-initramfs-failure", false, "Error out if the system fails in the initramfs")
@@ -178,14 +177,11 @@ func runQemuExec(cmd *cobra.Command, args []string) error {
178177
ignitionFragments = append(ignitionFragments, "autologin")
179178
cpuCountHost = true
180179
usernet = true
181-
// Can't use 9p on RHEL8, need https://virtio-fs.gitlab.io/ instead in the future
182-
if kola.Options.CosaWorkdir != "" && !strings.HasPrefix(filepath.Base(kola.QEMUOptions.DiskImage), "rhcos") && !strings.HasPrefix(filepath.Base(kola.QEMUOptions.DiskImage), "scos") && kola.Options.Distribution != "rhcos" && kola.Options.Distribution != "scos" {
183-
// Conservatively bind readonly to avoid anything in the guest (stray tests, whatever)
184-
// from destroying stuff
185-
bindro = append(bindro, fmt.Sprintf("%s,/var/mnt/workdir", kola.Options.CosaWorkdir))
186-
// But provide the tempdir so it's easy to pass stuff back
187-
bindrw = append(bindrw, fmt.Sprintf("%s,/var/mnt/workdir-tmp", kola.Options.CosaWorkdir+"/tmp"))
188-
}
180+
// Conservatively bind readonly to avoid anything in the guest (stray tests, whatever)
181+
// from destroying stuff
182+
bindro = append(bindro, fmt.Sprintf("%s,/var/mnt/workdir", kola.Options.CosaWorkdir))
183+
// But provide the tempdir so it's easy to pass stuff back
184+
bindrw = append(bindrw, fmt.Sprintf("%s,/var/mnt/workdir-tmp", kola.Options.CosaWorkdir+"/tmp"))
189185
if hostname == "" {
190186
hostname = devshellHostname
191187
}
@@ -261,18 +257,18 @@ func runQemuExec(cmd *cobra.Command, args []string) error {
261257
if err != nil {
262258
return err
263259
}
264-
builder.Mount9p(src, dest, true)
260+
builder.MountVirtiofs(src, dest)
265261
ensureConfig()
266-
config.Mount9p(dest, true)
262+
config.MountVirtiofs(dest, true)
267263
}
268264
for _, b := range bindrw {
269265
src, dest, err := parseBindOpt(b)
270266
if err != nil {
271267
return err
272268
}
273-
builder.Mount9p(src, dest, false)
269+
builder.MountVirtiofs(src, dest)
274270
ensureConfig()
275-
config.Mount9p(dest, false)
271+
config.MountVirtiofs(dest, false)
276272
}
277273
builder.ForceConfigInjection = forceConfigInjection
278274
if len(firstbootkargs) > 0 {

mantle/platform/conf/conf.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1362,11 +1362,11 @@ resize_terminal() {
13621362
PROMPT_COMMAND+=(resize_terminal)`, 0644)
13631363
}
13641364

1365-
// Mount9p adds an Ignition config to mount an folder with 9p
1366-
func (c *Conf) Mount9p(dest string, readonly bool) {
1365+
// MountVirtiofs adds an Ignition config to mount an folder with 9p
1366+
func (c *Conf) MountVirtiofs(dest string, readonly bool) {
13671367
readonlyStr := ""
13681368
if readonly {
1369-
readonlyStr = ",ro"
1369+
readonlyStr = "ro"
13701370
}
13711371
content := fmt.Sprintf(`[Unit]
13721372
DefaultDependencies=no
@@ -1375,8 +1375,8 @@ Before=basic.target
13751375
[Mount]
13761376
What=%s
13771377
Where=%s
1378-
Type=9p
1379-
Options=trans=virtio,version=9p2000.L%s,msize=10485760
1378+
Type=virtiofs
1379+
Options=%s
13801380
[Install]
13811381
WantedBy=multi-user.target
13821382
`, dest, dest, readonlyStr)

mantle/platform/qemu.go

Lines changed: 48 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
// Why not libvirt?
1818
// Two main reasons. First, we really do want to use qemu, and not
1919
// something else. We rely on qemu features/APIs and there's a general
20-
// assumption that the qemu process is local (e.g. we expose 9p filesystem
20+
// assumption that the qemu process is local (e.g. we expose virtiofs filesystem
2121
// sharing). Second, libvirt runs as a daemon, but we want the
2222
// VMs "lifecycle bound" to their creating process (e.g. kola),
2323
// so that e.g. Ctrl-C (SIGINT) kills both reliably.
@@ -154,11 +154,12 @@ type bootIso struct {
154154

155155
// QemuInstance holds an instantiated VM through its lifecycle.
156156
type QemuInstance struct {
157-
qemu exec.Cmd
158-
architecture string
159-
tempdir string
160-
swtpm exec.Cmd
161-
nbdServers []exec.Cmd
157+
qemu exec.Cmd
158+
architecture string
159+
tempdir string
160+
swtpm exec.Cmd
161+
// Helpers are child processes such as nbd or virtiofsd that should be lifecycle bound to qemu
162+
helpers []exec.Cmd
162163
hostForwardedPorts []HostForwardPort
163164

164165
journalPipe *os.File
@@ -296,12 +297,12 @@ func (inst *QemuInstance) Destroy() {
296297
inst.swtpm.Kill() //nolint // Ignore errors
297298
inst.swtpm = nil
298299
}
299-
for _, nbdServ := range inst.nbdServers {
300-
if nbdServ != nil {
301-
nbdServ.Kill() //nolint // Ignore errors
300+
for _, p := range inst.helpers {
301+
if p != nil {
302+
p.Kill() //nolint // Ignore errors
302303
}
303304
}
304-
inst.nbdServers = nil
305+
inst.helpers = nil
305306

306307
if inst.tempdir != "" {
307308
if err := os.RemoveAll(inst.tempdir); err != nil {
@@ -430,6 +431,11 @@ func (inst *QemuInstance) RemovePrimaryBlockDevice() (err2 error) {
430431
return nil
431432
}
432433

434+
type VirtiofsMount struct {
435+
src string
436+
tag string
437+
}
438+
433439
// QemuBuilder is a configurator that can then create a qemu instance
434440
type QemuBuilder struct {
435441
// ConfigFile is a path to Ignition configuration
@@ -486,7 +492,8 @@ type QemuBuilder struct {
486492
finalized bool
487493
diskID uint
488494
disks []*Disk
489-
fs9pID uint
495+
// virtiofs is an array of virtiofs mounts
496+
virtiofs []VirtiofsMount
490497
// virtioSerialID is incremented for each device
491498
virtioSerialID uint
492499
// fds is file descriptors we own to pass to qemu
@@ -711,16 +718,10 @@ func (builder *QemuBuilder) encryptIgnitionConfig() error {
711718
return nil
712719
}
713720

714-
// Mount9p sets up a mount point from the host to guest. To be replaced
715-
// with https://virtio-fs.gitlab.io/ once it lands everywhere.
716-
func (builder *QemuBuilder) Mount9p(source, destHint string, readonly bool) {
717-
builder.fs9pID++
718-
readonlyStr := ""
719-
if readonly {
720-
readonlyStr = ",readonly=on"
721-
}
722-
builder.Append("--fsdev", fmt.Sprintf("local,id=fs%d,path=%s,security_model=mapped%s", builder.fs9pID, source, readonlyStr))
723-
builder.Append("-device", virtio(builder.architecture, "9p", fmt.Sprintf("fsdev=fs%d,mount_tag=%s", builder.fs9pID, destHint)))
721+
// MountVirtiofs sets up a mount point from the host to guest.
722+
// Note that virtiofs does not currently support read-only mounts (which is really surprising!)
723+
func (builder *QemuBuilder) MountVirtiofs(source, dest string) {
724+
builder.virtiofs = append(builder.virtiofs, VirtiofsMount{src: source, tag: dest})
724725
}
725726

726727
// supportsFwCfg if the target system supports injecting
@@ -1636,7 +1637,7 @@ func (builder *QemuBuilder) Exec() (*QemuInstance, error) {
16361637
if err := cmd.Start(); err != nil {
16371638
return nil, errors.Wrapf(err, "spawing nbd server")
16381639
}
1639-
inst.nbdServers = append(inst.nbdServers, cmd)
1640+
inst.helpers = append(inst.helpers, cmd)
16401641
}
16411642
}
16421643

@@ -1716,6 +1717,31 @@ func (builder *QemuBuilder) Exec() (*QemuInstance, error) {
17161717
return nil, err
17171718
}
17181719

1720+
// Process virtiofs mounts
1721+
if len(builder.virtiofs) > 0 {
1722+
if err := builder.ensureTempdir(); err != nil {
1723+
return nil, err
1724+
}
1725+
1726+
// Allocate a shared memory object and set up NUMA, which is needed for virtiofs
1727+
builder.Append("-object", fmt.Sprintf("memory-backend-file,id=mem,size=%dM,mem-path=/dev/shm,share=on", builder.Memory), "-numa", "node,memdev=mem")
1728+
1729+
// We assume we're already running in a container
1730+
isolation := "none"
1731+
1732+
for i, virtiofs := range builder.virtiofs {
1733+
virtiofsChar := fmt.Sprintf("virtiofschar%d", i)
1734+
virtiofsdSocket := filepath.Join(builder.tempdir, fmt.Sprintf("virtiofsd-%d.sock", i))
1735+
builder.Append("-chardev", fmt.Sprintf("socket,id=%s,path=%s", virtiofsChar, virtiofsdSocket))
1736+
builder.Append("-device", fmt.Sprintf("vhost-user-fs-pci,queue-size=1024,chardev=%s,tag=%s", virtiofsChar, virtiofs.tag))
1737+
p := exec.Command("/usr/libexec/virtiofsd", "--sandbox", isolation, "--socket-path", virtiofsdSocket, "--shared-dir", virtiofs.src)
1738+
if err := p.Start(); err != nil {
1739+
return nil, fmt.Errorf("Failed to start virtiofsd")
1740+
}
1741+
inst.helpers = append(inst.helpers, p)
1742+
}
1743+
}
1744+
17191745
fdnum := 3 // first additional file starts at position 3
17201746
for i := range builder.fds {
17211747
fdset := i + 1 // Start at 1

src/buildextend-legacy-oscontainer.sh

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,6 @@ set -euo pipefail
55

66
# Start VM and call buildah
77
final_outfile=$(realpath "$1"); shift
8-
if [ "$(uname -m)" == 'ppc64le' ]; then
9-
# helps with 9p 'cannot allocate memory' errors on ppc64le
10-
COSA_SUPERMIN_MEMORY=6144
11-
fi
128
IMAGE_TYPE=legacy-oscontainer
139
prepare_build
1410
tmp_outfile=${tmp_builddir}/legacy-oscontainer.ociarchive

src/cmd-buildextend-metal

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ if jq -re '.["deploy-via-container"]' < "${image_json}"; then
172172
deploy_via_container="true"
173173
fi
174174
container_imgref=$(jq -r '.["container-imgref"]//""' < "${image_json}")
175-
# Nowadays we pull the container across 9p rather than accessing the repo, see
175+
# Nowadays we pull the container across virtiofs rather than accessing the repo, see
176176
# https://github.com/openshift/os/issues/594
177177
ostree_container=ostree-unverified-image:oci-archive:$builddir/$(meta_key images.ostree.path)
178178

src/cmd-run

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ set -euo pipefail
99
# You can exit via either exiting the login session cleanly
1010
# in SSH (ctrl-d/exit in bash), or via `poweroff`.
1111
#
12-
# If 9p is available and this is started from a coreos-assembler
12+
# If this is started from a coreos-assembler
1313
# working directory, the build directory will be mounted readonly
1414
# at /var/mnt/workdir, and the tmp/ directory will be read *write*
1515
# at /var/mnt/workdir-tmp. This allows you to easily exchange

src/cmdlib.sh

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -676,7 +676,7 @@ meta_key() {
676676
}
677677

678678
# runvm generates and runs a minimal VM which we use to "bootstrap" our build
679-
# process. It mounts the workdir via 9p. If you need to add new packages into
679+
# process. It mounts the workdir via virtiofs. If you need to add new packages into
680680
# the vm, update `vmdeps.txt`.
681681
# If you need to debug it, one trick is to change the `-serial file` below
682682
# into `-serial stdio`, drop the <&- and virtio-serial stuff and then e.g. add
@@ -790,28 +790,16 @@ EOF
790790
esac
791791

792792
kola_args=(kola qemuexec -m "${COSA_SUPERMIN_MEMORY:-${memory_default}}" --auto-cpus -U --workdir none \
793-
--console-to-file "${runvm_console}")
793+
--console-to-file "${runvm_console}" --bind-rw "${workdir},workdir")
794794

795795
base_qemu_args=(-drive 'if=none,id=root,format=raw,snapshot=on,file='"${vmbuilddir}"'/root,index=1' \
796796
-device 'virtio-blk,drive=root' \
797797
-kernel "${vmbuilddir}/kernel" -initrd "${vmbuilddir}/initrd" \
798798
-no-reboot -nodefaults \
799799
-device virtio-serial \
800-
-virtfs 'local,id=workdir,path='"${workdir}"',security_model=none,mount_tag=workdir' \
801800
-append "root=UUID=${superminrootfsuuid} console=${DEFAULT_TERMINAL} selinux=1 enforcing=0 autorelabel=1" \
802801
)
803802

804-
# support local dev cases where src/config is a symlink. Note if you change or extend to this set,
805-
# you also need to update supermin-init-prelude.sh to mount it inside the VM.
806-
for maybe_symlink in "${workdir}"/{src/config,src/yumrepos,builds}; do
807-
if [ -L "${maybe_symlink}" ]; then
808-
# qemu follows symlinks
809-
local bn
810-
bn=$(basename "${maybe_symlink}")
811-
base_qemu_args+=("-virtfs" "local,id=${bn},path=${maybe_symlink},security_model=none,mount_tag=${bn}")
812-
fi
813-
done
814-
815803
if [ -z "${RUNVM_SHELL:-}" ]; then
816804
if ! "${kola_args[@]}" -- "${base_qemu_args[@]}" \
817805
-device virtserialport,chardev=virtioserial0,name=cosa-cmdout \

src/compose.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,14 @@ composejson=cache/repo/compose.json
99
rm -rf "${repo}"
1010
ostree init --repo="${repo}" --mode=archive-z2
1111

12-
# we do need to pull at least the overlay bits over 9p, but it shouldn't be that
12+
# we do need to pull at least the overlay bits over virtiofs, but it shouldn't be that
1313
# many files
1414
ostree refs --repo tmp/repo overlay --list | \
1515
xargs -r ostree pull-local --repo "${repo}" tmp/repo
1616
# And import commit metadata for all refs; this will bring in the previous
1717
# build if there is one. Ideally, we'd import the SELinux policy too to take
1818
# advantage of https://github.com/coreos/rpm-ostree/pull/1704 but that's yet
19-
# more files over 9p and our pipelines don't have cache anyway (and devs likely
19+
# more files over virtiofs and our pipelines don't have cache anyway (and devs likely
2020
# use the privileged path).
2121
ostree refs --repo tmp/repo | \
2222
xargs -r ostree pull-local --commit-metadata-only --repo "${repo}" tmp/repo

src/create_disk.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ if [ -z "$platforms_json" ]; then
7777
echo "Missing --platforms-json" >&2
7878
exit 1
7979
fi
80-
# just copy it over to /tmp and work from there to minimize 9p I/O
80+
# just copy it over to /tmp and work from there to minimize virtiofs I/O
8181
cp "${platforms_json}" /tmp/platforms.json
8282
platforms_json=/tmp/platforms.json
8383
platform_grub_cmds=$(jq -r ".${arch}.${platform}.grub_commands // [] | join(\"\\\\n\")" < "${platforms_json}")

src/deps.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ genisoimage
2929
make git rpm-build
3030

3131
# virt dependencies
32-
libguestfs-tools libguestfs-tools-c /usr/bin/qemu-img qemu-kvm swtpm
32+
libguestfs-tools libguestfs-tools-c virtiofsd /usr/bin/qemu-img qemu-kvm swtpm
3333
# And the main arch emulators for cross-arch testing
3434
qemu-system-aarch64-core qemu-system-ppc-core qemu-system-s390x-core qemu-system-x86-core
3535

src/supermin-init-prelude.sh

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,6 @@ mount -t tmpfs tmpfs /dev/shm
1515
# load selinux policy
1616
LANG=C /sbin/load_policy -i
1717

18-
# load kernel module for 9pnet_virtio for 9pfs mount
19-
/sbin/modprobe 9pnet_virtio
2018

2119
# need fuse module for rofiles-fuse/bwrap during post scripts run
2220
/sbin/modprobe fuse
@@ -36,19 +34,9 @@ fi
3634
umask 002
3735

3836
# set up workdir
39-
# For 9p mounts set msize to 100MiB
4037
# https://github.com/coreos/coreos-assembler/issues/2171
4138
mkdir -p "${workdir:?}"
42-
mount -t 9p -o rw,trans=virtio,version=9p2000.L,msize=10485760 workdir "${workdir}"
43-
44-
# This loop pairs with virtfs setups for qemu in cmdlib.sh. Keep them in sync.
45-
for maybe_symlink in "${workdir}"/{src/config,src/yumrepos,builds}; do
46-
if [ -L "${maybe_symlink}" ]; then
47-
bn=$(basename "${maybe_symlink}")
48-
mkdir -p "$(readlink "${maybe_symlink}")"
49-
mount -t 9p -o rw,trans=virtio,version=9p2000.L,msize=10485760 "${bn}" "${maybe_symlink}"
50-
fi
51-
done
39+
mount -t virtiofs -o rw workdir "${workdir}"
5240

5341
mkdir -p "${workdir}"/cache
5442
cachedev=$(blkid -lt LABEL=cosa-cache -o device || true)

0 commit comments

Comments
 (0)