From f382adb00b83d033074970f153c96b4917333563 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20M=C3=BCller?= Date: Tue, 27 Aug 2024 13:46:30 -0700 Subject: [PATCH] Use virtiofsd for sharing file system data with host MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Switch over to using virtiofsd for sharing file system data with the host. virtiofs is a file system designed for the needs of virtual machines and environments. That is in contrast to 9P fs, which we currently use for sharing data with the host, which is first and foremost a network file system. 9P is problematic if for no other reason that it lacks proper support for usage of the "open-unlink-fstat idiom", in which files are unlinked and later referenced via file descriptor (see #83). virtiofs does not have this problem. This change replaces usage of 9P with that of virtiofs. In order to work, virtiofs needs a user space server. The current state-of-the-art implementation (virtiofsd) is implemented in Rust and so we interface directly with the library. Most of this code is extracted straight from virtiofsd, as it's a lot of boilerplate. An alternative approach is to install the binary via distribution packages or from crates.io, but availability (and discovery) can be a bit of a challenge. Note that this now means that both libcap-ng as well as libseccomp need to be installed. I benchmarked both the current master as well as this version with a bare-bones custom kernel: Benchmark 1: target/release/vmtest -k bzImage-9p 'echo test' Time (mean ± σ): 1.316 s ± 0.087 s [User: 0.462 s, System: 1.104 s] Range (min … max): 1.232 s … 1.463 s 10 runs Benchmark 1: target/release/vmtest -k bzImage-virtiofsd 'echo test' Time (mean ± σ): 1.244 s ± 0.011 s [User: 0.307 s, System: 0.358 s] Range (min … max): 1.227 s … 1.260 s 10 runs So it seems there is a ~0.7s speed up, on average (and significantly less system time being used). 
This is great, but I suspect a more pronounced speed advantage will be visible when working with large files, in which virtiofs is said to significantly outperform 9P (typically >2x from what I understand, but I have not done any benchmarks of that nature). A few other notes: - we solely rely on guest-level read-only mounts to enforce read-only state. The virtiofsd recommended way is to use read-only bind mounts [0], but doing so would require root. - we are not using DAX, because it is still incomplete and apparently requires building Qemu (?) from source. In any event, it should not change anything functionally and be solely a performance improvement. I have adjusted the configs, but because I don't have Docker handy I can't really create those kernels. CI seems incapable of producing the artifacts without doing a full-blown release dance. No idea what empty is about, really. I suspect the test failures we see are because it lacks support? Some additional resources worth keeping around: - https://virtio-fs.gitlab.io/howto-boot.html - https://virtio-fs.gitlab.io/howto-qemu.html [0] https://gitlab.com/virtio-fs/virtiofsd/-/blob/main/README.md?ref_type=heads#faq [1] https://gitlab.com/virtio-fs/virtiofsd/-/blob/main/src/main.rs?ref_type=heads#L1242 Closes: #16 Closes: #83 Signed-off-by: Daniel Müller --- .github/workflows/rust.yml | 8 +- Cargo.lock | 382 ++++++++++++++++++++++++- Cargo.toml | 8 + README.md | 11 +- src/lib.rs | 1 + src/output.rs | 5 + src/qemu.rs | 128 +++++---- src/ui.rs | 10 + src/virtiofsd.rs | 562 +++++++++++++++++++++++++++++++++++++ tests/kernels/archlinux | 4 +- tests/kernels/fedora38 | 4 +- tests/test.rs | 2 +- 12 files changed, 1054 insertions(+), 71 deletions(-) create mode 100644 src/virtiofsd.rs diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 8e7787b..e8143d9 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -29,14 +29,14 @@ jobs: override: true components: rustfmt, clippy - - 
name: Build - run: make - - name: Install test deps run: | sudo apt-get update # Virtualization deps - sudo apt-get install -y qemu-system-x86-64 qemu-guest-agent qemu-utils ovmf + sudo apt-get install -y qemu-system-x86-64 qemu-guest-agent qemu-utils ovmf libcap-ng-dev libseccomp2 + + - name: Build + run: make - name: Cache test assets uses: actions/cache@v3 diff --git a/Cargo.lock b/Cargo.lock index 6192774..e68cc79 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -65,6 +65,32 @@ version = "1.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" +[[package]] +name = "arc-swap" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" + +[[package]] +name = "atomic-polyfill" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cf2bce30dfe09ef0bfaef228b9d414faaf7e563035494d7fe092dba54b300f4" +dependencies = [ + "critical-section", +] + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi 0.1.19", + "libc", + "winapi", +] + [[package]] name = "autocfg" version = "1.1.0" @@ -89,6 +115,22 @@ version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "capng" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a26766f93f07f7e8b8309ed2824fa2a68f5d12d219de855e24688e9fbe89e85" +dependencies = [ + 
"bitflags 1.3.2", + "libc", +] + [[package]] name = "cfg-if" version = "1.0.0" @@ -135,6 +177,12 @@ version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" +[[package]] +name = "cobs" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15" + [[package]] name = "colorchoice" version = "1.0.0" @@ -160,18 +208,58 @@ dependencies = [ "windows-sys 0.45.0", ] +[[package]] +name = "critical-section" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f64009896348fc5af4222e9cf7d7d82a95a256c634ebcf61c53e4ea461422242" + +[[package]] +name = "deranged" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" +dependencies = [ + "powerfmt", +] + [[package]] name = "either" version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" +[[package]] +name = "embedded-io" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced" + +[[package]] +name = "embedded-io" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" + [[package]] name = "encode_unicode" version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" +[[package]] +name = "env_logger" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3" 
+dependencies = [ + "atty", + "humantime", + "log", + "regex", + "termcolor", +] + [[package]] name = "env_logger" version = "0.10.0" @@ -195,6 +283,15 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "error-chain" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d2f06b9cac1506ece98fe3231e3cc9c4410ec3d5b1f24ae1c8946f0742cdefc" +dependencies = [ + "version_check", +] + [[package]] name = "fastrand" version = "2.0.1" @@ -241,6 +338,7 @@ dependencies = [ "futures-core", "futures-task", "futures-util", + "num_cpus", ] [[package]] @@ -313,18 +411,61 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +[[package]] +name = "hash32" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0c35f58762feb77d74ebe43bdbc3210f09be9fe6742234d573bacc26ed92b67" +dependencies = [ + "byteorder", +] + +[[package]] +name = "heapless" +version = "0.7.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdc6457c0eb62c71aac4bc17216026d8410337c4126773b9c5daba343f17964f" +dependencies = [ + "atomic-polyfill", + "hash32", + "rustc_version", + "serde", + "spin", + "stable_deref_trait", +] + [[package]] name = "heck" version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + [[package]] name = "hermit-abi" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" +[[package]] +name = "hostname" +version = "0.3.1" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c731c3e10504cc8ed35cfe2f1db4c9274c3d35fa486e3b31df46f068ef3e867" +dependencies = [ + "libc", + "match_cfg", + "winapi", +] + [[package]] name = "humantime" version = "2.1.0" @@ -337,7 +478,7 @@ version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" dependencies = [ - "hermit-abi", + "hermit-abi 0.3.3", "rustix", "windows-sys 0.48.0", ] @@ -365,9 +506,15 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.149" +version = "0.2.158" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b" +checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439" + +[[package]] +name = "libseccomp-sys" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a7cbbd4ad467251987c6e5b47d53b11a5a05add08f2447a9e2d70aef1e0d138" [[package]] name = "linux-raw-sys" @@ -375,12 +522,28 @@ version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da2479e8c062e40bf0066ffa0bc823de0a9368974af99c9f6df941d2c231e03f" +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + [[package]] name = "log" version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +[[package]] +name = "match_cfg" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4" + [[package]] name = "memchr" 
version = "2.6.4" @@ -410,6 +573,31 @@ dependencies = [ "pin-utils", ] +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi 0.3.3", + "libc", +] + +[[package]] +name = "num_threads" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" +dependencies = [ + "libc", +] + [[package]] name = "pin-project-lite" version = "0.2.13" @@ -422,6 +610,25 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "postcard" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f7f0a8d620d71c457dd1d47df76bb18960378da56af4527aaa10f515eee732e" +dependencies = [ + "cobs", + "embedded-io 0.4.0", + "embedded-io 0.6.1", + "heapless", + "serde", +] + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "ppv-lite86" version = "0.2.17" @@ -708,6 +915,21 @@ dependencies = [ "autocfg", ] +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +dependencies = [ + "lock_api", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "strsim" version = "0.10.0" @@ -725,6 +947,19 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "syslog" +version = "6.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfc7e95b5b795122fafe6519e27629b5ab4232c73ebb2428f568e82b1a457ad3" +dependencies = [ + "error-chain", + "hostname", + "libc", + "log", + "time", +] + [[package]] name = "tempfile" version = "3.8.1" @@ -778,6 +1013,39 @@ dependencies = [ "syn", ] +[[package]] +name = "time" +version = "0.3.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" +dependencies = [ + "deranged", + "itoa", + "libc", + "num-conv", + "num_threads", + "powerfmt", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" + +[[package]] +name = "time-macros" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf" +dependencies = [ + "num-conv", + "time-core", +] + [[package]] name = "tinytemplate" version = "1.2.1" @@ -815,6 +1083,104 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "vhost" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6be08d1166d41a78861ad50212ab3f9eca0729c349ac3a7a8f557c62406b87cc" +dependencies = [ + "bitflags 
2.4.1", + "libc", + "vm-memory", + "vmm-sys-util", +] + +[[package]] +name = "vhost-user-backend" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f0ffb1dd8e00a708a0e2c32d5efec5812953819888591fff9ff68236b8a5096" +dependencies = [ + "libc", + "log", + "vhost", + "virtio-bindings", + "virtio-queue", + "vm-memory", + "vmm-sys-util", +] + +[[package]] +name = "virtio-bindings" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "878bcb1b2812a10c30d53b0ed054999de3d98f25ece91fc173973f9c57aaae86" + +[[package]] +name = "virtio-queue" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07d8406e7250c934462de585d8f2d2781c31819bca1fbb7c5e964ca6bbaabfe8" +dependencies = [ + "log", + "virtio-bindings", + "vm-memory", + "vmm-sys-util", +] + +[[package]] +name = "virtiofsd" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bf6eb4267f16cdf3c540a1d40c7477ec88fa1dd82498600b8ab6c9759647642" +dependencies = [ + "bitflags 1.3.2", + "capng", + "clap", + "env_logger 0.8.4", + "futures", + "libc", + "libseccomp-sys", + "log", + "postcard", + "serde", + "syslog", + "vhost", + "vhost-user-backend", + "virtio-bindings", + "virtio-queue", + "vm-memory", + "vmm-sys-util", +] + +[[package]] +name = "vm-memory" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c3aba5064cc5f6f7740cddc8dae34d2d9a311cac69b60d942af7f3ab8fc49f4" +dependencies = [ + "arc-swap", + "libc", + "thiserror", + "winapi", +] + +[[package]] +name = "vmm-sys-util" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1435039746e20da4f8d507a72ee1b916f7b4b05af7a91c093d2c6561934ede" +dependencies = [ + "bitflags 1.3.2", + "libc", +] + [[package]] name = "vmtest" version = "0.14.0" @@ -822,8 +1188,9 @@ dependencies = [ "anyhow", "clap", "console", 
- "env_logger", + "env_logger 0.10.0", "itertools", + "libc", "log", "qapi", "rand", @@ -837,6 +1204,13 @@ dependencies = [ "test-log", "tinytemplate", "toml", + "vhost", + "vhost-user-backend", + "virtio-bindings", + "virtio-queue", + "virtiofsd", + "vm-memory", + "vmm-sys-util", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index bb76c7c..92b774f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ clap = { version = "4.0.26", features = ["derive", "string"] } console = "0.15.5" env_logger = "0.10.0" itertools = "0.10.5" +libc = "0.2.158" log = "0.4.17" qapi = { version = "0.14.0", features = ["qmp", "qga"] } rand = "0.8.5" @@ -25,6 +26,13 @@ serde_derive = "1.0.147" tempfile = "3.5.0" tinytemplate = "1.2.1" toml = "0.5.9" +vhost = "0.11.0" +vhost-user-backend = "0.15.0" +virtio-bindings = "0.2.1" +virtio-queue = "0.12.0" +virtiofsd = "1.11.1" +vm-memory = { version = "0.14.1", features = ["backend-mmap", "backend-atomic"] } +vmm-sys-util = "0.12.1" [dev-dependencies] rexpect = "0.5" diff --git a/README.md b/README.md index 5f34157..5d0cea9 100644 --- a/README.md +++ b/README.md @@ -26,14 +26,16 @@ The following are required dependencies, grouped by location: Host machine: -* [`qemu`](https://pkgs.org/download/qemu) +* [`qemu`](https://pkgs.org/download/qemu) (version 5.9 or higher) * [`qemu-guest-agent`](https://pkgs.org/search/?q=qemu-guest-agent) * [`OVMF`](https://pkgs.org/download/ovmf) +* libcap-ng +* libseccomp Virtual machine image: * `qemu-guest-agent` -* Kernel 9p filesystem support, either compiled in or as modules (see kernel +* Kernel `virtiofs` support, either compiled in or as modules (see kernel dependencies) * Most (if not all) distros already ship support as modules or better @@ -42,9 +44,8 @@ Kernel: * `CONFIG_VIRTIO=y` * `CONFIG_VIRTIO_PCI=y` * `CONFIG_VIRTIO_CONSOLE=y` -* `CONFIG_NET_9P=y` -* `CONFIG_NET_9P_VIRTIO=y` -* `CONFIG_9P_FS=y` +* `CONFIG_FUSE_FS=y` +* `CONFIG_VIRTIO_FS=y` Note the virtual machine image dependencies are 
only required if you're using the `image` target parameter. Likewise, the same applies for kernel diff --git a/src/lib.rs b/src/lib.rs index 7c7de7c..93988c9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,3 +17,4 @@ pub use crate::vmtest::*; mod qemu; mod qga; mod util; +mod virtiofsd; diff --git a/src/output.rs b/src/output.rs index 440e242..e8cd0c2 100644 --- a/src/output.rs +++ b/src/output.rs @@ -10,6 +10,11 @@ use anyhow::Result; /// Receivers should treat failures as terminal and not expect any more /// updates. pub enum Output { + /// On-host initialization starts + InitializeStart, + /// Initialization finished with provided with provided result + InitializeEnd(Result<()>), + /// VM boot begins BootStart, /// Output related to VM boot diff --git a/src/qemu.rs b/src/qemu.rs index 652543d..ca49b04 100644 --- a/src/qemu.rs +++ b/src/qemu.rs @@ -27,18 +27,17 @@ use tinytemplate::{format_unescaped, TinyTemplate}; use crate::output::Output; use crate::qga::QgaWrapper; use crate::util::gen_sock; +use crate::virtiofsd::Virtiofsd; use crate::{Mount, Target, VMConfig}; const INIT_TEMPLATE: &str = include_str!("init/init.sh.template"); const COMMAND_TEMPLATE: &str = include_str!("init/command.template"); -// Needs to be `/dev/root` for kernel to "find" the 9pfs as rootfs -const ROOTFS_9P_FS_MOUNT_TAG: &str = "/dev/root"; -const SHARED_9P_FS_MOUNT_TAG: &str = "vmtest-shared"; +const ROOT_FS_MOUNT_TAG: &str = "rootfs"; +const SHARED_FS_MOUNT_TAG: &str = "vmtest-shared"; const COMMAND_OUTPUT_PORT_NAME: &str = "org.qemu.virtio_serial.0"; const MAGIC_INTERACTIVE_COMMAND: &str = "-"; -const SHARED_9P_FS_MOUNT_PATH: &str = "/mnt/vmtest"; -const MOUNT_OPTS_9P_FS: &str = "trans=virtio,cache=mmap,msize=1048576"; +const SHARED_FS_MOUNT_PATH: &str = "/mnt/vmtest"; const OVMF_PATHS: &[&str] = &[ // Fedora "/usr/share/edk2/ovmf/OVMF_CODE.fd", @@ -55,6 +54,8 @@ type QmpUnixStream = qapi::Stream, UnixStream>; /// Represents a single QEMU instance pub struct Qemu { process: Command, 
+ /// `virtiofsd` instances for each of the mounts in use. + virtiofsds: Vec, qga_sock: PathBuf, qmp_sock: PathBuf, command: String, @@ -241,6 +242,18 @@ fn guest_agent_args(sock: &Path) -> Vec { args } +/// Generate general arguments necessary for working with `virtiofs`. +fn virtiofs_general_args(vm: &VMConfig) -> Vec { + let mut args: Vec = Vec::new(); + + args.push("-object".into()); + args.push(format!("memory-backend-memfd,id=mem,share=on,size={}", vm.memory.as_str()).into()); + args.push("-numa".into()); + args.push("node,memdev=mem".into()); + + args +} + /// Generate arguments for full KVM virtualization if host supports it fn kvm_args(arch: &str) -> Vec<&'static str> { let mut args = Vec::new(); @@ -291,30 +304,17 @@ fn machine_protocol_args(sock: &Path) -> Vec { args } -/// Generate arguments for setting up 9p FS server on host +/// Generate per-file-system arguments necessary for working with `virtiofs`. /// -/// `id` is the ID for the FS export (currently unused AFAICT) +/// `id` is the ID for the FS export /// `mount_tag` is used inside guest to find the export -fn plan9_fs_args(host_shared: &Path, id: &str, mount_tag: &str, ro: bool) -> Vec { +fn virtiofs_per_fs_args(virtiofsd: &Virtiofsd, id: &str, mount_tag: &str) -> Vec { let mut args: Vec = Vec::new(); - args.push("-virtfs".into()); - - let mut arg = OsString::new(); - arg.push(format!("local,id={id},path=")); - arg.push(if host_shared.as_os_str().is_empty() { - // This case occurs when the config file path is just "vmtest.toml" - Path::new(".") - } else { - host_shared - }); - arg.push(format!( - ",mount_tag={mount_tag},security_model=none,multidevs=remap" - )); - if ro { - arg.push(",readonly=on") - } - args.push(arg); + args.push("-chardev".into()); + args.push(format!("socket,id={id},path={}", virtiofsd.socket_path().display()).into()); + args.push("-device".into()); + args.push(format!("vhost-user-fs-pci,queue-size=1024,chardev={id},tag={mount_tag}").into()); args } @@ -371,9 +371,9 @@ fn 
kernel_args( // The guest kernel command line args let mut cmdline: Vec = Vec::new(); - // Tell kernel the rootfs is 9p - cmdline.push("rootfstype=9p".into()); - cmdline.push(format!("rootflags={}", MOUNT_OPTS_9P_FS).into()); + // Tell kernel the rootfs is on a virtiofs and what "tag" it uses. + cmdline.push("rootfstype=virtiofs".into()); + cmdline.push(format!("root={ROOT_FS_MOUNT_TAG}").into()); // Mount rootfs readable/writable to make experience more smooth. // Lots of tools expect to be able to write logs or change global @@ -455,16 +455,6 @@ fn vmconfig_args(vm: &VMConfig) -> Vec { vm.memory.clone().into(), ]; - for mount in vm.mounts.values() { - let name = format!("mount{}", hash(&mount.host_path)); - args.append(&mut plan9_fs_args( - &mount.host_path, - &name, - &name, - !mount.writable, - )); - } - let mut extra_args = vm .extra_args .clone() @@ -650,6 +640,7 @@ impl Qemu { let command_sock = gen_sock("cmdout"); let (init, guest_init) = gen_init(&target.rootfs).context("Failed to generate init")?; + let mut virtiofsds = Vec::new(); let mut c = Command::new(format!("qemu-system-{}", target.arch)); c.args(QEMU_DEFAULT_ARGS) @@ -660,6 +651,7 @@ impl Qemu { .args(machine_args(&target.arch)) .args(machine_protocol_args(&qmp_sock)) .args(guest_agent_args(&qga_sock)) + .args(virtiofs_general_args(&target.vm)) .args(virtio_serial_args(&command_sock)); // Always ensure the rootfs is first. 
if let Some(image) = &target.image { @@ -668,11 +660,11 @@ impl Qemu { c.args(uefi_firmware_args(target.vm.bios.as_deref())); } } else if let Some(kernel) = &target.kernel { - c.args(plan9_fs_args( - target.rootfs.as_path(), + let virtiofsd = Virtiofsd::new(target.rootfs.as_path())?; + c.args(virtiofs_per_fs_args( + &virtiofsd, "root", - ROOTFS_9P_FS_MOUNT_TAG, - false, + ROOT_FS_MOUNT_TAG, )); c.args(kernel_args( kernel, @@ -680,16 +672,30 @@ impl Qemu { guest_init.as_path(), target.kernel_args.as_ref(), )); + virtiofsds.push(virtiofsd); } else { panic!("Config validation should've enforced XOR"); } + // Now add the shared mount and other extra mounts. - c.args(plan9_fs_args( - host_shared, + let virtiofsd = Virtiofsd::new(host_shared)?; + c.args(virtiofs_per_fs_args( + &virtiofsd, "shared", - SHARED_9P_FS_MOUNT_TAG, - false, + SHARED_FS_MOUNT_TAG, )); + virtiofsds.push(virtiofsd); + + for mount in target.vm.mounts.values() { + let name = format!("mount{}", hash(&mount.host_path)); + let virtiofsd = Virtiofsd::new(&mount.host_path)?; + c.args(virtiofs_per_fs_args( + &virtiofsd, + &name, + &name, + )); + virtiofsds.push(virtiofsd); + } c.args(vmconfig_args(&target.vm)); if log_enabled!(Level::Error) { @@ -706,6 +712,7 @@ impl Qemu { let mut qemu = Self { process: c, + virtiofsds, qga_sock, qmp_sock, command: target.command, @@ -838,16 +845,18 @@ impl Qemu { // We can race with VM/qemu coming up. So retry a few times with growing backoff. 
let mut rc = 0; for i in 0..5 { - let mount_opts = if ro { - format!("{},ro", MOUNT_OPTS_9P_FS) - } else { - MOUNT_OPTS_9P_FS.into() - }; + let mut args = vec![ + "-t", "virtiofs", mount_tag, guest_path + ]; + if ro { + args.push("-oro") + } + rc = run_in_vm( qga, &output_fn, "mount", - &["-t", "9p", "-o", &mount_opts, mount_tag, guest_path], + &args, false, None, )?; @@ -1052,7 +1061,7 @@ impl Qemu { // Mount shared directory inside guest let _ = self.updates.send(Output::SetupStart); if let Err(e) = - self.mount_in_guest(qga, SHARED_9P_FS_MOUNT_PATH, SHARED_9P_FS_MOUNT_TAG, false) + self.mount_in_guest(qga, SHARED_FS_MOUNT_PATH, SHARED_FS_MOUNT_TAG, false) { return Err(e).context("Failed to mount shared directory in guest"); } @@ -1075,6 +1084,19 @@ impl Qemu { /// Errors and return status are reported through the `updates` channel passed into the /// constructor. pub fn run(mut self) { + let _ = self.updates.send(Output::InitializeStart); + for virtiofsd in self.virtiofsds.iter_mut() { + match virtiofsd.launch() { + Ok(()) => (), + Err(e) => { + let _ = self.updates.send(Output::InitializeEnd(Err(e))); + return; + } + } + } + + let _ = self.updates.send(Output::InitializeEnd(Ok(()))); + // Start QEMU let (mut child, qga, mut qmp) = match self.boot_vm() { Ok((c, qga, qmp)) => (c, qga, qmp), diff --git a/src/ui.rs b/src/ui.rs index 63b8273..80a7f7d 100644 --- a/src/ui.rs +++ b/src/ui.rs @@ -177,6 +177,16 @@ impl Ui { }; match &msg { + Output::InitializeStart => { + stage = Stage::new(term.clone(), &heading("Initializing host environment", 2), Some(stage)); + stages += 1; + } + Output::InitializeEnd(r) => { + if let Err(e) = r { + error_out_stage(&mut stage, e); + errors += 1; + } + } Output::BootStart => { stage = Stage::new(term.clone(), &heading("Booting", 2), Some(stage)); stages += 1; diff --git a/src/virtiofsd.rs b/src/virtiofsd.rs new file mode 100644 index 0000000..160068a --- /dev/null +++ b/src/virtiofsd.rs @@ -0,0 +1,562 @@ +use std::fs::File; +use 
std::io; +use std::path::Path; +use std::path::PathBuf; +use std::sync::atomic::AtomicBool; +use std::sync::atomic::Ordering; +use std::sync::Arc; +use std::sync::Mutex; +use std::sync::RwLock; +use std::thread; +use std::thread::JoinHandle; +use std::time::Duration; + +use anyhow::Context as _; +use anyhow::Error; +use anyhow::Result; + +use log::error; +use log::warn; + +use vhost::vhost_user::message::VhostTransferStateDirection; +use vhost::vhost_user::message::VhostTransferStatePhase; +use vhost::vhost_user::Backend; +use vhost::vhost_user::VhostUserProtocolFeatures; +use vhost::vhost_user::VhostUserVirtioFeatures; +use vhost_user_backend::bitmap::BitmapMmapRegion; +use vhost_user_backend::VhostUserBackend; +use vhost_user_backend::VhostUserDaemon; +use vhost_user_backend::VringMutex; +use vhost_user_backend::VringState; +use vhost_user_backend::VringT; + +use virtio_bindings::virtio_config::VIRTIO_F_VERSION_1; +use virtio_bindings::virtio_ring::VIRTIO_RING_F_EVENT_IDX; +use virtio_bindings::virtio_ring::VIRTIO_RING_F_INDIRECT_DESC; +use virtio_queue::DescriptorChain; +use virtio_queue::QueueOwnedT; +use virtiofsd::descriptor_utils::Reader; +use virtiofsd::descriptor_utils::Writer; +use virtiofsd::filesystem::{FileSystem, SerializableFileSystem}; +use virtiofsd::passthrough; +use virtiofsd::passthrough::CachePolicy; +use virtiofsd::passthrough::PassthroughFs; +use virtiofsd::server::Server; + +use vm_memory::GuestAddressSpace; +use vm_memory::GuestMemoryAtomic; +use vm_memory::GuestMemoryLoadGuard; +use vm_memory::GuestMemoryMmap; + +use vmm_sys_util::epoll::EventSet; +use vmm_sys_util::eventfd::EventFd; + +use crate::util::gen_sock; + +type LoggedMemory = GuestMemoryMmap; +type LoggedMemoryAtomic = GuestMemoryAtomic; + +const QUEUE_SIZE: usize = 32768; +// The spec allows for multiple request queues. We currently only support one. +const REQUEST_QUEUES: u32 = 1; +// In addition to the request queue there is one high-prio queue. 
+// Since VIRTIO_FS_F_NOTIFICATION is not advertised we do not have a +// notification queue. +const NUM_QUEUES: usize = REQUEST_QUEUES as usize + 1; +// The guest queued an available buffer for the high priority queue. +const HIPRIO_QUEUE_EVENT: u16 = 0; +// The guest queued an available buffer for the request queue. +const REQ_QUEUE_EVENT: u16 = 1; + +struct VhostUserFsThread { + mem: Option, + kill_evt: EventFd, + server: Arc>, + // handle request from backend to frontend + vu_req: Option, + event_idx: bool, +} + +impl Clone for VhostUserFsThread { + fn clone(&self) -> Self { + VhostUserFsThread { + mem: self.mem.clone(), + kill_evt: self.kill_evt.try_clone().unwrap(), + server: self.server.clone(), + vu_req: self.vu_req.clone(), + event_idx: self.event_idx, + } + } +} + +impl VhostUserFsThread { + fn new(fs: F) -> Result { + Ok(VhostUserFsThread { + mem: None, + kill_evt: EventFd::new(libc::EFD_NONBLOCK).context("failed to create eventfd")?, + server: Arc::new(Server::new(fs)), + vu_req: None, + event_idx: false, + }) + } + + fn return_descriptor( + vring_state: &mut VringState, + head_index: u16, + event_idx: bool, + len: usize, + ) { + let used_len: u32 = match len.try_into() { + Ok(l) => l, + Err(_) => panic!("Invalid used length, can't return used descriptors to the ring"), + }; + + if vring_state.add_used(head_index, used_len).is_err() { + warn!("couldn't return used descriptors to the ring"); + } + + if event_idx { + match vring_state.needs_notification() { + Err(_) => { + warn!("couldn't check if queue needs to be notified"); + vring_state.signal_used_queue().unwrap(); + } + Ok(needs_notification) => { + if needs_notification { + vring_state.signal_used_queue().unwrap(); + } + } + } + } else { + vring_state.signal_used_queue().unwrap(); + } + } + + fn process_queue_serial( + &self, + vring_state: &mut VringState, + ) -> io::Result { + let mut used_any = false; + let mem = match self.mem.as_ref() { + Some(m) => m.memory(), + None => { + return 
Err(io::Error::new( + io::ErrorKind::InvalidInput, + "no memory configuration present", + )) + } + }; + + let mut vu_req = self.vu_req.clone(); + + let avail_chains: Vec>> = vring_state + .get_queue_mut() + .iter(mem.clone()) + .map_err(|err| io::Error::new(io::ErrorKind::Other, err))? + .collect(); + + for chain in avail_chains { + used_any = true; + + let head_index = chain.head_index(); + + let reader = Reader::new(&mem, chain.clone()).unwrap(); + let writer = Writer::new(&mem, chain.clone()).unwrap(); + + let len = self + .server + .handle_message(reader, writer, vu_req.as_mut()) + .unwrap(); + + Self::return_descriptor(vring_state, head_index, self.event_idx, len); + } + + Ok(used_any) + } + + fn handle_event_serial( + &self, + device_event: u16, + vrings: &[VringMutex], + ) -> io::Result<()> { + let mut vring_state = match device_event { + HIPRIO_QUEUE_EVENT => vrings[0].get_mut(), + REQ_QUEUE_EVENT => vrings[1].get_mut(), + _ => { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + format!("received unknown device event: {device_event}"), + )) + } + }; + + if self.event_idx { + // vm-virtio's Queue implementation only checks avail_index + // once, so to properly support EVENT_IDX we need to keep + // calling process_queue() until it stops finding new + // requests on the queue. + loop { + vring_state.disable_notification().unwrap(); + // we can't recover from an error here, so let's hope it's transient + if let Err(e) = self.process_queue_serial(&mut vring_state) { + error!("processing the vring: {e}"); + } + if !vring_state.enable_notification().unwrap() { + break; + } + } + } else { + // Without EVENT_IDX, a single call is enough. 
+ self.process_queue_serial(&mut vring_state)?; + } + + Ok(()) + } +} + +struct PremigrationThread { + handle: JoinHandle>, + cancel: Arc, +} + +struct VhostUserFsBackend { + thread: RwLock>, + premigration_thread: Mutex>, + migration_thread: Mutex>>>, +} + +impl VhostUserFsBackend { + fn new(fs: F) -> Result { + let thread = RwLock::new(VhostUserFsThread::new(fs)?); + Ok(VhostUserFsBackend { + thread, + premigration_thread: None.into(), + migration_thread: None.into(), + }) + } +} + +impl VhostUserBackend + for VhostUserFsBackend +{ + type Bitmap = BitmapMmapRegion; + type Vring = VringMutex; + + fn num_queues(&self) -> usize { + NUM_QUEUES + } + + fn max_queue_size(&self) -> usize { + QUEUE_SIZE + } + + fn features(&self) -> u64 { + 1 << VIRTIO_F_VERSION_1 + | 1 << VIRTIO_RING_F_INDIRECT_DESC + | 1 << VIRTIO_RING_F_EVENT_IDX + | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits() + | VhostUserVirtioFeatures::LOG_ALL.bits() + } + + fn protocol_features(&self) -> VhostUserProtocolFeatures { + VhostUserProtocolFeatures::MQ + | VhostUserProtocolFeatures::BACKEND_REQ + | VhostUserProtocolFeatures::BACKEND_SEND_FD + | VhostUserProtocolFeatures::REPLY_ACK + | VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS + | VhostUserProtocolFeatures::LOG_SHMFD + | VhostUserProtocolFeatures::DEVICE_STATE + } + + fn get_config(&self, _offset: u32, _size: u32) -> Vec { + // We should never receive a `get_config` call, because we don't + // have a tag set. 
+ unimplemented!() + } + + fn acked_features(&self, features: u64) { + if features & VhostUserVirtioFeatures::LOG_ALL.bits() != 0 { + // F_LOG_ALL set: Prepare for migration (unless we're already doing that) + let mut premigration_thread = self.premigration_thread.lock().unwrap(); + if premigration_thread.is_none() { + let cancel = Arc::new(AtomicBool::new(false)); + let cloned_server = Arc::clone(&self.thread.read().unwrap().server); + let cloned_cancel = Arc::clone(&cancel); + let handle = + thread::spawn(move || cloned_server.prepare_serialization(cloned_cancel)); + *premigration_thread = Some(PremigrationThread { handle, cancel }); + } + } else { + // F_LOG_ALL cleared: Migration cancelled, if any was ongoing + // (Note that this is our interpretation, and not said by the specification. The back + // end might clear this flag also on the source side once the VM has been stopped, even + // before we receive SET_DEVICE_STATE_FD. QEMU will clear F_LOG_ALL only when the VM + // is running, i.e. when the source resumes after a cancelled migration, which is + // exactly what we want, but it would be better if we had a more reliable way that is + // backed up by the spec. We could delay cancelling until we receive a guest request + // while F_LOG_ALL is cleared, but that can take an indefinite amount of time.) 
+ if let Some(premigration_thread) = self.premigration_thread.lock().unwrap().take() { + premigration_thread.cancel.store(true, Ordering::Relaxed); + // Ignore the result, we are cancelling anyway + let _ = premigration_thread.handle.join(); + } + } + } + + fn set_event_idx(&self, enabled: bool) { + self.thread.write().unwrap().event_idx = enabled; + } + + fn update_memory(&self, mem: LoggedMemoryAtomic) -> io::Result<()> { + self.thread.write().unwrap().mem = Some(mem); + Ok(()) + } + + fn handle_event( + &self, + device_event: u16, + evset: EventSet, + vrings: &[VringMutex], + _thread_id: usize, + ) -> io::Result<()> { + if evset != EventSet::IN { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "invalid event set", + )); + } + + let thread = self.thread.read().unwrap(); + thread.handle_event_serial(device_event, vrings) + } + + fn exit_event(&self, _thread_index: usize) -> Option { + Some(self.thread.read().unwrap().kill_evt.try_clone().unwrap()) + } + + fn set_backend_req_fd(&self, vu_req: Backend) { + self.thread.write().unwrap().vu_req = Some(vu_req); + } + + fn set_device_state_fd( + &self, + direction: VhostTransferStateDirection, + phase: VhostTransferStatePhase, + file: File, + ) -> io::Result> { + // Our caller (vhost-user-backend crate) pretty much ignores error objects we return (only + // cares whether we succeed or not), so log errors here + if let Err(err) = self.do_set_device_state_fd(direction, phase, file) { + error!("Failed to initiate state (de-)serialization: {err}"); + return Err(err); + } + Ok(None) + } + + fn check_device_state(&self) -> io::Result<()> { + // Our caller (vhost-user-backend crate) pretty much ignores error objects we return (only + // cares whether we succeed or not), so log errors here + if let Err(err) = self.do_check_device_state() { + error!("Failed to conclude migration: {err}"); + return Err(err); + } + Ok(()) + } +} + +impl VhostUserFsBackend { + fn do_set_device_state_fd( + &self, + direction: 
VhostTransferStateDirection, + phase: VhostTransferStatePhase, + file: File, + ) -> io::Result<()> { + if phase != VhostTransferStatePhase::STOPPED { + return Err(io::Error::new( + io::ErrorKind::Unsupported, + format!("Transfer in phase {:?} is not supported", phase), + )); + } + + let server = Arc::clone(&self.thread.read().unwrap().server); + let join_handle = match direction { + VhostTransferStateDirection::SAVE => { + // We should have a premigration thread that was started with `F_LOG_ALL`. It + // should already be finished, but you never know. + let premigration_thread = self.premigration_thread.lock().unwrap().take(); + + thread::spawn(move || { + if let Some(premigration_thread) = premigration_thread { + // Let’s hope it’s finished. Otherwise, we block migration downtime for a + // bit longer, but there’s nothing we can do. + let _ = premigration_thread.handle.join().map_err(|_| { + io::Error::new( + io::ErrorKind::Other, + "Failed to finalize serialization preparation", + ) + })?; + } else { + // If we don’t have a premigration thread, that either means migration was + // cancelled at some point (i.e. F_LOG_ALL cleared; very unlikely and we + // consider sending SET_DEVICE_STATE_FD afterwards a protocol violation), + // or that there simply was no F_LOG_ALL at all. QEMU doesn’t necessarily + // do memory logging when snapshotting, and in such cases we have no choice + // but to just run preserialization now. + warn!( + "Front-end did not announce migration to begin, so we failed to \ + prepare for it; collecting data now. If you are doing a snapshot, \ + that is OK; otherwise, migration downtime may be prolonged." 
+ ); + let _ = server.prepare_serialization(Arc::new(AtomicBool::new(false))); + } + + server.serialize(file).map_err(|e| { + io::Error::new(e.kind(), format!("Failed to save state: {}", e)) + }) + }) + } + + VhostTransferStateDirection::LOAD => { + if let Some(premigration_thread) = self.premigration_thread.lock().unwrap().take() { + // Strange, but OK + premigration_thread.cancel.store(true, Ordering::Relaxed); + warn!("cancelling serialization preparation because of incoming migration"); + let _ = premigration_thread.handle.join(); + } + + thread::spawn(move || { + server.deserialize_and_apply(file).map_err(|e| { + io::Error::new(e.kind(), format!("Failed to load state: {}", e)) + }) + }) + } + }; + + *self.migration_thread.lock().unwrap() = Some(join_handle); + + Ok(()) + } + + fn do_check_device_state(&self) -> io::Result<()> { + let result = if let Some(migration_thread) = self.migration_thread.lock().unwrap().take() { + // `Result::flatten()` is not stable yet, so no `.join().map_err(...).flatten()` + match migration_thread.join() { + Ok(x) => x, + Err(_) => Err(io::Error::new( + io::ErrorKind::Other, + "Failed to join the migration thread", + )), + } + } else { + // `check_device_state()` must follow a successful `set_device_state_fd()`, so this is + // a protocol violation + Err(io::Error::new( + io::ErrorKind::InvalidInput, + "Front-end attempts to check migration state, but no migration has been done", + )) + }; + + // Note that just like any other vhost-user message implementation, the error object that + // we return is not forwarded to the front end (it only receives an error flag), so if we + // want users to see some diagnostics, we have to print them ourselves + if let Err(e) = &result { + error!("Migration failed: {e}"); + } + result + } +} + +enum Either { + A(A), + B(B), +} + +pub(crate) struct Virtiofsd { + fs_backend: Arc>, + state: Either< + Option>>>, + JoinHandle>, + >, + /// The path to the Unix domain socket used for communication. 
+ socket_path: PathBuf, +} + +impl Virtiofsd { + /// Create a `Virtiofsd` instance for sharing the given directory. + pub fn new(shared_dir: &Path) -> Result { + let socket = gen_sock("virtiofsd"); + let cache_policy = CachePolicy::Always; + let timeout = match cache_policy { + CachePolicy::Never => Duration::from_secs(0), + CachePolicy::Metadata => Duration::from_secs(86400), + CachePolicy::Auto => Duration::from_secs(1), + CachePolicy::Always => Duration::from_secs(86400), + }; + + let fs_cfg = passthrough::Config { + entry_timeout: timeout, + attr_timeout: timeout, + cache_policy, + root_dir: shared_dir + .to_str() + .context("shared directory is not a valid UTF-8 string")? + .to_string(), + announce_submounts: true, + ..Default::default() + }; + + let fs = PassthroughFs::new(fs_cfg) + .context("failed to create internal filesystem representation")?; + let fs_backend = + Arc::new(VhostUserFsBackend::new(fs).context("error creating vhost-user backend")?); + + let daemon = VhostUserDaemon::new( + String::from("virtiofsd-backend"), + fs_backend.clone(), + GuestMemoryAtomic::new(GuestMemoryMmap::new()), + ) + .map_err(|err| Error::msg(err.to_string())) + .context("failed to instantiate vhost user daemon")?; + + let slf = Self { + fs_backend, + state: Either::A(Some(daemon)), + socket_path: socket, + }; + Ok(slf) + } + + pub fn launch(&mut self) -> Result<()> { + if let Either::A(ref mut daemon) = &mut self.state { + let mut daemon = daemon.take().unwrap(); + let socket = self.socket_path.clone(); + self.state = Either::B(thread::spawn(move || daemon.serve(socket))); + } + Ok(()) + } + + #[inline] + pub fn socket_path(&self) -> &Path { + &self.socket_path + } +} + +impl Drop for Virtiofsd { + fn drop(&mut self) { + // Ideally we'd await the server thread, but that can + // conceptually block for a long time and shouldn't be done + // inside a destructor.
+ + let kill_evt = self + .fs_backend + .thread + .read() + .unwrap() + .kill_evt + .try_clone() + .unwrap(); + if let Err(err) = kill_evt.write(1) { + error!("failed to shut down worker thread: {err:#}"); + } + } +} diff --git a/tests/kernels/archlinux b/tests/kernels/archlinux index 1b51a39..cbadc22 100644 --- a/tests/kernels/archlinux +++ b/tests/kernels/archlinux @@ -10078,9 +10078,9 @@ CONFIG_QFMT_V2=m CONFIG_QUOTACTL=y CONFIG_AUTOFS4_FS=y CONFIG_AUTOFS_FS=y -CONFIG_FUSE_FS=m +CONFIG_FUSE_FS=y CONFIG_CUSE=m -CONFIG_VIRTIO_FS=m +CONFIG_VIRTIO_FS=y CONFIG_FUSE_DAX=y CONFIG_OVERLAY_FS=m CONFIG_OVERLAY_FS_REDIRECT_DIR=y diff --git a/tests/kernels/fedora38 b/tests/kernels/fedora38 index e247a50..603b510 100644 --- a/tests/kernels/fedora38 +++ b/tests/kernels/fedora38 @@ -9359,9 +9359,9 @@ CONFIG_QFMT_V2=y CONFIG_QUOTACTL=y CONFIG_AUTOFS4_FS=y CONFIG_AUTOFS_FS=y -CONFIG_FUSE_FS=m +CONFIG_FUSE_FS=y CONFIG_CUSE=m -CONFIG_VIRTIO_FS=m +CONFIG_VIRTIO_FS=y CONFIG_FUSE_DAX=y CONFIG_OVERLAY_FS=m # CONFIG_OVERLAY_FS_REDIRECT_DIR is not set diff --git a/tests/test.rs b/tests/test.rs index f5140c2..86741b4 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -113,7 +113,7 @@ fn test_vmtest_infra_error() { assert_eq!(failed, 69); } -// Expect we can run each target one by one, sucessfully +// Expect we can run each target one by one, successfully #[test] fn test_run_one() { let uefi_image = create_new_image(asset("image-uefi.raw-efi"));