From d0271562a7441eace2bb1452e6d742e3902b4d4b Mon Sep 17 00:00:00 2001
From: Jernej Kos <jernej@kos.mx>
Date: Thu, 16 Jan 2025 11:35:09 +0100
Subject: [PATCH] rofl-containers: Add support for persistent storage

---
 Cargo.lock                                    | 95 ++++++++++++++++++-
 rofl-appd/src/lib.rs                          | 25 ++---
 rofl-appd/src/services/kms.rs                 | 24 +++++
 rofl-containers/Cargo.toml                    |  7 +-
 rofl-containers/src/main.rs                   | 41 +++++++-
 rofl-containers/src/reaper.rs                 | 33 +++++++
 rofl-containers/src/storage.rs                | 92 ++++++++++++++++++
 runtime-sdk/src/modules/rofl/app/mod.rs       | 12 +++
 runtime-sdk/src/modules/rofl/app/processor.rs | 21 +++-
 9 files changed, 329 insertions(+), 21 deletions(-)
 create mode 100644 rofl-containers/src/reaper.rs
 create mode 100644 rofl-containers/src/storage.rs

diff --git a/Cargo.lock b/Cargo.lock
index f6f5d45a74..5d9bcd6d2a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -713,6 +713,32 @@ dependencies = [
  "cc",
 ]
 
+[[package]]
+name = "cmd_lib"
+version = "1.9.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "371c15a3c178d0117091bd84414545309ca979555b1aad573ef591ad58818d41"
+dependencies = [
+ "cmd_lib_macros",
+ "env_logger",
+ "faccess",
+ "lazy_static",
+ "log",
+ "os_pipe",
+]
+
+[[package]]
+name = "cmd_lib_macros"
+version = "1.9.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cb844bd05be34d91eb67101329aeba9d3337094c04fd8507d821db7ebb488eaf"
+dependencies = [
+ "proc-macro-error2",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.90",
+]
+
 [[package]]
 name = "colored"
 version = "2.1.0"
@@ -1317,6 +1343,19 @@ dependencies = [
  "cfg-if 1.0.0",
 ]
 
+[[package]]
+name = "env_logger"
+version = "0.10.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580"
+dependencies = [
+ "humantime",
+ "is-terminal",
+ "log",
+ "regex",
+ "termcolor",
+]
+
 [[package]]
 name = "environmental"
 version = "1.1.4"
@@ -1477,6 +1516,17 @@ dependencies = [
  "sha3",
 ]
 
+[[package]]
+name = "faccess"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "59ae66425802d6a903e268ae1a08b8c38ba143520f227a205edf4e9c7e3e26d5"
+dependencies = [
+ "bitflags 1.3.2",
+ "libc",
+ "winapi",
+]
+
 [[package]]
 name = "fallible-iterator"
 version = "0.2.0"
@@ -1984,6 +2034,12 @@ version = "1.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
 
+[[package]]
+name = "humantime"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
+
 [[package]]
 name = "hyper"
 version = "1.5.2"
@@ -3150,6 +3206,16 @@ version = "0.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381"
 
+[[package]]
+name = "os_pipe"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5ffd2b0a5634335b135d5728d84c5e0fd726954b87111f7506a61c502280d982"
+dependencies = [
+ "libc",
+ "windows-sys 0.59.0",
+]
+
 [[package]]
 name = "overload"
 version = "0.1.1"
@@ -3488,6 +3554,28 @@ dependencies = [
  "toml_edit 0.22.22",
 ]
 
+[[package]]
+name = "proc-macro-error-attr2"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "96de42df36bb9bba5542fe9f1a054b8cc87e172759a1868aa05c1f3acc89dfc5"
+dependencies = [
+ "proc-macro2",
+ "quote",
+]
+
+[[package]]
+name = "proc-macro-error2"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "11ec05c52be0a07b08061f7dd003e7d7092e0472bc731b4af7bb1ef876109802"
+dependencies = [
+ "proc-macro-error-attr2",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.90",
+]
+
 [[package]]
 name = "proc-macro2"
 version = "1.0.92"
@@ -3885,11 +3973,16 @@ dependencies = [
 
 [[package]]
 name = "rofl-containers"
-version = "0.1.0"
+version = "0.2.0"
 dependencies = [
+ "anyhow",
  "base64",
+ "cmd_lib",
+ "hex",
+ "nix",
  "oasis-runtime-sdk",
  "rofl-appd",
+ "tokio",
 ]
 
 [[package]]
diff --git a/rofl-appd/src/lib.rs b/rofl-appd/src/lib.rs
index 9b5bc5037b..7faa846b94 100644
--- a/rofl-appd/src/lib.rs
+++ b/rofl-appd/src/lib.rs
@@ -1,7 +1,7 @@
 //! REST API daemon accessible by ROFL apps.
 
 mod routes;
-pub(crate) mod services;
+pub mod services;
 pub(crate) mod state;
 
 use std::sync::Arc;
@@ -10,26 +10,29 @@ use rocket::{figment::Figment, routes};
 
 use oasis_runtime_sdk::modules::rofl::app::{App, Environment};
 
+/// API server configuration.
+#[derive(Clone)]
+pub struct Config<'a> {
+    /// Address on which the service should listen.
+    pub address: &'a str,
+    /// Key management service to use.
+    pub kms: Arc<dyn services::kms::KmsService>,
+}
+
 /// Start the REST API server.
-pub async fn start<A>(address: &str, env: Environment<A>) -> Result<(), rocket::Error>
+pub async fn start<A>(cfg: Config<'_>, env: Environment<A>) -> Result<(), rocket::Error>
 where
     A: App,
 {
-    // KMS service.
-    let kms_service: Arc<dyn services::kms::KmsService> =
-        Arc::new(services::kms::OasisKmsService::new(env.clone()));
-    let kms_service_task = kms_service.clone();
-    tokio::spawn(async move { kms_service_task.start().await });
-
     // Oasis runtime environment.
     let env: Arc<dyn state::Env> = Arc::new(state::EnvImpl::new(env));
 
     // Server configuration.
-    let cfg = Figment::new().join(("address", address));
+    let rocket_cfg = Figment::new().join(("address", cfg.address));
 
-    rocket::custom(cfg)
+    rocket::custom(rocket_cfg)
         .manage(env)
-        .manage(kms_service)
+        .manage(cfg.kms)
         .mount("/rofl/v1/app", routes![routes::app::id,])
         .mount("/rofl/v1/keys", routes![routes::keys::generate,])
         .launch()
diff --git a/rofl-appd/src/services/kms.rs b/rofl-appd/src/services/kms.rs
index fccd9b3d82..ae4d3e851a 100644
--- a/rofl-appd/src/services/kms.rs
+++ b/rofl-appd/src/services/kms.rs
@@ -4,6 +4,7 @@ use std::sync::{
 };
 
 use sp800_185::KMac;
+use tokio::sync::Notify;
 
 use oasis_runtime_sdk::{
     core::common::logger::get_logger,
@@ -17,6 +18,9 @@ pub trait KmsService: Send + Sync {
     /// Start the KMS service.
     async fn start(&self) -> Result<(), Error>;
 
+    /// Waits for the service to become ready to accept requests.
+    async fn wait_ready(&self) -> Result<(), Error>;
+
     /// Generate a key based on the passed parameters.
     async fn generate(&self, request: &GenerateRequest<'_>) -> Result<GenerateResponse, Error>;
 }
@@ -94,6 +98,7 @@ pub struct OasisKmsService<A: App> {
     root_key: Arc<Mutex<Option<Vec<u8>>>>,
     env: Environment<A>,
     logger: slog::Logger,
+    ready_notify: Notify,
 }
 
 impl<A: App> OasisKmsService<A> {
@@ -103,6 +108,7 @@ impl<A: App> OasisKmsService<A> {
             root_key: Arc::new(Mutex::new(None)),
             env,
             logger: get_logger("appd/services/kms"),
+            ready_notify: Notify::new(),
         }
     }
 }
@@ -146,11 +152,25 @@ impl<A: App> KmsService for OasisKmsService<A> {
         // Store the key in memory.
         *self.root_key.lock().unwrap() = Some(root_key.key);
 
+        self.ready_notify.notify_waiters();
+
         slog::info!(self.logger, "KMS service initialized");
 
         Ok(())
     }
 
+    async fn wait_ready(&self) -> Result<(), Error> {
+        // Create the `Notified` future before inspecting the key so that a `notify_waiters`
+        // call racing with the check below is not missed (per tokio's `Notify` documentation).
+        let handle = self.ready_notify.notified();
+        if self.root_key.lock().unwrap().is_some() {
+            return Ok(());
+        }
+        // NOTE(review): presumably never resolves if `start` fails before storing the key.
+        handle.await;
+        Ok(())
+    }
+
     async fn generate(&self, request: &GenerateRequest<'_>) -> Result<GenerateResponse, Error> {
         let root_key_guard = self.root_key.lock().unwrap();
         let root_key = root_key_guard.as_ref().ok_or(Error::NotInitialized)?;
@@ -173,6 +193,10 @@ impl KmsService for MockKmsService {
         Ok(())
     }
 
+    async fn wait_ready(&self) -> Result<(), Error> {
+        Ok(()) // Always ready: `generate` derives keys from a constant mock root key.
+    }
+
     async fn generate(&self, request: &GenerateRequest<'_>) -> Result<GenerateResponse, Error> {
         let key = Kdf::derive_key(
             INSECURE_MOCK_ROOT_KEY,
diff --git a/rofl-containers/Cargo.toml b/rofl-containers/Cargo.toml
index 51e2250ca3..ae6170e3b4 100644
--- a/rofl-containers/Cargo.toml
+++ b/rofl-containers/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "rofl-containers"
-version = "0.1.0"
+version = "0.2.0"
 edition = "2021"
 
 [dependencies]
@@ -9,4 +9,9 @@ oasis-runtime-sdk = { path = "../runtime-sdk", features = ["tdx"] }
 rofl-appd = { path = "../rofl-appd" }
 
 # Third party.
+anyhow = "1.0.86"
 base64 = "0.22.1"
+cmd_lib = "1.9.5"
+hex = "0.4.3"
+nix = { version = "0.29.0", features = ["signal"] }
+tokio = { version = "1.38", features = ["rt", "rt-multi-thread", "sync", "time", "macros"] }
diff --git a/rofl-containers/src/main.rs b/rofl-containers/src/main.rs
index 5f65213d4a..e3552ba022 100644
--- a/rofl-containers/src/main.rs
+++ b/rofl-containers/src/main.rs
@@ -11,7 +11,15 @@
 use std::env;
 
 use base64::prelude::*;
-use oasis_runtime_sdk::{cbor, modules::rofl::app::prelude::*};
+use oasis_runtime_sdk::{
+    cbor,
+    core::common::{logger::get_logger, process},
+    modules::rofl::app::prelude::*,
+};
+use rofl_appd::services;
+
+mod reaper;
+mod storage;
 
 /// UNIX socket address where the REST API server will listen on.
 const ROFL_APPD_ADDRESS: &str = "unix:/run/rofl-appd.sock";
@@ -42,12 +50,39 @@ impl App for ContainersApp {
         .expect("Corrupted ROFL_CONSENSUS_TRUST_ROOT (must be Base64-encoded CBOR).")
     }
 
-    async fn run(self: Arc<Self>, env: Environment<Self>) {
+    async fn post_registration_init(self: Arc<Self>, env: Environment<Self>) {
+        // Temporarily disable the default process reaper as it interferes with scripts.
+        let _guard = reaper::disable_default_reaper();
+        let logger = get_logger("post_registration_init");
+
+        // Start the key management service and wait for it to initialize.
+        let kms: Arc<dyn services::kms::KmsService> =
+            Arc::new(services::kms::OasisKmsService::new(env.clone()));
+        let kms_task = kms.clone();
+        tokio::spawn(async move { kms_task.start().await });
+        let _ = kms.wait_ready().await;
+
+        // Initialize storage when configured in the kernel cmdline.
+        if let Err(err) = storage::init(kms.clone()).await {
+            slog::error!(logger, "failed to initialize stage 2 storage"; "err" => ?err);
+            process::abort();
+        }
+        // NOTE(review): confirm `start` returns promptly; this function blocks until it does.
         // Start the REST API server.
-        let _ = rofl_appd::start(ROFL_APPD_ADDRESS, env).await;
+        let cfg = rofl_appd::Config {
+            address: ROFL_APPD_ADDRESS,
+            kms,
+        };
+        let _ = rofl_appd::start(cfg, env).await;
     }
 }
 
 fn main() {
+    // Configure the binary search path.
+    // SAFETY: This is safe as no other threads are running yet.
+    unsafe {
+        env::set_var("PATH", "/usr/sbin:/usr/bin:/sbin:/bin");
+    }
+
     ContainersApp.start();
 }
diff --git a/rofl-containers/src/reaper.rs b/rofl-containers/src/reaper.rs
new file mode 100644
index 0000000000..d5ed5dc370
--- /dev/null
+++ b/rofl-containers/src/reaper.rs
@@ -0,0 +1,33 @@
+use nix::sys::signal::{sigaction, SaFlags, SigAction, SigHandler, SigSet, Signal};
+
+/// Guard that re-enables the default process reaper when dropped.
+///
+/// The private field prevents construction outside of this module.
+pub struct DisableReaperGuard {
+    _internal: (),
+}
+
+impl Drop for DisableReaperGuard {
+    fn drop(&mut self) {
+        // Re-enable default kernel process reaper. Errors are ignored as `drop` cannot
+        // report them.
+        let ignore = SigAction::new(SigHandler::SigIgn, SaFlags::empty(), SigSet::empty());
+        // SAFETY: Only the `SIG_IGN` disposition is installed, no handler code is registered.
+        let _ = unsafe { sigaction(Signal::SIGCHLD, &ignore) };
+    }
+}
+
+/// Temporarily disables the default process reaper by resetting `SIGCHLD` to `SIG_DFL`.
+///
+/// The reaper stays disabled for as long as the returned guard is alive; once the guard
+/// goes out of scope the reaper is re-enabled by ignoring `SIGCHLD` again.
+///
+/// This assumes that the default reaper has been previously configured by core init.
+pub fn disable_default_reaper() -> DisableReaperGuard {
+    let default_action = SigAction::new(SigHandler::SigDfl, SaFlags::empty(), SigSet::empty());
+    // SAFETY: Only the `SIG_DFL` disposition is installed, no handler code is registered.
+    // Errors are ignored, matching the best-effort behaviour of the guard's `drop`.
+    let _ = unsafe { sigaction(Signal::SIGCHLD, &default_action) };
+
+    DisableReaperGuard { _internal: () }
+}
diff --git a/rofl-containers/src/storage.rs b/rofl-containers/src/storage.rs
new file mode 100644
index 0000000000..e5df76fe86
--- /dev/null
+++ b/rofl-containers/src/storage.rs
@@ -0,0 +1,92 @@
+use std::{fs, sync::Arc};
+
+use anyhow::Result;
+use cmd_lib::run_cmd;
+
+use rofl_appd::services::{
+    self,
+    kms::{GenerateRequest, KeyKind},
+};
+
+/// Storage encryption key identifier.
+const STORAGE_ENCRYPTION_KEY_ID: &str =
+    "oasis-runtime-sdk/rofl-containers: storage encryption key v1";
+
+/// Initialize stage 2 storage based on configuration.
+///
+/// Does nothing unless the kernel command line contains `oasis.stage2.storage_mode=custom`.
+/// Otherwise the storage key is derived via `kms`, the encrypted storage partition is opened
+/// (and re-formatted if it cannot be opened), mounted at `/storage` and `/storage/run` and
+/// `/storage/var` are bind-mounted over `/run` and `/var`.
+pub async fn init(kms: Arc<dyn services::kms::KmsService>) -> Result<()> {
+    // Parse kernel command line to determine relevant features. Split on any whitespace as
+    // `/proc/cmdline` ends with a newline which must not become part of the last value.
+    let cmdline = fs::read_to_string("/proc/cmdline")?;
+    let storage_mode = cmdline
+        .split_whitespace()
+        .filter_map(|s| s.split_once('='))
+        .find(|(k, _)| *k == "oasis.stage2.storage_mode")
+        .map(|(_, v)| v);
+    if storage_mode != Some("custom") {
+        return Ok(()); // Ignore non-custom storage mode.
+    }
+
+    // Derive storage key.
+    let storage_key = kms
+        .generate(&GenerateRequest {
+            key_id: STORAGE_ENCRYPTION_KEY_ID,
+            kind: KeyKind::Raw384,
+        })
+        .await?;
+    let storage_key = hex::encode(&storage_key.key);
+
+    // Ensure all device mapper devices are present.
+    run_cmd!(dmsetup mknodes)?;
+
+    // Open or re-format storage.
+    // NOTE(review): any open failure (not only a missing LUKS header) triggers a re-format.
+    if open_storage(&storage_key).is_err() {
+        format_storage(&storage_key)?;
+    }
+
+    // Mount filesystem as /storage.
+    run_cmd!(mount "/dev/mapper/storage" "/storage")?;
+
+    // Setup /run and /var. Storage is persistent, so `/storage/run` exists after the first
+    // boot; a plain `mkdir` would then fail and abort initialization, hence `-p`.
+    run_cmd!(
+        mkdir -p "/storage/run";
+        mkdir -p "/storage/var/lib";
+        mkdir -p "/storage/var/cache";
+        mount --bind "/storage/run" "/run";
+        mount --bind "/storage/var" "/var";
+    )?;
+
+    Ok(())
+}
+
+/// Attempt to open the storage partition block device using the given storage key.
+fn open_storage(storage_key: &str) -> Result<()> {
+    // The key is piped to `cryptsetup`; the device is opened under the name `storage`.
+    run_cmd!(
+        echo -n ${storage_key} |
+            cryptsetup open --type luks2 --disable-locks "/dev/mapper/part-storage" storage
+    )?;
+    Ok(())
+}
+
+/// Format the storage partition block device using the given storage key.
+///
+/// Creates a new LUKS2 container, opens it and creates an ext4 filesystem inside it.
+fn format_storage(storage_key: &str) -> Result<()> {
+    // Format block device.
+    run_cmd!(
+        echo -n ${storage_key} |
+            cryptsetup luksFormat --type luks2 --integrity hmac-sha256 --disable-locks "/dev/mapper/part-storage"
+    )?;
+    // Open the freshly formatted device; the filesystem is created on the opened mapping.
+    open_storage(storage_key)?;
+    // Format filesystem.
+    run_cmd!(mkfs.ext4 "/dev/mapper/storage")?;
+    Ok(())
+}
diff --git a/runtime-sdk/src/modules/rofl/app/mod.rs b/runtime-sdk/src/modules/rofl/app/mod.rs
index b0bf320681..b3f114a2c3 100644
--- a/runtime-sdk/src/modules/rofl/app/mod.rs
+++ b/runtime-sdk/src/modules/rofl/app/mod.rs
@@ -74,6 +74,18 @@ pub trait App: Send + Sync + 'static {
         tx
     }
 
+    /// Custom post-registration initialization. It runs before any image-specific scripts are
+    /// called by the runtime so it can be used to do things like set up custom storage after
+    /// successful registration.
+    ///
+    /// Until this function completes, no further initialization will happen.
+    async fn post_registration_init(self: Arc<Self>, env: Environment<Self>)
+    where
+        Self: Sized,
+    {
+        // Default implementation does nothing.
+    }
+
     /// Main application processing loop.
     async fn run(self: Arc<Self>, env: Environment<Self>)
     where
diff --git a/runtime-sdk/src/modules/rofl/app/processor.rs b/runtime-sdk/src/modules/rofl/app/processor.rs
index 455acf9df2..e4936cf6ab 100644
--- a/runtime-sdk/src/modules/rofl/app/processor.rs
+++ b/runtime-sdk/src/modules/rofl/app/processor.rs
@@ -181,11 +181,22 @@ where
         tokio::spawn(self.state.app.clone().run(self.env.clone()));
 
         // Perform post-registration initialization.
-        slog::info!(
-            self.logger,
-            "performing additional post-registration initialization"
-        );
-        init::post_registration_init();
+        let app = self.state.app.clone();
+        let env = self.env.clone();
+        let logger = self.logger.clone();
+        tokio::spawn(async move {
+            slog::info!(
+                logger,
+                "performing app-specific post-registration initialization"
+            );
+            app.post_registration_init(env).await;
+
+            slog::info!(
+                logger,
+                "performing additional post-registration initialization"
+            );
+            init::post_registration_init();
+        });
 
         // Notify notifier task.
         self.tasks