diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md index eb02c32d..cdc8513c 100644 --- a/.claude/CLAUDE.md +++ b/.claude/CLAUDE.md @@ -1229,7 +1229,7 @@ fuse-pipe/benches/ - Each VM gets a unique IPv6 derived from host's /64 subnet via hash of vm_id - Network namespace with bridge (br0) connecting TAP and veth for L2 forwarding - Proxy NDP on default interface makes VM IPv6 routable from network fabric -- ip6tables MASQUERADE for AWS VPC source/dest checks +- ip6tables MASQUERADE for AWS VPC source/dest checks (skipped when `--ipv6-prefix` is set) - Port forwarding via built-in TCP proxy (setns + tokio relay) on unique loopback IP (same allocation as rootless) - IPv4 stays internal to namespace (health checks only); all external traffic uses IPv6 - Egress proxy is NOT used — IPv6 goes natively through the kernel stack diff --git a/DESIGN.md b/DESIGN.md index e4b2815d..0f7cd7de 100644 --- a/DESIGN.md +++ b/DESIGN.md @@ -290,7 +290,7 @@ iptables -t nat -A PREROUTING -d 172.30.x.1 -p tcp --dport 8080 -j DNAT --to-des Uses veth pairs + IPv6 routing for kernel line-rate networking without userspace proxies. 
**Features**: -- Requires root and a host with a global IPv6 /64 subnet +- Requires root and a host with a global IPv6 /64 subnet (or `--ipv6-prefix` to specify one explicitly) - Native IPv6 routing through the kernel stack (no userspace L4 translation) - Each VM gets a unique IPv6 derived from the host's /64 prefix - Port forwarding via built-in TCP proxy (`setns` + tokio relay) on loopback IP (same as rootless) @@ -308,11 +308,12 @@ struct RoutedNetwork { vm_ipv6: Option, default_iface: Option, proxy_handles: Vec>, + ipv6_prefix: Option, // explicit /64 prefix (skips auto-detect + MASQUERADE) } async fn setup() -> Result { - preflight_check() // root, IPv6, ip6tables - detect_host_ipv6() // find /64 subnet (or /128 with on-link /64) + self.preflight_check() // root, IPv6, ip6tables (ip6tables skipped if --ipv6-prefix) + detect_host_ipv6() // find /64 subnet (or /128 with on-link /64); skipped if --ipv6-prefix generate_vm_ipv6(prefix, vm_id) // deterministic IPv6 from hash create_namespace(ns_name) create_veth_pair(host_veth, guest_veth) @@ -323,7 +324,7 @@ async fn setup() -> Result { // Namespace: default IPv6 route via host veth link-local // Host: /128 route to VM IPv6 via host veth // Proxy NDP on default interface - // ip6tables MASQUERADE for outbound + // ip6tables MASQUERADE for outbound (skipped if --ipv6-prefix is set) // TCP proxy port forwarding on loopback IP (setns + tokio relay) } ``` @@ -1366,7 +1367,7 @@ fcvm snapshot run --pid [OPTIONS] --exec Execute command in container after clone is healthy ``` -Network mode, port mappings, TTY, and interactive flags are inherited from the snapshot +Network mode, port mappings, TTY, interactive flags, and `--ipv6-prefix` are inherited from the snapshot metadata automatically — no need to re-specify them on clone. 
**Examples**: diff --git a/README.md b/README.md index 70788475..81156998 100644 --- a/README.md +++ b/README.md @@ -270,7 +270,7 @@ fcvm auto-forwards `http_proxy`/`https_proxy` from host to VM via MMDS. - Firecracker binary in PATH - For rootless: `passt` package (provides `pasta`) - For bridged: sudo, iptables, iproute2 -- For routed: sudo, ip6tables, iproute2, host with global IPv6 /64 +- For routed: sudo, iproute2, host with global IPv6 /64 (ip6tables also needed unless `--ipv6-prefix` is set) - For rootfs build: qemu-utils, e2fsprogs **Storage:** btrfs at `/mnt/fcvm-btrfs` (auto-created as loopback on non-btrfs hosts) @@ -336,6 +336,7 @@ See [`Containerfile`](Containerfile) for the complete dependency list used in CI --portable-volumes Path-hash inodes for cross-machine snapshot/restore --rootfs-size Minimum free space on rootfs (default: 10G) --no-snapshot Disable automatic snapshot creation +--ipv6-prefix Use explicit /64 prefix for routed mode (skips auto-detect and MASQUERADE) ``` Run `fcvm --help` or `fcvm --help` for full options. diff --git a/src/cli/args.rs b/src/cli/args.rs index d436eb42..2496d2de 100644 --- a/src/cli/args.rs +++ b/src/cli/args.rs @@ -198,6 +198,14 @@ pub struct RunArgs { #[arg(long, value_enum, default_value_t = NetworkMode::Rootless)] pub network: NetworkMode, + /// Routable IPv6 /64 prefix for routed mode VM addressing. + /// Each VM gets a unique address in this prefix via NDP proxy. + /// When set, MASQUERADE is skipped (the prefix is directly routable). + /// When not set, auto-detected from host interfaces. + /// Example: --ipv6-prefix 2803:6084:7058:46f6 (four hex groups only; no `::` or `/64` suffix) + #[arg(long)] + pub ipv6_prefix: Option<String>, + /// HTTP health check URL. If not specified, health is based on container running status. /// The URL hostname is sent as the Host header; the connection goes to the guest IP. 
/// Example: --health-check http://myapp.example.com/status diff --git a/src/commands/common.rs b/src/commands/common.rs index a89fb7dc..ea205128 100644 --- a/src/commands/common.rs +++ b/src/commands/common.rs @@ -1282,6 +1282,7 @@ pub fn build_snapshot_config( user: vm_state.config.user.clone(), port_mappings: vm_state.config.port_mappings.clone(), network_mode: vm_state.config.network_mode, + ipv6_prefix: vm_state.config.ipv6_prefix.clone(), tty: vm_state.config.tty, interactive: vm_state.config.interactive, }, diff --git a/src/commands/podman/mod.rs b/src/commands/podman/mod.rs index 7ffa1a78..0d06fcee 100644 --- a/src/commands/podman/mod.rs +++ b/src/commands/podman/mod.rs @@ -600,6 +600,7 @@ pub async fn prepare_vm(mut args: RunArgs) -> Result> { vm_state.config.portable_volumes = args.portable_volumes; vm_state.config.port_mappings = port_mappings.clone(); vm_state.config.network_mode = args.network.into(); + vm_state.config.ipv6_prefix = args.ipv6_prefix.clone(); vm_state.config.tty = args.tty; vm_state.config.interactive = args.interactive; vm_state.config.user = args.user.clone(); @@ -650,9 +651,13 @@ pub async fn prepare_vm(mut args: RunArgs) -> Result> { port_mappings.clone(), )), NetworkMode::Routed => { - RoutedNetwork::preflight_check().context("routed mode preflight check failed")?; let mut net = RoutedNetwork::new(vm_id.clone(), tap_device.clone(), port_mappings.clone()); + if let Some(ref prefix) = args.ipv6_prefix { + net = net.with_ipv6_prefix(prefix.clone()); + } + net.preflight_check() + .context("routed mode preflight check failed")?; if !port_mappings.is_empty() { let loopback_ip = state_manager .allocate_loopback_ip(&mut vm_state) @@ -813,7 +818,9 @@ pub async fn prepare_vm(mut args: RunArgs) -> Result> { None }; - // Start egress proxy for rootless mode (bypasses TAP/bridge for outbound TCP) + // Start egress proxy for rootless mode only. + // Routed mode uses native IPv6 kernel routing — no proxy needed. 
+ // Services use mutual TLS with client certs, not source IP matching. let egress_proxy_handle = if matches!(args.network, NetworkMode::Rootless) { let socket_path = vsock_socket_path.clone(); Some(tokio::spawn(async move { @@ -1161,6 +1168,7 @@ mod tests { rootfs_type: None, non_blocking_output: false, label: vec![], + ipv6_prefix: None, image: "alpine:latest".to_string(), command_args: vec![], } diff --git a/src/commands/serve.rs b/src/commands/serve.rs index 6aca1dea..6e54c02e 100644 --- a/src/commands/serve.rs +++ b/src/commands/serve.rs @@ -309,6 +309,7 @@ async fn create_sandbox( publish: vec![], balloon: None, network: crate::cli::NetworkMode::Rootless, + ipv6_prefix: None, health_check: None, health_check_timeout: 5, privileged: false, diff --git a/src/commands/snapshot.rs b/src/commands/snapshot.rs index 9e02b8e8..d7ec0006 100644 --- a/src/commands/snapshot.rs +++ b/src/commands/snapshot.rs @@ -643,7 +643,7 @@ pub async fn cmd_snapshot_run(args: SnapshotRunArgs) -> Result<()> { // stays stuck reading from the old (dead) connection after VM resume resets vsock. 
let output_reconnect = Arc::new(tokio::sync::Notify::new()); // Channel to know when fc-agent's output connection arrives (gates health monitor) - let (output_connected_tx, output_connected_rx) = tokio::sync::oneshot::channel(); + let (output_connected_tx, mut output_connected_rx) = tokio::sync::oneshot::channel(); let output_handle = if !tty_mode { let socket_path = output_socket_path.clone(); let vm_id_clone = vm_id.clone(); @@ -728,7 +728,11 @@ pub async fn cmd_snapshot_run(args: SnapshotRunArgs) -> Result<()> { FcNetworkMode::Routed => { let mut net = RoutedNetwork::new(vm_id.clone(), tap_device.clone(), port_mappings.clone()); - net.preflight_check().context("routed mode preflight check failed")?; + if let Some(ref prefix) = snapshot_config.metadata.ipv6_prefix { + net = net.with_ipv6_prefix(prefix.clone()); + } + net.preflight_check() + .context("routed mode preflight check failed")?; if !port_mappings.is_empty() { let loopback_ip = state_manager .allocate_loopback_ip(&mut vm_state) @@ -790,6 +794,7 @@ pub async fn cmd_snapshot_run(args: SnapshotRunArgs) -> Result<()> { vm_state.config.user = snapshot_config.metadata.user.clone(); vm_state.config.port_mappings = port_mappings; vm_state.config.network_mode = network_mode; + vm_state.config.ipv6_prefix = snapshot_config.metadata.ipv6_prefix.clone(); vm_state.config.tty = tty_mode; vm_state.config.interactive = interactive; @@ -1101,10 +1106,33 @@ pub async fn cmd_snapshot_run(args: SnapshotRunArgs) -> Result<()> { // No timeout — after snapshot restore, the VM may be CPU-starved (HHVM, EdenFS, // falcon all resume simultaneously) and fc-agent's MMDS poll + restore handler // can take minutes. Proceeding early causes exec failures; waiting is correct. + // But poll VM liveness to avoid hanging forever if Firecracker crashes. 
if !tty_mode { - match output_connected_rx.await { - Ok(()) => info!(vm_id = %vm_id, "fc-agent output connected, exec server ready"), - Err(_) => warn!(vm_id = %vm_id, "output connected_tx dropped"), + let mut liveness_interval = tokio::time::interval(std::time::Duration::from_secs(5)); + liveness_interval.tick().await; // consume immediate first tick + loop { + tokio::select! { + result = &mut output_connected_rx => { + match result { + Ok(()) => info!(vm_id = %vm_id, "fc-agent output connected, exec server ready"), + Err(_) => warn!(vm_id = %vm_id, "output connected_tx dropped"), + } + break; + } + _ = liveness_interval.tick() => { + match vm_manager.try_wait() { + Ok(Some(status)) => { + warn!(vm_id = %vm_id, ?status, "VM exited before fc-agent connected"); + break; + } + Ok(None) => {} // still running + Err(e) => { + warn!(vm_id = %vm_id, error = %e, "VM liveness check failed"); + break; + } + } + } + } } } diff --git a/src/network/routed.rs b/src/network/routed.rs index 6b5d0033..52b3eea8 100644 --- a/src/network/routed.rs +++ b/src/network/routed.rs @@ -40,6 +40,8 @@ pub struct RoutedNetwork { tap_device: String, port_mappings: Vec, loopback_ip: Option, + /// Explicit routable /64 prefix. Skips auto-detect and MASQUERADE. + ipv6_prefix: Option<String>, // Network state (populated during setup) namespace_id: Option, @@ -56,6 +58,7 @@ impl RoutedNetwork { tap_device, port_mappings, loopback_ip: None, + ipv6_prefix: None, namespace_id: None, host_veth: None, vm_ipv6: None, @@ -64,6 +67,40 @@ impl RoutedNetwork { } } + pub fn with_ipv6_prefix(mut self, prefix: String) -> Self { + self.ipv6_prefix = Some(prefix); + self + } + + /// Validate that a prefix string looks like a valid IPv6 /64 prefix + /// (4 colon-separated groups of 1-4 hex digits, e.g. "2600:1f1c:494:201"). 
+ fn validate_ipv6_prefix(prefix: &str) -> Result<()> { + let groups: Vec<&str> = prefix.split(':').collect(); + if groups.len() != 4 { + anyhow::bail!( + "invalid --ipv6-prefix '{}': expected 4 colon-separated hex groups \ + (e.g. 2600:1f1c:494:201)", + prefix + ); + } + for group in &groups { + if group.is_empty() || group.len() > 4 { + anyhow::bail!( + "invalid --ipv6-prefix '{}': each group must be 1-4 hex digits", + prefix + ); + } + if !group.chars().all(|c| c.is_ascii_hexdigit()) { + anyhow::bail!( + "invalid --ipv6-prefix '{}': '{}' is not valid hex", + prefix, + group + ); + } + } + Ok(()) + } + /// Get the network namespace ID (for setting Firecracker's namespace). pub fn namespace_id(&self) -> Option<&str> { self.namespace_id.as_deref() @@ -80,13 +117,10 @@ impl RoutedNetwork { /// Validate that the host meets requirements for routed networking. /// - /// Checks: - /// - Running as root (required for network namespaces and veth pairs) - /// - Host has a global IPv6 address with a /64 subnet - /// - ip6tables is available (for MASQUERADE) - /// /// Call this early (before VM setup) to give clear error messages. - pub fn preflight_check() -> Result<()> { + /// When `--ipv6-prefix` was set (via `with_ipv6_prefix`), the prefix format is validated + /// and the auto-detect and ip6tables checks are skipped. + pub fn preflight_check(&self) -> Result<()> { // Must be root if !nix::unistd::getuid().is_root() { anyhow::bail!( @@ -95,23 +129,29 @@ impl RoutedNetwork { ); } + if let Some(ref prefix) = self.ipv6_prefix { + Self::validate_ipv6_prefix(prefix)?; + return Ok(()); // Explicit prefix — no auto-detect or ip6tables needed + } + // Must have global IPv6 if Self::detect_host_ipv6().is_none() { anyhow::bail!( "routed networking requires a host with a global IPv6 address.\n\ - The host needs a /64 subnet (or a /128 with a /64 on-link route, e.g. AWS VPC).\n\ - Check with: ip -6 addr show scope global\n\ - If using AWS, ensure the instance has an IPv6 address assigned." 
+ The host needs a non-deprecated /64 (or a /128 with a /64 on-link route).\n\ + Use --ipv6-prefix to specify a routable /64 prefix explicitly.\n\ + Check with: ip -6 addr show scope global" ); } - // ip6tables must be available + // ip6tables must be available (for MASQUERADE) let ip6tables = std::process::Command::new("ip6tables") .args(["--version"]) .output(); if ip6tables.is_err() || !ip6tables.unwrap().status.success() { anyhow::bail!( "routed networking requires ip6tables for IPv6 MASQUERADE.\n\ + Use --ipv6-prefix to specify a routable prefix (skips MASQUERADE).\n\ Install with: apt-get install iptables" ); } @@ -124,13 +164,15 @@ impl RoutedNetwork { self.vm_ipv6.as_deref() } - /// Detect host's global IPv6 address and /64 subnet. + /// Detect host's global IPv6 address and /64 subnet for VM addressing. /// Returns (host_ip, subnet_prefix) e.g. ("2600:1f1c:494:201::1", "2600:1f1c:494:201") /// - /// Supports two common configurations: - /// - Direct /64: host has an address with /64 prefix length (e.g. home/colo servers) - /// - AWS-style /128: host has a /128 address but the kernel has a /64 on-link route - /// from Router Advertisements (standard AWS VPC behavior) + /// Skips deprecated addresses (preferred_lft 0). Supports: + /// - Direct /64: host has an active address with /64 prefix length + /// - /128 with on-link /64 route: AWS VPC, service networks + /// + /// For hosts where auto-detect fails (e.g. only deprecated /64s), use + /// --ipv6-prefix to specify the routable prefix explicitly. 
fn detect_host_ipv6() -> Option<(String, String)> { let output = std::process::Command::new("ip") .args(["-6", "addr", "show", "scope", "global"]) @@ -138,9 +180,25 @@ impl RoutedNetwork { .ok()?; let stdout = String::from_utf8_lossy(&output.stdout); - for line in stdout.lines() { + // First pass: look for /64 addresses (preferred over /128) + if let Some(result) = Self::parse_host_ipv6(&stdout, false) { + return Some(result); + } + // Second pass: check /128 addresses with on-link /64 routes + Self::parse_host_ipv6(&stdout, true) + } + + /// Parse `ip -6 addr show` output to find a usable global IPv6 address. + /// When `check_onlink` is false, only returns /64 addresses. + /// When `check_onlink` is true, returns /128 addresses that have on-link /64 routes. + /// Skips deprecated, link-local, and ULA addresses. + fn parse_host_ipv6(output: &str, check_onlink: bool) -> Option<(String, String)> { + for line in output.lines() { let line = line.trim(); if let Some(addr) = line.strip_prefix("inet6 ") { + if line.contains("deprecated") { + continue; + } if let Some(addr_cidr) = addr.split_whitespace().next() { if let Some((addr, prefix_len)) = addr_cidr.split_once('/') { if addr.starts_with("fe80") || addr.starts_with("fd") { @@ -153,20 +211,19 @@ impl RoutedNetwork { segments[0], segments[1], segments[2], segments[3] ); - if prefix_len == "64" { - // Direct /64 — use as-is + if !check_onlink && prefix_len == "64" { return Some((addr.to_string(), prefix)); } - if prefix_len == "128" { - // AWS-style: /128 address, check for /64 on-link route - if Self::has_onlink_64_route(&prefix) { - info!( - addr = %addr, - prefix = %prefix, - "using /128 address with /64 on-link route" - ); - return Some((addr.to_string(), prefix)); - } + if check_onlink + && prefix_len == "128" + && Self::has_onlink_64_route(&prefix) + { + info!( + addr = %addr, + prefix = %prefix, + "using /128 address with /64 on-link route" + ); + return Some((addr.to_string(), prefix)); } } } @@ -258,9 +315,17 
@@ impl NetworkManager for RoutedNetwork { "setting up routed networking" ); - // Detect host IPv6 subnet - let (host_ipv6, ipv6_prefix) = Self::detect_host_ipv6() - .context("routed mode requires a host with a global IPv6 /64 subnet")?; + // Resolve IPv6 /64 prefix: explicit --ipv6-prefix or auto-detect from interfaces + let (host_ipv6, ipv6_prefix) = if let Some(ref prefix) = self.ipv6_prefix { + let host_addr = format!("{}::1", prefix); + info!(prefix = %prefix, "using explicit --ipv6-prefix (routable, no MASQUERADE)"); + (host_addr, prefix.clone()) + } else { + Self::detect_host_ipv6().context( + "routed mode requires a global IPv6 /64 subnet. \ + Use --ipv6-prefix to specify one explicitly.", + )? + }; // Generate a unique IPv6 for this VM. Check for route collisions // (astronomically unlikely with 64-bit hash, but defend against it). @@ -356,6 +421,50 @@ impl NetworkManager for RoutedNetwork { .output() .await; + // Detect default interface early — used for sysctl checks AND proxy NDP below. + let default_iface = detect_default_ipv6_interface() + .await + .unwrap_or_else(|| "eth0".to_string()); + + // Verify host routing is set up correctly. These sysctls are the user's + // responsibility (host sysctl configuration), not fcvm's — but warn + // loudly if they're wrong because IPv6 egress silently fails without them. 
+ if self.ipv6_prefix.is_none() { + if let Ok(val) = + tokio::fs::read_to_string("/proc/sys/net/ipv6/conf/all/forwarding").await + { + if val.trim() != "1" { + warn!( + "net.ipv6.conf.all.forwarding={} (need 1) — fix host sysctls", + val.trim() + ); + } + } + if let Ok(val) = tokio::fs::read_to_string(format!( + "/proc/sys/net/ipv6/conf/{}/accept_ra", + default_iface + )) + .await + { + if val.trim() != "2" { + warn!( + "net.ipv6.conf.{}.accept_ra={} (need 2) — IPv6 routing may fail after reboot", + default_iface, + val.trim() + ); + } + } + let route_check = tokio::process::Command::new("ip") + .args(["-6", "route", "show", "default"]) + .output() + .await; + if let Ok(out) = route_check { + if !String::from_utf8_lossy(&out.stdout).contains("default via") { + warn!("no default IPv6 route — fix host sysctls to fix accept_ra"); + } + } + } + // 8. Assign link-local to host veth manually (auto-assignment fails when // all.forwarding=1 from a previous run). Use EUI-64 from MAC + nodad. let host_ll = generate_link_local_from_mac(&host_veth) @@ -417,9 +526,7 @@ impl NetworkManager for RoutedNetwork { .await; // 12. Add proxy NDP so the network fabric routes VM's IPv6 to this host - let default_iface = detect_default_ipv6_interface() - .await - .unwrap_or_else(|| "eth0".to_string()); + // (default_iface already detected above) // Enable proxy NDP on the interface so the kernel actually responds // to neighbor solicitations for our proxy entries. let _ = tokio::process::Command::new("sysctl") @@ -447,22 +554,28 @@ impl NetworkManager for RoutedNetwork { // On AWS, source/dest check drops packets with unassigned source IPs. // MASQUERADE rewrites the source to the host's IP so the VPC fabric // accepts the traffic. IPv4 is not routed externally — only IPv6. 
- let _ = tokio::process::Command::new("ip6tables") - .args([ - "-t", - "nat", - "-A", - "POSTROUTING", - "-o", - &default_iface, - "-s", - &format!("{}/128", vm_ipv6), - "-j", - "MASQUERADE", - ]) - .output() - .await; - info!(iface = %default_iface, "added IPv6 MASQUERADE for outbound traffic"); + // Skipped when --ipv6-prefix is set: the prefix is directly routable + // and the VM's source IP matches the cert's IP SANs. + if self.ipv6_prefix.is_some() { + info!(iface = %default_iface, "skipping MASQUERADE (--ipv6-prefix is routable)"); + } else { + let _ = tokio::process::Command::new("ip6tables") + .args([ + "-t", + "nat", + "-A", + "POSTROUTING", + "-o", + &default_iface, + "-s", + &format!("{}/128", vm_ipv6), + "-j", + "MASQUERADE", + ]) + .output() + .await; + info!(iface = %default_iface, "added IPv6 MASQUERADE for outbound traffic"); + } // 14. Port forwarding: TCP proxy listens on host loopback, connects to VM // inside the namespace via setns(2). The veth is a bridge member so @@ -568,28 +681,30 @@ impl NetworkManager for RoutedNetwork { if let Some(ref vm_ipv6) = self.vm_ipv6 { let default_iface = self.default_iface.as_deref().unwrap_or("eth0"); - // Remove IPv6 MASQUERADE rule - match tokio::process::Command::new("ip6tables") - .args([ - "-t", - "nat", - "-D", - "POSTROUTING", - "-o", - default_iface, - "-s", - &format!("{}/128", vm_ipv6), - "-j", - "MASQUERADE", - ]) - .output() - .await - { - Ok(o) if !o.status.success() => { - warn!(stderr = %String::from_utf8_lossy(&o.stderr).trim(), "ip6tables MASQUERADE cleanup failed"); + // Remove IPv6 MASQUERADE rule (only if we set one — skipped with --ipv6-prefix) + if self.ipv6_prefix.is_none() { + match tokio::process::Command::new("ip6tables") + .args([ + "-t", + "nat", + "-D", + "POSTROUTING", + "-o", + default_iface, + "-s", + &format!("{}/128", vm_ipv6), + "-j", + "MASQUERADE", + ]) + .output() + .await + { + Ok(o) if !o.status.success() => { + warn!(stderr = %String::from_utf8_lossy(&o.stderr).trim(), 
"ip6tables MASQUERADE cleanup failed"); + } + Err(e) => warn!(error = %e, "ip6tables command failed"), + _ => {} } - Err(e) => warn!(error = %e, "ip6tables command failed"), - _ => {} } // Remove proxy NDP @@ -746,3 +861,166 @@ async fn detect_default_ipv6_interface() -> Option { } None } + +#[cfg(test)] +mod tests { + use super::*; + + // --- validate_ipv6_prefix tests --- + + #[test] + fn test_validate_ipv6_prefix_valid() { + assert!(RoutedNetwork::validate_ipv6_prefix("2600:1f1c:494:201").is_ok()); + assert!(RoutedNetwork::validate_ipv6_prefix("2803:6084:7058:46f6").is_ok()); + assert!(RoutedNetwork::validate_ipv6_prefix("0:0:0:0").is_ok()); + assert!(RoutedNetwork::validate_ipv6_prefix("ffff:ffff:ffff:ffff").is_ok()); + assert!(RoutedNetwork::validate_ipv6_prefix("a:b:c:d").is_ok()); + } + + #[test] + fn test_validate_ipv6_prefix_wrong_group_count() { + let err = RoutedNetwork::validate_ipv6_prefix("2600:1f1c:494").unwrap_err(); + assert!(err + .to_string() + .contains("expected 4 colon-separated hex groups")); + + let err = RoutedNetwork::validate_ipv6_prefix("2600:1f1c:494:201:abcd").unwrap_err(); + assert!(err + .to_string() + .contains("expected 4 colon-separated hex groups")); + + let err = RoutedNetwork::validate_ipv6_prefix("2600").unwrap_err(); + assert!(err + .to_string() + .contains("expected 4 colon-separated hex groups")); + + let err = RoutedNetwork::validate_ipv6_prefix("").unwrap_err(); + assert!(err + .to_string() + .contains("expected 4 colon-separated hex groups")); + } + + #[test] + fn test_validate_ipv6_prefix_invalid_hex() { + // Non-hex characters + let err = RoutedNetwork::validate_ipv6_prefix("zzzz:1f1c:494:201").unwrap_err(); + assert!(err.to_string().contains("not valid hex")); + + // Empty group (consecutive colons) — splits to 4 groups but one is empty + let err = RoutedNetwork::validate_ipv6_prefix("2600::494:201").unwrap_err(); + assert!(err + .to_string() + .contains("each group must be 1-4 hex digits")); + + // Group too long (5 
digits) + let err = RoutedNetwork::validate_ipv6_prefix("26000:1f1c:494:201").unwrap_err(); + assert!(err + .to_string() + .contains("each group must be 1-4 hex digits")); + } + + #[test] + fn test_validate_ipv6_prefix_full_address_rejected() { + // Full IPv6 address (8 groups) should be rejected + let err = RoutedNetwork::validate_ipv6_prefix("2600:1f1c:494:201:1:2:3:4").unwrap_err(); + assert!(err + .to_string() + .contains("expected 4 colon-separated hex groups")); + + // Compressed full address + let err = RoutedNetwork::validate_ipv6_prefix("2600:1f1c:494:201::1").unwrap_err(); + assert!(err + .to_string() + .contains("expected 4 colon-separated hex groups")); + } + + // --- generate_vm_ipv6 tests --- + + #[test] + fn test_generate_vm_ipv6_deterministic() { + let a1 = RoutedNetwork::generate_vm_ipv6("2600:1f1c:494:201", "vm-abc"); + let a2 = RoutedNetwork::generate_vm_ipv6("2600:1f1c:494:201", "vm-abc"); + assert_eq!(a1, a2, "same inputs must produce same output"); + + let b = RoutedNetwork::generate_vm_ipv6("2600:1f1c:494:201", "vm-xyz"); + assert_ne!(a1, b, "different vm_ids must produce different addresses"); + + let c = RoutedNetwork::generate_vm_ipv6("2803:6084:7058:46f6", "vm-abc"); + assert_ne!(a1, c, "different prefixes must produce different addresses"); + } + + #[test] + fn test_generate_vm_ipv6_format() { + let addr = RoutedNetwork::generate_vm_ipv6("2600:1f1c:494:201", "vm-test"); + assert!( + addr.starts_with("2600:1f1c:494:201:"), + "address must start with prefix: {}", + addr + ); + // Should have 8 colon-separated groups total (4 prefix + 4 interface ID) + let groups: Vec<&str> = addr.split(':').collect(); + assert_eq!(groups.len(), 8, "IPv6 must have 8 groups: {}", addr); + // Each interface ID group should be valid hex + for group in &groups[4..] 
{ + assert!( + u16::from_str_radix(group, 16).is_ok(), + "group '{}' is not valid hex in: {}", + group, + addr + ); + } + } + + // --- parse_host_ipv6 tests (deprecated address filtering) --- + + #[test] + fn test_parse_host_ipv6_skips_deprecated() { + let output = "\ +2: eth0: mtu 9001 state UP + inet6 2600:1f1c:494:201::1/64 scope global deprecated dynamic noprefixroute + valid_lft 3552sec preferred_lft 0sec + inet6 2803:6084:7058:46f6::1/64 scope global dynamic noprefixroute + valid_lft 3552sec preferred_lft 3552sec"; + + let result = RoutedNetwork::parse_host_ipv6(output, false); + assert!(result.is_some(), "should find non-deprecated address"); + let (addr, prefix) = result.unwrap(); + assert_eq!(addr, "2803:6084:7058:46f6::1"); + assert_eq!(prefix, "2803:6084:7058:46f6"); + } + + #[test] + fn test_parse_host_ipv6_skips_link_local_and_ula() { + let output = "\ + inet6 fe80::1/64 scope global + inet6 fd00::1/64 scope global + inet6 2600:1f1c:494:201::5/64 scope global dynamic"; + + let result = RoutedNetwork::parse_host_ipv6(output, false); + assert!(result.is_some()); + let (addr, _) = result.unwrap(); + assert_eq!(addr, "2600:1f1c:494:201::5"); + } + + #[test] + fn test_parse_host_ipv6_all_deprecated_returns_none() { + let output = "\ + inet6 2600:1f1c:494:201::1/64 scope global deprecated dynamic + inet6 2803:6084:7058:46f6::1/64 scope global deprecated dynamic"; + + let result = RoutedNetwork::parse_host_ipv6(output, false); + assert!(result.is_none(), "all deprecated should return None"); + } + + #[test] + fn test_parse_host_ipv6_extracts_prefix() { + let output = " inet6 2600:1f1c:0494:0201::abcd/64 scope global dynamic"; + + let result = RoutedNetwork::parse_host_ipv6(output, false); + assert!(result.is_some()); + let (addr, prefix) = result.unwrap(); + assert_eq!(addr, "2600:1f1c:0494:0201::abcd"); + // Prefix is normalized through Ipv6Addr parsing (leading zeros stripped) + assert_eq!(prefix, "2600:1f1c:494:201"); + } +} diff --git a/src/state/types.rs 
b/src/state/types.rs index 9670e16f..01106b37 100644 --- a/src/state/types.rs +++ b/src/state/types.rs @@ -150,6 +150,10 @@ pub struct VmConfig { /// Stored so clones inherit the same networking mode from snapshots. #[serde(default)] pub network_mode: crate::firecracker::FcNetworkMode, + /// Explicit routable IPv6 /64 prefix for routed mode. + /// When set, MASQUERADE is skipped and auto-detect is bypassed. + #[serde(default)] + pub ipv6_prefix: Option<String>, /// Whether a PTY is allocated for the container. #[serde(default)] pub tty: bool, @@ -199,6 +203,7 @@ impl VmState { portable_volumes: false, user: None, username: None, + ipv6_prefix: None, }, } } diff --git a/src/storage/snapshot.rs b/src/storage/snapshot.rs index 2f612432..d76d72bf 100644 --- a/src/storage/snapshot.rs +++ b/src/storage/snapshot.rs @@ -96,6 +96,9 @@ pub struct SnapshotMetadata { /// Network mode (bridged, rootless, routed) inherited by clones #[serde(default)] pub network_mode: crate::firecracker::FcNetworkMode, + /// Explicit routable IPv6 /64 prefix for routed mode (skips auto-detect and MASQUERADE) + #[serde(default)] + pub ipv6_prefix: Option<String>, /// Whether PTY is allocated for the container #[serde(default)] pub tty: bool, @@ -283,6 +286,7 @@ mod tests { user: None, port_mappings: vec![], network_mode: Default::default(), + ipv6_prefix: None, tty: false, interactive: false, }, @@ -407,6 +411,7 @@ mod tests { user: None, port_mappings: vec![], network_mode: Default::default(), + ipv6_prefix: None, tty: false, interactive: false, }, @@ -475,6 +480,7 @@ mod tests { user: None, port_mappings: vec![], network_mode: Default::default(), + ipv6_prefix: None, tty: false, interactive: false, }, @@ -530,6 +536,7 @@ mod tests { user: None, port_mappings: vec![], network_mode: Default::default(), + ipv6_prefix: None, tty: false, interactive: false, }, @@ -636,6 +643,7 @@ mod tests { user: None, port_mappings: vec![], network_mode: Default::default(), + ipv6_prefix: None, tty: false, interactive: false, }, 
@@ -655,4 +663,58 @@ mod tests { let parsed: SnapshotConfig = serde_json::from_str(&json).unwrap(); assert_eq!(parsed.snapshot_type, SnapshotType::System); } + + #[test] + fn test_snapshot_metadata_ipv6_prefix_roundtrip() { + let metadata = SnapshotMetadata { + image: "nginx:alpine".to_string(), + vcpu: 2, + memory_mib: 512, + network_config: NetworkConfig::default(), + volumes: vec![], + health_check_url: None, + health_check_timeout: 5, + hugepages: false, + extra_disks: vec![], + username: None, + user: None, + port_mappings: vec![], + network_mode: Default::default(), + ipv6_prefix: Some("2600:1f1c:494:201".to_string()), + tty: false, + interactive: false, + }; + + let json = serde_json::to_string(&metadata).unwrap(); + assert!(json.contains("2600:1f1c:494:201")); + + let parsed: SnapshotMetadata = serde_json::from_str(&json).unwrap(); + assert_eq!( + parsed.ipv6_prefix, + Some("2600:1f1c:494:201".to_string()), + "ipv6_prefix must survive serialization roundtrip" + ); + } + + #[test] + fn test_snapshot_metadata_ipv6_prefix_backward_compat() { + // Old snapshots won't have ipv6_prefix — must deserialize to None + let json = r#"{ + "image": "nginx:alpine", + "vcpu": 2, + "memory_mib": 512, + "network_config": { + "tap_device": "tap-old", + "guest_mac": "AA:BB:CC:DD:EE:FF" + } + }"#; + + let metadata: SnapshotMetadata = serde_json::from_str(json).unwrap(); + assert_eq!( + metadata.ipv6_prefix, None, + "missing ipv6_prefix must default to None for backward compat" + ); + assert_eq!(metadata.image, "nginx:alpine"); + assert_eq!(metadata.vcpu, 2); + } } diff --git a/tests/test_health_monitor.rs b/tests/test_health_monitor.rs index ed3ab0c2..cf857840 100644 --- a/tests/test_health_monitor.rs +++ b/tests/test_health_monitor.rs @@ -80,6 +80,7 @@ async fn test_health_monitor_behaviors() { original_vsock_vm_id: None, port_mappings: vec![], network_mode: Default::default(), + ipv6_prefix: None, tty: false, interactive: false, labels: std::collections::HashMap::new(), diff 
--git a/tests/test_library_api.rs b/tests/test_library_api.rs index e69e9b9a..286fc415 100644 --- a/tests/test_library_api.rs +++ b/tests/test_library_api.rs @@ -50,6 +50,7 @@ fn test_run_args(name: &str) -> RunArgs { label: vec![], non_blocking_output: false, health_check_timeout: 5, + ipv6_prefix: None, image: common::TEST_IMAGE.to_string(), command_args: vec![], } diff --git a/tests/test_state_manager.rs b/tests/test_state_manager.rs index 96675a4f..4fdac9cd 100644 --- a/tests/test_state_manager.rs +++ b/tests/test_state_manager.rs @@ -53,6 +53,7 @@ async fn test_state_persistence() { original_vsock_vm_id: None, port_mappings: vec![], network_mode: Default::default(), + ipv6_prefix: None, tty: false, interactive: false, labels: std::collections::HashMap::new(), @@ -129,6 +130,7 @@ async fn test_list_vms() { original_vsock_vm_id: None, port_mappings: vec![], network_mode: Default::default(), + ipv6_prefix: None, tty: false, interactive: false, labels: std::collections::HashMap::new(), @@ -189,6 +191,7 @@ async fn test_load_state_by_name_duplicate_detection() { original_vsock_vm_id: None, port_mappings: vec![], network_mode: Default::default(), + ipv6_prefix: None, tty: false, interactive: false, labels: std::collections::HashMap::new(), @@ -243,6 +246,7 @@ async fn test_load_state_by_name_duplicate_detection() { original_vsock_vm_id: None, port_mappings: vec![], network_mode: Default::default(), + ipv6_prefix: None, tty: false, interactive: false, labels: std::collections::HashMap::new(), @@ -257,3 +261,111 @@ async fn test_load_state_by_name_duplicate_detection() { let found = manager.load_state_by_name("unique-name").await.unwrap(); assert_eq!(found.pid, Some(6000)); } + +/// Helper to create a minimal VmState with given vm_id and pid +fn make_vm_state(vm_id: &str, name: &str, pid: u32) -> VmState { + VmState { + schema_version: 1, + vm_id: vm_id.to_string(), + name: Some(name.to_string()), + status: VmStatus::Running, + health_status: HealthStatus::Healthy, + 
exit_code: None, + pid: Some(pid), + holder_pid: None, + created_at: Utc::now(), + last_updated: Utc::now(), + config: VmConfig { + image: "test:latest".to_string(), + vcpu: 1, + memory_mib: 256, + network: NetworkConfig::default(), + volumes: vec![], + extra_disks: vec![], + nfs_shares: vec![], + health_check_url: None, + snapshot_name: None, + process_type: Some(ProcessType::Vm), + serve_pid: None, + original_vsock_vm_id: None, + port_mappings: vec![], + network_mode: Default::default(), + ipv6_prefix: None, + tty: false, + interactive: false, + labels: std::collections::HashMap::new(), + hugepages: false, + portable_volumes: false, + user: None, + username: None, + health_check_timeout: 5, + }, + } +} + +#[tokio::test] +async fn test_load_state_by_pid_found() { + let temp_dir = TempDir::new().unwrap(); + let manager = StateManager::new(temp_dir.path().to_path_buf()); + manager.init().await.unwrap(); + + // Use our own PID (guaranteed to exist in /proc) + let my_pid = std::process::id(); + let state = make_vm_state("vm-pid-test", "pid-test", my_pid); + manager.save_state(&state).await.unwrap(); + + let found = manager.load_state_by_pid(my_pid).await.unwrap(); + assert_eq!(found.vm_id, "vm-pid-test"); + assert_eq!(found.pid, Some(my_pid)); +} + +#[tokio::test] +async fn test_load_state_by_pid_not_found() { + let temp_dir = TempDir::new().unwrap(); + let manager = StateManager::new(temp_dir.path().to_path_buf()); + manager.init().await.unwrap(); + + let my_pid = std::process::id(); + let state = make_vm_state("vm-other", "other", my_pid); + manager.save_state(&state).await.unwrap(); + + // Search for a PID that no VM has + let err = manager + .load_state_by_pid(99999999) + .await + .expect_err("should fail for unknown PID"); + assert!( + err.to_string().contains("No VM found with PID"), + "error should mention PID: {}", + err + ); +} + +#[tokio::test] +async fn test_load_state_by_pid_cleans_stale_on_retry() { + let temp_dir = TempDir::new().unwrap(); + let manager 
= StateManager::new(temp_dir.path().to_path_buf()); + manager.init().await.unwrap(); + + // Create a stale state file with a PID that doesn't exist. + // Use a very high PID that's virtually guaranteed to not exist. + let stale_pid = 4_000_000_000u32; + let stale_state = make_vm_state("vm-stale", "stale", stale_pid); + manager.save_state(&stale_state).await.unwrap(); + + // Verify the stale file exists + let vms = manager.list_vms().await.unwrap(); + assert_eq!(vms.len(), 1, "stale VM state should exist before cleanup"); + + // load_state_by_pid for a non-existent PID triggers cleanup_stale_state + let _ = manager.load_state_by_pid(99999998).await; + + // After the failed lookup, the stale state should have been cleaned up + // (PID 4000000000 doesn't exist in /proc) + let vms_after = manager.list_vms().await.unwrap(); + assert_eq!( + vms_after.len(), + 0, + "stale state file should be removed after cleanup" + ); +}