Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .claude/CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -1229,7 +1229,7 @@ fuse-pipe/benches/
- Each VM gets a unique IPv6 derived from host's /64 subnet via hash of vm_id
- Network namespace with bridge (br0) connecting TAP and veth for L2 forwarding
- Proxy NDP on default interface makes VM IPv6 routable from network fabric
- ip6tables MASQUERADE for AWS VPC source/dest checks
- ip6tables MASQUERADE for AWS VPC source/dest checks (skipped when `--ipv6-prefix` is set)
- Port forwarding via built-in TCP proxy (setns + tokio relay) on unique loopback IP (same allocation as rootless)
- IPv4 stays internal to namespace (health checks only); all external traffic uses IPv6
- Egress proxy is NOT used — IPv6 goes natively through the kernel stack
Expand Down
11 changes: 6 additions & 5 deletions DESIGN.md
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ iptables -t nat -A PREROUTING -d 172.30.x.1 -p tcp --dport 8080 -j DNAT --to-des
Uses veth pairs + IPv6 routing for kernel line-rate networking without userspace proxies.

**Features**:
- Requires root and a host with a global IPv6 /64 subnet
- Requires root and a host with a global IPv6 /64 subnet (or `--ipv6-prefix` to specify one explicitly)
- Native IPv6 routing through the kernel stack (no userspace L4 translation)
- Each VM gets a unique IPv6 derived from the host's /64 prefix
- Port forwarding via built-in TCP proxy (`setns` + tokio relay) on loopback IP (same as rootless)
Expand All @@ -308,11 +308,12 @@ struct RoutedNetwork {
vm_ipv6: Option<String>,
default_iface: Option<String>,
proxy_handles: Vec<JoinHandle<()>>,
ipv6_prefix: Option<String>, // explicit /64 prefix (skips auto-detect + MASQUERADE)
}

async fn setup() -> Result<NetworkConfig> {
preflight_check() // root, IPv6, ip6tables
detect_host_ipv6() // find /64 subnet (or /128 with on-link /64)
self.preflight_check() // root, IPv6, ip6tables (ip6tables check skipped if --ipv6-prefix is set)
detect_host_ipv6() // find /64 subnet (or /128 with on-link /64); skipped if --ipv6-prefix is set
generate_vm_ipv6(prefix, vm_id) // deterministic IPv6 from hash
create_namespace(ns_name)
create_veth_pair(host_veth, guest_veth)
Expand All @@ -323,7 +324,7 @@ async fn setup() -> Result<NetworkConfig> {
// Namespace: default IPv6 route via host veth link-local
// Host: /128 route to VM IPv6 via host veth
// Proxy NDP on default interface
// ip6tables MASQUERADE for outbound
// ip6tables MASQUERADE for outbound (skipped if --ipv6-prefix is set)
// TCP proxy port forwarding on loopback IP (setns + tokio relay)
}
```
Expand Down Expand Up @@ -1366,7 +1367,7 @@ fcvm snapshot run --pid <SERVE_PID> [OPTIONS]
--exec <CMD> Execute command in container after clone is healthy
```

Network mode, port mappings, TTY, and interactive flags are inherited from the snapshot
Network mode, port mappings, TTY, interactive flags, and `--ipv6-prefix` are inherited from the snapshot
metadata automatically — no need to re-specify them on clone.

**Examples**:
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ fcvm auto-forwards `http_proxy`/`https_proxy` from host to VM via MMDS.
- Firecracker binary in PATH
- For rootless: `passt` package (provides `pasta`)
- For bridged: sudo, iptables, iproute2
- For routed: sudo, ip6tables, iproute2, host with global IPv6 /64
- For routed: sudo, iproute2, and a host with a global IPv6 /64 (or an explicit `--ipv6-prefix`); ip6tables is also required unless `--ipv6-prefix` is set
- For rootfs build: qemu-utils, e2fsprogs

**Storage:** btrfs at `/mnt/fcvm-btrfs` (auto-created as loopback on non-btrfs hosts)
Expand Down Expand Up @@ -336,6 +336,7 @@ See [`Containerfile`](Containerfile) for the complete dependency list used in CI
--portable-volumes Path-hash inodes for cross-machine snapshot/restore
--rootfs-size <SIZE> Minimum free space on rootfs (default: 10G)
--no-snapshot Disable automatic snapshot creation
--ipv6-prefix <PREFIX> Use explicit /64 prefix for routed mode (skips auto-detect and MASQUERADE)
```

Run `fcvm --help` or `fcvm <command> --help` for full options.
Expand Down
8 changes: 8 additions & 0 deletions src/cli/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,14 @@ pub struct RunArgs {
#[arg(long, value_enum, default_value_t = NetworkMode::Rootless)]
pub network: NetworkMode,

/// Routable IPv6 /64 prefix for routed mode VM addressing.
/// Each VM gets a unique address in this prefix via NDP proxy.
/// When set, MASQUERADE is skipped (the prefix is directly routable).
/// When not set, auto-detected from host interfaces.
/// Example: --ipv6-prefix 2803:6084:7058:46f6
#[arg(long)]
pub ipv6_prefix: Option<String>,

/// HTTP health check URL. If not specified, health is based on container running status.
/// The URL hostname is sent as the Host header; the connection goes to the guest IP.
/// Example: --health-check http://myapp.example.com/status
Expand Down
1 change: 1 addition & 0 deletions src/commands/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1282,6 +1282,7 @@ pub fn build_snapshot_config(
user: vm_state.config.user.clone(),
port_mappings: vm_state.config.port_mappings.clone(),
network_mode: vm_state.config.network_mode,
ipv6_prefix: vm_state.config.ipv6_prefix.clone(),
tty: vm_state.config.tty,
interactive: vm_state.config.interactive,
},
Expand Down
12 changes: 10 additions & 2 deletions src/commands/podman/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -600,6 +600,7 @@ pub async fn prepare_vm(mut args: RunArgs) -> Result<Option<VmContext>> {
vm_state.config.portable_volumes = args.portable_volumes;
vm_state.config.port_mappings = port_mappings.clone();
vm_state.config.network_mode = args.network.into();
vm_state.config.ipv6_prefix = args.ipv6_prefix.clone();
vm_state.config.tty = args.tty;
vm_state.config.interactive = args.interactive;
vm_state.config.user = args.user.clone();
Expand Down Expand Up @@ -650,9 +651,13 @@ pub async fn prepare_vm(mut args: RunArgs) -> Result<Option<VmContext>> {
port_mappings.clone(),
)),
NetworkMode::Routed => {
RoutedNetwork::preflight_check().context("routed mode preflight check failed")?;
let mut net =
RoutedNetwork::new(vm_id.clone(), tap_device.clone(), port_mappings.clone());
if let Some(ref prefix) = args.ipv6_prefix {
net = net.with_ipv6_prefix(prefix.clone());
}
net.preflight_check()
.context("routed mode preflight check failed")?;
if !port_mappings.is_empty() {
let loopback_ip = state_manager
.allocate_loopback_ip(&mut vm_state)
Expand Down Expand Up @@ -813,7 +818,9 @@ pub async fn prepare_vm(mut args: RunArgs) -> Result<Option<VmContext>> {
None
};

// Start egress proxy for rootless mode (bypasses TAP/bridge for outbound TCP)
// Start egress proxy for rootless mode only.
// Routed mode uses native IPv6 kernel routing — no proxy needed.
// Services use mutual TLS with client certs, not source IP matching.
let egress_proxy_handle = if matches!(args.network, NetworkMode::Rootless) {
let socket_path = vsock_socket_path.clone();
Some(tokio::spawn(async move {
Expand Down Expand Up @@ -1161,6 +1168,7 @@ mod tests {
rootfs_type: None,
non_blocking_output: false,
label: vec![],
ipv6_prefix: None,
image: "alpine:latest".to_string(),
command_args: vec![],
}
Expand Down
1 change: 1 addition & 0 deletions src/commands/serve.rs
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,7 @@ async fn create_sandbox(
publish: vec![],
balloon: None,
network: crate::cli::NetworkMode::Rootless,
ipv6_prefix: None,
health_check: None,
health_check_timeout: 5,
privileged: false,
Expand Down
38 changes: 33 additions & 5 deletions src/commands/snapshot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -643,7 +643,7 @@ pub async fn cmd_snapshot_run(args: SnapshotRunArgs) -> Result<()> {
// stays stuck reading from the old (dead) connection after VM resume resets vsock.
let output_reconnect = Arc::new(tokio::sync::Notify::new());
// Channel to know when fc-agent's output connection arrives (gates health monitor)
let (output_connected_tx, output_connected_rx) = tokio::sync::oneshot::channel();
let (output_connected_tx, mut output_connected_rx) = tokio::sync::oneshot::channel();
let output_handle = if !tty_mode {
let socket_path = output_socket_path.clone();
let vm_id_clone = vm_id.clone();
Expand Down Expand Up @@ -728,7 +728,11 @@ pub async fn cmd_snapshot_run(args: SnapshotRunArgs) -> Result<()> {
FcNetworkMode::Routed => {
let mut net =
RoutedNetwork::new(vm_id.clone(), tap_device.clone(), port_mappings.clone());
net.preflight_check().context("routed mode preflight check failed")?;
if let Some(ref prefix) = snapshot_config.metadata.ipv6_prefix {
net = net.with_ipv6_prefix(prefix.clone());
}
net.preflight_check()
.context("routed mode preflight check failed")?;
if !port_mappings.is_empty() {
let loopback_ip = state_manager
.allocate_loopback_ip(&mut vm_state)
Expand Down Expand Up @@ -790,6 +794,7 @@ pub async fn cmd_snapshot_run(args: SnapshotRunArgs) -> Result<()> {
vm_state.config.user = snapshot_config.metadata.user.clone();
vm_state.config.port_mappings = port_mappings;
vm_state.config.network_mode = network_mode;
vm_state.config.ipv6_prefix = snapshot_config.metadata.ipv6_prefix.clone();
vm_state.config.tty = tty_mode;
vm_state.config.interactive = interactive;

Expand Down Expand Up @@ -1101,10 +1106,33 @@ pub async fn cmd_snapshot_run(args: SnapshotRunArgs) -> Result<()> {
// No timeout — after snapshot restore, the VM may be CPU-starved (HHVM, EdenFS,
// falcon all resume simultaneously) and fc-agent's MMDS poll + restore handler
// can take minutes. Proceeding early causes exec failures; waiting is correct.
// But poll VM liveness to avoid hanging forever if Firecracker crashes.
if !tty_mode {
match output_connected_rx.await {
Ok(()) => info!(vm_id = %vm_id, "fc-agent output connected, exec server ready"),
Err(_) => warn!(vm_id = %vm_id, "output connected_tx dropped"),
let mut liveness_interval = tokio::time::interval(std::time::Duration::from_secs(5));
liveness_interval.tick().await; // consume immediate first tick
loop {
tokio::select! {
result = &mut output_connected_rx => {
match result {
Ok(()) => info!(vm_id = %vm_id, "fc-agent output connected, exec server ready"),
Err(_) => warn!(vm_id = %vm_id, "output connected_tx dropped"),
}
break;
}
_ = liveness_interval.tick() => {
match vm_manager.try_wait() {
Ok(Some(status)) => {
warn!(vm_id = %vm_id, ?status, "VM exited before fc-agent connected");
break;
}
Ok(None) => {} // still running
Err(e) => {
warn!(vm_id = %vm_id, error = %e, "VM liveness check failed");
break;
}
}
}
}
}
}

Expand Down
Loading
Loading