diff --git a/Cargo.lock b/Cargo.lock index 09607536ae6..b52370bf785 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -651,6 +651,16 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "console_error_panic_hook" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc" +dependencies = [ + "cfg-if", + "wasm-bindgen", +] + [[package]] name = "const-oid" version = "0.9.6" @@ -2205,6 +2215,7 @@ dependencies = [ "bytes", "cfg_aliases", "clap", + "console_error_panic_hook", "crypto_box", "data-encoding", "der", @@ -2491,7 +2502,6 @@ dependencies = [ "sha1", "simdutf8", "strum", - "stun-rs", "testresult", "thiserror 2.0.12", "time", @@ -2911,7 +2921,7 @@ dependencies = [ [[package]] name = "netwatch" version = "0.5.0" -source = "git+https://github.com/n0-computer/net-tools?branch=feat-new-udp-api#5a6959176ca61fb4286458a4cdb30b61f07abd03" +source = "git+https://github.com/n0-computer/net-tools?branch=feat-interfaces#733e2e0d058ddaea2d5d94b278c689b432083e27" dependencies = [ "atomic-waker", "bytes", @@ -5488,7 +5498,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index e4fb3c2a8c2..d4d20598eca 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,4 +50,4 @@ unused-async = "warn" iroh-quinn = { git = "https://github.com/n0-computer/quinn", branch = "matheus23/mut-self" } iroh-quinn-udp = { git = "https://github.com/n0-computer/quinn", branch = "matheus23/mut-self" } iroh-quinn-proto = { git = "https://github.com/n0-computer/quinn", branch = "matheus23/mut-self" } -netwatch = { git = "https://github.com/n0-computer/net-tools", branch = "feat-new-udp-api"} +netwatch = { git = "https://github.com/n0-computer/net-tools", branch = "feat-interfaces"} diff --git a/example.config.toml b/example.config.toml index 04d5350b9f3..f36385f79d3 100644 --- a/example.config.toml +++ b/example.config.toml @@ -1,5 +1,2 @@ [[relay_nodes]] url = "https://foo.bar" -stun_only = false -stun_port = 1244 - diff --git a/iroh-relay/Cargo.toml b/iroh-relay/Cargo.toml index 0e56551fb9a..d5087b54636 100644 --- a/iroh-relay/Cargo.toml +++ b/iroh-relay/Cargo.toml @@ -52,7 +52,6 @@ reqwest = { version = "0.12", default-features = false, features = [ rustls = { version = "0.23", default-features = false, features = ["ring"] } serde = { version = "1", features = ["derive", "rc"] } strum = { version = "0.26", features = ["derive"] } -stun-rs = "0.1.11" thiserror = "2" tokio = { version = "1", features = [ "io-util", diff --git a/iroh-relay/src/defaults.rs b/iroh-relay/src/defaults.rs index 12043a0f36e..47959c285bf 100644 --- a/iroh-relay/src/defaults.rs +++ b/iroh-relay/src/defaults.rs @@ -1,10 +1,5 @@ //! Default values used in the relay. -/// The default STUN port used by the Relay server. 
-/// -/// The STUN port as defined by [RFC 8489]() -pub const DEFAULT_STUN_PORT: u16 = 3478; - /// The default QUIC port used by the Relay server to accept QUIC connections /// for QUIC address discovery /// diff --git a/iroh-relay/src/main.rs b/iroh-relay/src/main.rs index 80ff1401f02..ead6f0b11fa 100644 --- a/iroh-relay/src/main.rs +++ b/iroh-relay/src/main.rs @@ -16,7 +16,6 @@ use iroh_base::NodeId; use iroh_relay::{ defaults::{ DEFAULT_HTTPS_PORT, DEFAULT_HTTP_PORT, DEFAULT_METRICS_PORT, DEFAULT_RELAY_QUIC_PORT, - DEFAULT_STUN_PORT, }, server::{self as relay, ClientRateLimit, QuicConfig}, }; @@ -112,7 +111,7 @@ struct Config { /// /// Defaults to `true`. /// - /// Disabling will leave only the STUN server. The `http_bind_addr` and `tls` + /// Disabling will leave only the quic server. The `http_bind_addr` and `tls` /// configuration options will be ignored. #[serde(default = "cfg_defaults::enable_relay")] enable_relay: bool, @@ -138,15 +137,6 @@ struct Config { /// /// Must exist if `enable_quic_addr_discovery` is `true`. tls: Option, - /// Whether to run a STUN server. It will bind to the same IP as the `addr` field. - /// - /// Defaults to `true`. - #[serde(default = "cfg_defaults::enable_stun")] - enable_stun: bool, - /// The socket address to bind the STUN server on. - /// - /// Defaults to using the `http_bind_addr` with the port set to [`DEFAULT_STUN_PORT`]. - stun_bind_addr: Option, /// Whether to allow QUIC connections for QUIC address discovery /// /// If no `tls` is set, this will error. @@ -172,7 +162,7 @@ struct Config { key_cache_capacity: Option, /// Access control for relaying connections. /// - /// This controls which nodes are allowed to relay connections, other endpoints, like STUN are not controlled by this. + /// This controls which nodes are allowed to relay connections, other endpoints, are not controlled by this. #[serde(default)] access: AccessConfig, } @@ -312,11 +302,6 @@ impl Config { .unwrap_or((Ipv6Addr::UNSPECIFIED, DEFAULT_HTTP_PORT).into()) } - fn stun_bind_addr(&self) -> SocketAddr { - self.stun_bind_addr - .unwrap_or_else(|| SocketAddr::new(self.http_bind_addr().ip(), DEFAULT_STUN_PORT)) - } - fn metrics_bind_addr(&self) -> SocketAddr { self.metrics_bind_addr .unwrap_or_else(|| SocketAddr::new(self.http_bind_addr().ip(), DEFAULT_METRICS_PORT)) @@ -329,8 +314,6 @@ impl Default for Config { enable_relay: cfg_defaults::enable_relay(), http_bind_addr: None, tls: None, - enable_stun: cfg_defaults::enable_stun(), - stun_bind_addr: None, enable_quic_addr_discovery: cfg_defaults::enable_quic_addr_discovery(), limits: None, enable_metrics: cfg_defaults::enable_metrics(), @@ -350,10 +333,6 @@ mod cfg_defaults { true } - pub(crate) fn enable_stun() -> bool { - true - } - pub(crate) fn enable_quic_addr_discovery() -> bool { false } @@ -712,12 +691,8 @@ async fn build_relay_config(cfg: Config) -> Result Vec { - let fp = Fingerprint::default(); - let msg = StunMessageBuilder::new(methods::BINDING, MessageClass::Request) - .with_transaction_id(tx) - .with_attribute(fp) - .build(); - - let encoder = MessageEncoderBuilder::default().build(); - let mut buffer = vec![0u8; 150]; - let size = encoder.encode(&mut buffer, &msg).expect("invalid encoding"); - buffer.truncate(size); - buffer -} - -/// Generates a binding response. 
-pub fn response(tx: TransactionId, addr: SocketAddr) -> Vec { - let msg = StunMessageBuilder::new(methods::BINDING, MessageClass::SuccessResponse) - .with_transaction_id(tx) - .with_attribute(XorMappedAddress::from(addr)) - .build(); - - let encoder = MessageEncoderBuilder::default().build(); - let mut buffer = vec![0u8; 150]; - let size = encoder.encode(&mut buffer, &msg).expect("invalid encoding"); - buffer.truncate(size); - buffer -} - -// Copied from stun_rs -// const MAGIC_COOKIE: Cookie = Cookie(0x2112_A442); -const COOKIE: [u8; 4] = 0x2112_A442u32.to_be_bytes(); - -/// Reports whether b is a STUN message. -pub fn is(b: &[u8]) -> bool { - b.len() >= stun_rs::MESSAGE_HEADER_SIZE && - b[0]&0b11000000 == 0 && // top two bits must be zero - b[4..8] == COOKIE -} - -/// Parses a STUN binding request. -pub fn parse_binding_request(b: &[u8]) -> Result { - let ctx = DecoderContextBuilder::default() - .with_validation() // ensure fingerprint is validated - .build(); - let decoder = MessageDecoderBuilder::default().with_context(ctx).build(); - let (msg, _) = decoder.decode(b).map_err(|_| Error::InvalidMessage)?; - - let tx = *msg.transaction_id(); - if msg.method() != methods::BINDING { - return Err(Error::NotBinding); - } - - // TODO: Tailscale sets the software to tailscale, we should check if we want to do this too. - - if msg - .attributes() - .last() - .map(|attr| !attr.is_fingerprint()) - .unwrap_or_default() - { - return Err(Error::NoFingerprint); - } - - Ok(tx) -} - -/// Parses a successful binding response STUN packet. -/// The IP address is extracted from the XOR-MAPPED-ADDRESS attribute. -pub fn parse_response(b: &[u8]) -> Result<(TransactionId, SocketAddr), Error> { - let decoder = MessageDecoder::default(); - let (msg, _) = decoder.decode(b).map_err(|_| Error::InvalidMessage)?; - - let tx = *msg.transaction_id(); - if msg.class() != MessageClass::SuccessResponse { - return Err(Error::NotSuccessResponse); - } - - // Read through the attributes. - // The the addr+port reported by XOR-MAPPED-ADDRESS - // as the canonical value. If the attribute is not - // present but the STUN server responds with - // MAPPED-ADDRESS we fall back to it. 
- - let mut addr = None; - let mut fallback_addr = None; - for attr in msg.attributes() { - match attr { - StunAttribute::XorMappedAddress(a) => { - let mut a = *a.socket_address(); - a.set_ip(a.ip().to_canonical()); - addr = Some(a); - } - StunAttribute::MappedAddress(a) => { - let mut a = *a.socket_address(); - a.set_ip(a.ip().to_canonical()); - fallback_addr = Some(a); - } - _ => {} - } - } - - if let Some(addr) = addr { - return Ok((tx, addr)); - } - - if let Some(addr) = fallback_addr { - return Ok((tx, addr)); - } - - Err(Error::MalformedAttrs) -} - -#[cfg(test)] -mod tests { - - use std::net::{IpAddr, Ipv4Addr}; - - use super::*; - - struct ResponseTestCase { - name: &'static str, - data: Vec, - want_tid: Vec, - want_addr: IpAddr, - want_port: u16, - } - - #[test] - fn test_parse_response() { - let cases = vec![ - ResponseTestCase { - name: "google-1", - data: vec![ - 0x01, 0x01, 0x00, 0x0c, 0x21, 0x12, 0xa4, 0x42, - 0x23, 0x60, 0xb1, 0x1e, 0x3e, 0xc6, 0x8f, 0xfa, - 0x93, 0xe0, 0x80, 0x07, 0x00, 0x20, 0x00, 0x08, - 0x00, 0x01, 0xc7, 0x86, 0x69, 0x57, 0x85, 0x6f, - ], - want_tid: vec![ - 0x23, 0x60, 0xb1, 0x1e, 0x3e, 0xc6, 0x8f, 0xfa, - 0x93, 0xe0, 0x80, 0x07, - ], - want_addr: IpAddr::V4(Ipv4Addr::from([72, 69, 33, 45])), - want_port: 59028, - }, - ResponseTestCase { - name: "google-2", - data: vec![ - 0x01, 0x01, 0x00, 0x0c, 0x21, 0x12, 0xa4, 0x42, - 0xf9, 0xf1, 0x21, 0xcb, 0xde, 0x7d, 0x7c, 0x75, - 0x92, 0x3c, 0xe2, 0x71, 0x00, 0x20, 0x00, 0x08, - 0x00, 0x01, 0xc7, 0x87, 0x69, 0x57, 0x85, 0x6f, - ], - want_tid: vec![ - 0xf9, 0xf1, 0x21, 0xcb, 0xde, 0x7d, 0x7c, 0x75, - 0x92, 0x3c, 0xe2, 0x71, - ], - want_addr: IpAddr::V4(Ipv4Addr::from([72, 69, 33, 45])), - want_port: 59029, - }, - ResponseTestCase{ - name: "stun.sipgate.net:10000", - data: vec![ - 0x01, 0x01, 0x00, 0x44, 0x21, 0x12, 0xa4, 0x42, - 0x48, 0x2e, 0xb6, 0x47, 0x15, 0xe8, 0xb2, 0x8e, - 0xae, 0xad, 0x64, 0x44, 0x00, 0x01, 0x00, 0x08, - 0x00, 0x01, 0xe4, 0xab, 0x48, 0x45, 0x21, 0x2d, - 0x00, 0x04, 0x00, 0x08, 0x00, 0x01, 0x27, 0x10, - 0xd9, 0x0a, 0x44, 0x98, 0x00, 0x05, 0x00, 0x08, - 0x00, 0x01, 0x27, 0x11, 0xd9, 0x74, 0x7a, 0x8a, - 0x80, 0x20, 0x00, 0x08, 0x00, 0x01, 0xc5, 0xb9, - 0x69, 0x57, 0x85, 0x6f, 0x80, 0x22, 0x00, 0x10, - 0x56, 0x6f, 0x76, 0x69, 0x64, 0x61, 0x2e, 0x6f, - 0x72, 0x67, 0x20, 0x30, 0x2e, 0x39, 0x36, 0x00, - ], - want_tid: vec![ - 0x48, 0x2e, 0xb6, 0x47, 0x15, 0xe8, 0xb2, 0x8e, - 0xae, 0xad, 0x64, 0x44, - ], - want_addr: IpAddr::V4(Ipv4Addr::from([72, 69, 33, 45])), - want_port: 58539, - }, - ResponseTestCase{ - name: "stun.powervoip.com:3478", - data: vec![ - 0x01, 0x01, 0x00, 0x24, 0x21, 0x12, 0xa4, 0x42, - 0x7e, 0x57, 0x96, 0x68, 0x29, 0xf4, 0x44, 0x60, - 0x9d, 0x1d, 0xea, 0xa6, 0x00, 0x01, 0x00, 0x08, - 0x00, 0x01, 0xe9, 0xd3, 0x48, 0x45, 0x21, 0x2d, - 0x00, 0x04, 0x00, 0x08, 0x00, 0x01, 0x0d, 0x96, - 0x4d, 0x48, 0xa9, 0xd4, 0x00, 0x05, 0x00, 0x08, - 0x00, 0x01, 0x0d, 0x97, 0x4d, 0x48, 0xa9, 0xd5, - ], - want_tid: vec![ - 0x7e, 0x57, 0x96, 0x68, 0x29, 0xf4, 0x44, 0x60, - 0x9d, 0x1d, 0xea, 0xa6, - ], - want_addr: IpAddr::V4(Ipv4Addr::from([72, 69, 33, 45])), - want_port: 59859, - }, - ResponseTestCase{ - name: "in-process pion server", - data: vec![ - 0x01, 0x01, 0x00, 0x24, 0x21, 0x12, 0xa4, 0x42, - 0xeb, 0xc2, 0xd3, 0x6e, 0xf4, 0x71, 0x21, 0x7c, - 0x4f, 0x3e, 0x30, 0x8e, 0x80, 0x22, 0x00, 0x0a, - 0x65, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, - 0x65, 0x72, 0x00, 0x00, 0x00, 0x20, 0x00, 0x08, - 0x00, 0x01, 0xce, 0x66, 0x5e, 0x12, 0xa4, 0x43, - 0x80, 0x28, 0x00, 0x04, 0xb6, 0x99, 0xbb, 0x02, - 
0x01, 0x01, 0x00, 0x24, 0x21, 0x12, 0xa4, 0x42, - ], - want_tid: vec![ - 0xeb, 0xc2, 0xd3, 0x6e, 0xf4, 0x71, 0x21, 0x7c, - 0x4f, 0x3e, 0x30, 0x8e, - ], - want_addr: IpAddr::V4(Ipv4Addr::from([127, 0, 0, 1])), - want_port: 61300, - }, - ResponseTestCase{ - name: "stuntman-server ipv6", - data: vec![ - 0x01, 0x01, 0x00, 0x48, 0x21, 0x12, 0xa4, 0x42, - 0x06, 0xf5, 0x66, 0x85, 0xd2, 0x8a, 0xf3, 0xe6, - 0x9c, 0xe3, 0x41, 0xe2, 0x00, 0x01, 0x00, 0x14, - 0x00, 0x02, 0x90, 0xce, 0x26, 0x02, 0x00, 0xd1, - 0xb4, 0xcf, 0xc1, 0x00, 0x38, 0xb2, 0x31, 0xff, - 0xfe, 0xef, 0x96, 0xf6, 0x80, 0x2b, 0x00, 0x14, - 0x00, 0x02, 0x0d, 0x96, 0x26, 0x04, 0xa8, 0x80, - 0x00, 0x02, 0x00, 0xd1, 0x00, 0x00, 0x00, 0x00, - 0x00, 0xc5, 0x70, 0x01, 0x00, 0x20, 0x00, 0x14, - 0x00, 0x02, 0xb1, 0xdc, 0x07, 0x10, 0xa4, 0x93, - 0xb2, 0x3a, 0xa7, 0x85, 0xea, 0x38, 0xc2, 0x19, - 0x62, 0x0c, 0xd7, 0x14, - ], - want_tid: vec![ - 6, 245, 102, 133, 210, 138, 243, 230, 156, 227, - 65, 226, - ], - want_addr: "2602:d1:b4cf:c100:38b2:31ff:feef:96f6".parse().unwrap(), - want_port: 37070, - }, - // Testing STUN attribute padding rules using STUN software attribute - // with values of 1 & 3 length respectively before the XorMappedAddress attribute - ResponseTestCase { - name: "software-a", - data: vec![ - 0x01, 0x01, 0x00, 0x14, 0x21, 0x12, 0xa4, 0x42, - 0xeb, 0xc2, 0xd3, 0x6e, 0xf4, 0x71, 0x21, 0x7c, - 0x4f, 0x3e, 0x30, 0x8e, 0x80, 0x22, 0x00, 0x01, - 0x61, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x08, - 0x00, 0x01, 0xce, 0x66, 0x5e, 0x12, 0xa4, 0x43, - ], - want_tid: vec![ - 0xeb, 0xc2, 0xd3, 0x6e, 0xf4, 0x71, 0x21, 0x7c, - 0x4f, 0x3e, 0x30, 0x8e, - ], - want_addr: IpAddr::V4(Ipv4Addr::from([127, 0, 0, 1])), - want_port: 61300, - }, - ResponseTestCase { - name: "software-abc", - data: vec![ - 0x01, 0x01, 0x00, 0x14, 0x21, 0x12, 0xa4, 0x42, - 0xeb, 0xc2, 0xd3, 0x6e, 0xf4, 0x71, 0x21, 0x7c, - 0x4f, 0x3e, 0x30, 0x8e, 0x80, 0x22, 0x00, 0x03, - 0x61, 0x62, 0x63, 0x00, 0x00, 0x20, 0x00, 0x08, - 0x00, 0x01, 0xce, 0x66, 0x5e, 0x12, 0xa4, 0x43, - ], - want_tid: vec![ - 0xeb, 0xc2, 0xd3, 0x6e, 0xf4, 0x71, 0x21, 0x7c, - 0x4f, 0x3e, 0x30, 0x8e, - ], - want_addr: IpAddr::V4(Ipv4Addr::from([127, 0, 0, 1])), - want_port: 61300, - }, - ResponseTestCase { - name: "no-4in6", - data: data_encoding::HEXLOWER.decode(b"010100182112a4424fd5d202dcb37d31fc773306002000140002cd3d2112a4424fd5d202dcb382ce2dc3fcc7").unwrap(), - want_tid: vec![79, 213, 210, 2, 220, 179, 125, 49, 252, 119, 51, 6], - want_addr: IpAddr::V4(Ipv4Addr::from([209, 180, 207, 193])), - want_port: 60463, - }, - ]; - - for (i, test) in cases.into_iter().enumerate() { - println!("Case {i}: {}", test.name); - let (tx, addr_port) = parse_response(&test.data).unwrap(); - assert!(is(&test.data)); - assert_eq!(tx.as_bytes(), &test.want_tid[..]); - assert_eq!(addr_port.ip(), test.want_addr); - assert_eq!(addr_port.port(), test.want_port); - } - } - - #[test] - fn test_parse_binding_request() { - let tx = TransactionId::default(); - let req = request(tx); - assert!(is(&req)); - let got_tx = parse_binding_request(&req).unwrap(); - assert_eq!(got_tx, tx); - } - - #[test] - fn test_stun_cookie() { - assert_eq!(stun_rs::MAGIC_COOKIE, COOKIE); - } - - #[test] - fn test_response() { - let txn = |n| TransactionId::from([n; 12]); - - struct Case { - tx: TransactionId, - addr: IpAddr, - port: u16, - } - let tests = vec![ - Case { - tx: txn(1), - addr: "1.2.3.4".parse().unwrap(), - port: 254, - }, - Case { - tx: txn(2), - addr: "1.2.3.4".parse().unwrap(), - port: 257, - }, - Case { - tx: txn(3), - addr: 
"1::4".parse().unwrap(), - port: 254, - }, - Case { - tx: txn(4), - addr: "1::4".parse().unwrap(), - port: 257, - }, - ]; - - for tt in tests { - let res = response(tt.tx, SocketAddr::new(tt.addr, tt.port)); - assert!(is(&res)); - let (tx2, addr2) = parse_response(&res).unwrap(); - assert_eq!(tt.tx, tx2); - assert_eq!(tt.addr, addr2.ip()); - assert_eq!(tt.port, addr2.port()); - } - } -} diff --git a/iroh-relay/src/quic.rs b/iroh-relay/src/quic.rs index 678d1e5910e..2f2b7ad28db 100644 --- a/iroh-relay/src/quic.rs +++ b/iroh-relay/src/quic.rs @@ -285,9 +285,11 @@ mod tests { use std::net::Ipv4Addr; use anyhow::Context; - use n0_future::{task::AbortOnDropHandle, time}; + use n0_future::{ + task::AbortOnDropHandle, + time::{self, Instant}, + }; use quinn::crypto::rustls::QuicServerConfig; - use tokio::time::Instant; use tracing::{debug, info, info_span, Instrument}; use tracing_test::traced_test; use webpki_types::PrivatePkcs8KeyDer; diff --git a/iroh-relay/src/relay_map.rs b/iroh-relay/src/relay_map.rs index b16da39384f..5aa3745cf7e 100644 --- a/iroh-relay/src/relay_map.rs +++ b/iroh-relay/src/relay_map.rs @@ -5,7 +5,7 @@ use std::{collections::BTreeMap, fmt, sync::Arc}; use iroh_base::RelayUrl; use serde::{Deserialize, Serialize}; -use crate::defaults::{DEFAULT_RELAY_QUIC_PORT, DEFAULT_STUN_PORT}; +use crate::defaults::DEFAULT_RELAY_QUIC_PORT; /// Configuration of all the relay servers that can be used. #[derive(Debug, Clone, PartialEq, Eq)] @@ -116,15 +116,6 @@ impl fmt::Display for RelayMap { pub struct RelayNode { /// The [`RelayUrl`] where this relay server can be dialed. pub url: RelayUrl, - /// Whether this relay server should only be used for STUN requests. - /// - /// This essentially allows you to use a normal STUN server as a relay node, no relay - /// functionality is used. - pub stun_only: bool, - /// The stun port of the relay server. - /// - /// Setting this to `0` means the default STUN port is used. - pub stun_port: u16, /// Configuration to speak to the QUIC endpoint on the relay server. /// /// When `None`, we will not attempt to do QUIC address discovery @@ -137,8 +128,6 @@ impl From for RelayNode { fn from(value: RelayUrl) -> Self { Self { url: value, - stun_only: false, - stun_port: DEFAULT_STUN_PORT, quic: quic_config(), } } diff --git a/iroh-relay/src/server.rs b/iroh-relay/src/server.rs index ca8d4935cbc..58a73139ed6 100644 --- a/iroh-relay/src/server.rs +++ b/iroh-relay/src/server.rs @@ -14,11 +14,10 @@ //! - HTTPS `/relay`: The main URL endpoint to which clients connect and sends traffic over. //! - HTTPS `/ping`: Used for net_report probes. //! - HTTPS `/generate_204`: Used for net_report probes. -//! - STUN: UDP port for STUN requests/responses. 
use std::{fmt, future::Future, net::SocketAddr, num::NonZeroU32, pin::Pin, sync::Arc}; -use anyhow::{anyhow, bail, Context, Result}; +use anyhow::{anyhow, Context, Result}; use derive_more::Debug; use http::{ response::Builder as ResponseBuilder, HeaderMap, Method, Request, Response, StatusCode, @@ -28,17 +27,13 @@ use iroh_base::NodeId; #[cfg(feature = "test-utils")] use iroh_base::RelayUrl; use n0_future::{future::Boxed, StreamExt}; -use tokio::{ - net::{TcpListener, UdpSocket}, - task::JoinSet, -}; +use tokio::{net::TcpListener, task::JoinSet}; use tokio_util::task::AbortOnDropHandle; -use tracing::{debug, error, info, info_span, instrument, trace, warn, Instrument}; +use tracing::{debug, error, info, info_span, instrument, Instrument}; use crate::{ defaults::DEFAULT_KEY_CACHE_CAPACITY, http::RELAY_PROBE_PATH, - protos, quic::server::{QuicServer, ServerHandle as QuicServerHandle}, }; @@ -80,7 +75,7 @@ fn body_empty() -> BytesBody { http_body_util::Full::new(hyper::body::Bytes::new()) } -/// Configuration for the full Relay & STUN server. +/// Configuration for the full Relay. /// /// Be aware the generic parameters are for when using the Let's Encrypt TLS configuration. /// If not used dummy ones need to be provided, e.g. `ServerConfig::<(), ()>::default()`. @@ -88,8 +83,6 @@ fn body_empty() -> BytesBody { pub struct ServerConfig { /// Configuration for the Relay server, disabled if `None`. pub relay: Option>, - /// Configuration for the STUN server, disabled if `None`. - pub stun: Option, /// Configuration for the QUIC server, disabled if `None`. pub quic: Option, /// Socket to serve metrics on. @@ -156,15 +149,6 @@ pub enum Access { Deny, } -/// Configuration for the STUN server. -#[derive(Debug)] -pub struct StunConfig { - /// The socket address on which the STUN server should bind. - /// - /// Normally you'd chose port `3478`, see [`crate::defaults::DEFAULT_STUN_PORT`]. - pub bind_addr: SocketAddr, -} - /// Configuration for the QUIC server. #[derive(Debug)] pub struct QuicConfig { @@ -248,8 +232,6 @@ pub enum CertConfig { pub struct Server { /// The address of the HTTP server, if configured. http_addr: Option, - /// The address of the STUN server, if configured. - stun_addr: Option, /// The address of the HTTPS server, if the relay server is using TLS. /// /// If the Relay server is not using TLS then it is served from the @@ -296,26 +278,6 @@ impl Server { ); } - // Start the STUN server. - let stun_addr = match config.stun { - Some(stun) => { - debug!("Starting STUN server"); - match UdpSocket::bind(stun.bind_addr).await { - Ok(sock) => { - let addr = sock.local_addr()?; - info!("STUN server listening on {addr}"); - tasks.spawn( - server_stun_listener(sock, metrics.stun.clone()) - .instrument(info_span!("stun-server", %addr)), - ); - Some(addr) - } - Err(err) => bail!("failed to bind STUN listener: {err:#?}"), - } - } - None => None, - }; - // Start the Relay server, but first clone the certs out. let certificates = config.relay.as_ref().and_then(|relay| { relay.tls.as_ref().and_then(|tls| match tls.cert { @@ -433,7 +395,6 @@ impl Server { Ok(Self { http_addr: http_addr.or(relay_addr), - stun_addr, https_addr: http_addr.and(relay_addr), quic_addr, relay_handle, @@ -482,11 +443,6 @@ impl Server { self.quic_addr } - /// The socket address the STUN server is listening on. - pub fn stun_addr(&self) -> Option { - self.stun_addr - } - /// The certificates chain if configured with manual TLS certificates. 
pub fn certificates(&self) -> Option>> { self.certificates.clone() @@ -585,91 +541,6 @@ async fn relay_supervisor( ret } -/// Runs a STUN server. -/// -/// When the future is dropped, the server stops. -async fn server_stun_listener(sock: UdpSocket, metrics: Arc) -> Result<()> { - info!(addr = ?sock.local_addr().ok(), "running STUN server"); - let sock = Arc::new(sock); - let mut buffer = vec![0u8; 64 << 10]; - let mut tasks = JoinSet::new(); - loop { - tokio::select! { - biased; - - Some(res) = tasks.join_next() => { - if let Err(err) = res { - if err.is_panic() { - panic!("task panicked: {:#?}", err); - } - } - } - res = sock.recv_from(&mut buffer) => { - match res { - Ok((n, src_addr)) => { - metrics.requests.inc(); - let pkt = &buffer[..n]; - if !protos::stun::is(pkt) { - debug!(%src_addr, "STUN: ignoring non stun packet"); - metrics.bad_requests.inc(); - continue; - } - let pkt = pkt.to_vec(); - tasks.spawn(handle_stun_request(src_addr, pkt, sock.clone(), metrics.clone())); - } - Err(err) => { - metrics.failures.inc(); - warn!("failed to recv: {err:#}"); - } - } - } - } - } -} - -/// Handles a single STUN request, doing all logging required. -async fn handle_stun_request( - src_addr: SocketAddr, - pkt: Vec, - sock: Arc, - metrics: Arc, -) { - let (txid, response) = match protos::stun::parse_binding_request(&pkt) { - Ok(txid) => { - debug!(%src_addr, %txid, "STUN: received binding request"); - (txid, protos::stun::response(txid, src_addr)) - } - Err(err) => { - metrics.bad_requests.inc(); - warn!(%src_addr, "STUN: invalid binding request: {:?}", err); - return; - } - }; - - match sock.send_to(&response, src_addr).await { - Ok(len) => { - if len != response.len() { - warn!( - %src_addr, - %txid, - "failed to write response, {len}/{} bytes sent", - response.len() - ); - } else { - match src_addr { - SocketAddr::V4(_) => metrics.ipv4_success.inc(), - SocketAddr::V6(_) => metrics.ipv6_success.inc(), - }; - } - trace!(%src_addr, %txid, "sent {len} bytes"); - } - Err(err) => { - metrics.failures.inc(); - warn!(%src_addr, %txid, "failed to write response: {err:#}"); - } - } -} - fn root_handler( _r: Request, response: ResponseBuilder, @@ -842,7 +713,6 @@ mod tests { access: AccessConfig::Everyone, }), quic: None, - stun: None, metrics_addr: None, }) .await @@ -896,7 +766,6 @@ mod tests { key_cache_capacity: Some(1024), access: AccessConfig::Everyone, }), - stun: None, quic: None, metrics_addr: Some((Ipv4Addr::LOCALHOST, 1234).into()), }) @@ -1130,38 +999,6 @@ mod tests { Ok(()) } - #[tokio::test] - #[traced_test] - async fn test_stun() { - let server = Server::spawn(ServerConfig::<(), ()> { - relay: None, - stun: Some(StunConfig { - bind_addr: (Ipv4Addr::LOCALHOST, 0).into(), - }), - quic: None, - metrics_addr: None, - }) - .await - .unwrap(); - - let txid = protos::stun::TransactionId::default(); - let req = protos::stun::request(txid); - let socket = UdpSocket::bind("127.0.0.1:0").await.unwrap(); - socket - .send_to(&req, server.stun_addr().unwrap()) - .await - .unwrap(); - - // get response - let mut buf = vec![0u8; 64000]; - let (len, addr) = socket.recv_from(&mut buf).await.unwrap(); - assert_eq!(addr, server.stun_addr().unwrap()); - buf.truncate(len); - let (txid_back, response_addr) = protos::stun::parse_response(&buf).unwrap(); - assert_eq!(txid, txid_back); - assert_eq!(response_addr, socket.local_addr().unwrap()); - } - #[tokio::test] #[traced_test] async fn test_relay_access_control() -> Result<()> { @@ -1188,7 +1025,6 @@ mod tests { })), }), quic: None, - stun: None, metrics_addr: 
None, }) .await diff --git a/iroh-relay/src/server/testing.rs b/iroh-relay/src/server/testing.rs index fd2989cd81d..efaaf030ed0 100644 --- a/iroh-relay/src/server/testing.rs +++ b/iroh-relay/src/server/testing.rs @@ -1,18 +1,7 @@ //! Exposes functions to quickly configure a server suitable for testing. use std::net::Ipv4Addr; -use super::{ - AccessConfig, CertConfig, QuicConfig, RelayConfig, ServerConfig, StunConfig, TlsConfig, -}; - -/// Creates a [`StunConfig`] suitable for testing. -/// -/// To ensure port availability for testing, the port is configured to be assigned by the OS. -pub fn stun_config() -> StunConfig { - StunConfig { - bind_addr: (Ipv4Addr::LOCALHOST, 0).into(), - } -} +use super::{AccessConfig, CertConfig, QuicConfig, RelayConfig, ServerConfig, TlsConfig}; /// Creates a [`rustls::ServerConfig`] and certificates suitable for testing. /// @@ -88,13 +77,11 @@ pub fn quic_config() -> QuicConfig { /// Creates a [`ServerConfig`] suitable for testing. /// /// - Relaying is enabled using [`relay_config`] -/// - Stun is enabled using [`stun_config`] /// - QUIC addr discovery is disabled. /// - Metrics are not enabled. pub fn server_config() -> ServerConfig<()> { ServerConfig { relay: Some(relay_config()), - stun: Some(stun_config()), quic: Some(quic_config()), #[cfg(feature = "metrics")] metrics_addr: None, diff --git a/iroh/Cargo.toml b/iroh/Cargo.toml index ddd6a753799..1e599a90a95 100644 --- a/iroh/Cargo.toml +++ b/iroh/Cargo.toml @@ -134,6 +134,7 @@ getrandom = { version = "0.3.2", features = ["wasm_js"] } # target-common test/dev dependencies [dev-dependencies] +console_error_panic_hook = "0.1" postcard = { version = "1.1.1", features = ["use-std"] } testresult = "0.4.0" tracing-subscriber = { version = "0.3", features = ["env-filter"] } diff --git a/iroh/examples/transfer.rs b/iroh/examples/transfer.rs index 5ce87bae754..86811e4b681 100644 --- a/iroh/examples/transfer.rs +++ b/iroh/examples/transfer.rs @@ -189,6 +189,9 @@ impl EndpointArgs { } }; builder = builder.secret_key(secret_key); + if Env::Dev == self.env { + builder = builder.insecure_skip_relay_cert_verify(true); + } let relay_mode = if self.no_relay { RelayMode::Disabled @@ -249,13 +252,25 @@ impl EndpointArgs { for local_endpoint in endpoint.direct_addresses().initialized().await? { println!("\t{} (type: {:?})", local_endpoint.addr, local_endpoint.typ) } - if !self.no_relay { - let relay_url = endpoint - .home_relay() - .get()? - .pop() - .context("Failed to resolve our home relay")?; - println!("Our home relay server:\n\t{relay_url}"); + + if self.relay_only { + let relay_url = endpoint.home_relay().initialized().await?; + println!("Our home relay server:\t{relay_url}"); + } else if !self.no_relay { + let relay_url = tokio::time::timeout(Duration::from_secs(5), async { + endpoint + .home_relay() + .initialized() + .await + .expect("disconnected") + }) + .await + .ok(); + if let Some(url) = relay_url { + println!("Our home relay server:\t{url}"); + } else { + println!("No home relay server found"); + } } println!(); diff --git a/iroh/src/defaults.rs b/iroh/src/defaults.rs index 6b0aa6eb6d0..723309bf8b7 100644 --- a/iroh/src/defaults.rs +++ b/iroh/src/defaults.rs @@ -9,7 +9,6 @@ pub use iroh_relay::defaults::DEFAULT_RELAY_QUIC_PORT; /// /// The STUN port as defined by [RFC /// 8489]() -pub use iroh_relay::defaults::DEFAULT_STUN_PORT; use url::Url; /// The default HTTP port used by the Relay server. 
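// A minimal sketch of the watcher-plus-timeout pattern used by the transfer
// example above; `print_home_relay` is a hypothetical helper and assumes an
// already-bound `iroh::Endpoint` inside a tokio runtime.
async fn print_home_relay(endpoint: &iroh::Endpoint) {
    use std::time::Duration;
    use n0_watcher::Watcher as _; // trait method import; the re-export path may differ
    // Wait up to five seconds for the home relay watcher to yield a value.
    let relay_url = tokio::time::timeout(Duration::from_secs(5), async {
        endpoint
            .home_relay()
            .initialized()
            .await
            .expect("disconnected")
    })
    .await
    .ok();
    match relay_url {
        Some(url) => println!("Our home relay server:\t{url}"),
        None => println!("No home relay server found"),
    }
}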
@@ -51,8 +50,6 @@ pub mod prod { .expect("default url"); RelayNode { url: url.into(), - stun_only: false, - stun_port: DEFAULT_STUN_PORT, quic: Some(RelayQuicConfig::default()), } } @@ -65,8 +62,6 @@ pub mod prod { .expect("default_url"); RelayNode { url: url.into(), - stun_only: false, - stun_port: DEFAULT_STUN_PORT, quic: Some(RelayQuicConfig::default()), } } @@ -79,8 +74,6 @@ pub mod prod { .expect("default_url"); RelayNode { url: url.into(), - stun_only: false, - stun_port: DEFAULT_STUN_PORT, quic: Some(RelayQuicConfig::default()), } } @@ -114,8 +107,6 @@ pub mod staging { .expect("default url"); RelayNode { url: url.into(), - stun_only: false, - stun_port: DEFAULT_STUN_PORT, quic: Some(RelayQuicConfig::default()), } } @@ -128,8 +119,6 @@ pub mod staging { .expect("default_url"); RelayNode { url: url.into(), - stun_only: false, - stun_port: DEFAULT_STUN_PORT, quic: Some(RelayQuicConfig::default()), } } diff --git a/iroh/src/endpoint.rs b/iroh/src/endpoint.rs index cfc792fe01c..35f3707cf61 100644 --- a/iroh/src/endpoint.rs +++ b/iroh/src/endpoint.rs @@ -1027,7 +1027,7 @@ impl Endpoint { /// # }); /// ``` #[doc(hidden)] - pub fn net_report(&self) -> n0_watcher::Direct>> { + pub fn net_report(&self) -> impl Watcher> { self.msock.net_report() } @@ -2214,12 +2214,9 @@ fn is_cgi() -> bool { // https://github.com/n0-computer/iroh/issues/1183 #[cfg(test)] mod tests { - - use std::time::Instant; - use iroh_metrics::MetricsSource; use iroh_relay::http::Protocol; - use n0_future::{task::AbortOnDropHandle, StreamExt}; + use n0_future::{task::AbortOnDropHandle, time::Instant, StreamExt}; use rand::SeedableRng; use testresult::TestResult; use tracing::{error_span, info, info_span, Instrument}; @@ -2749,8 +2746,8 @@ mod tests { #[tokio::test] #[traced_test] - async fn test_direct_addresses_no_stun_relay() { - let (relay_map, _, _guard) = run_relay_server_with(None, false).await.unwrap(); + async fn test_direct_addresses_no_qad_relay() { + let (relay_map, _, _guard) = run_relay_server_with(false).await.unwrap(); let ep = Endpoint::builder() .alpns(vec![TEST_ALPN.to_vec()]) @@ -3166,7 +3163,7 @@ mod tests { .await?; // can get a first report - endpoint.net_report().initialized().await?; + endpoint.net_report().updated().await?; Ok(()) } diff --git a/iroh/src/magicsock.rs b/iroh/src/magicsock.rs index 2708778d0e2..154e816178a 100644 --- a/iroh/src/magicsock.rs +++ b/iroh/src/magicsock.rs @@ -28,14 +28,14 @@ use std::{ task::{Context, Poll}, }; -use anyhow::{anyhow, Context as _, Result}; +use anyhow::{Context as _, Result}; use bytes::Bytes; use data_encoding::HEXLOWER; use iroh_base::{NodeAddr, NodeId, PublicKey, RelayUrl, SecretKey}; -use iroh_relay::{protos::stun, RelayMap}; +use iroh_relay::RelayMap; use n0_future::{ boxed::BoxStream, - task::{self, JoinSet}, + task::{self, AbortOnDropHandle}, time::{self, Duration, Instant}, StreamExt, }; @@ -44,12 +44,12 @@ use netwatch::netmon; #[cfg(not(wasm_browser))] use netwatch::{ip::LocalAddresses, UdpSocket}; use quinn::{AsyncUdpSocket, ServerConfig}; -use rand::{seq::SliceRandom, Rng, SeedableRng}; +use rand::Rng; use smallvec::SmallVec; -use tokio::sync::{self, mpsc, Mutex}; +use tokio::sync::{mpsc, Mutex}; +use tokio_util::sync::CancellationToken; use tracing::{ - debug, error, error_span, event, info, info_span, instrument, trace, trace_span, warn, - Instrument, Level, Span, + debug, error, event, info, info_span, instrument, trace, trace_span, warn, Instrument, Level, }; use transports::LocalAddrsWatch; use url::Url; @@ -73,7 +73,7 @@ use crate::{ 
discovery::{Discovery, DiscoveryItem, DiscoverySubscribers, NodeData, UserData}, key::{public_ed_box, secret_ed_box, DecryptionError, SharedSecret}, metrics::EndpointMetrics, - net_report::{self, IpMappedAddresses, Report}, + net_report::{self, IfStateDetails, IpMappedAddresses, Report}, }; mod metrics; @@ -161,8 +161,10 @@ type RelayContents = SmallVec<[Bytes; 1]>; pub(crate) struct Handle { #[deref(forward)] msock: Arc, - // Empty when closed - actor_tasks: Arc>>, + // empty when shutdown + actor_task: Arc>>>, + /// Token to cancel the actor task. + actor_token: CancellationToken, // quinn endpoint endpoint: quinn::Endpoint, } @@ -177,69 +179,52 @@ pub(crate) struct Handle { /// It is usually only necessary to use a single [`MagicSock`] instance in an application, it /// means any QUIC endpoints on top will be sharing as much information about nodes as /// possible. -#[derive(derive_more::Debug)] +#[derive(Debug)] pub(crate) struct MagicSock { + /// Channel to send to the internal actor. actor_sender: mpsc::Sender, - /// String representation of the node_id of this node. - me: String, - - /// The DNS resolver to be used in this magicsock. - #[cfg(not(wasm_browser))] - dns_resolver: DnsResolver, - - /// Key for this node. - secret_key: SecretKey, - /// Encryption key for this node. - secret_encryption_key: crypto_box::SecretKey, + /// NodeId of this node. + public_key: PublicKey, + // - State Management /// Close is in progress (or done) closing: AtomicBool, /// Close was called. closed: AtomicBool, + + // - Networking Info + /// Our discovered direct addresses. + direct_addrs: DiscoveredDirectAddrs, + /// Our latest net-report + net_report: Watchable<(Option, UpdateReason)>, /// If the last net_report report, reports IPv6 to be available. ipv6_reported: Arc, - - /// Zero nodes means relay is disabled. - relay_map: RelayMap, /// Tracks the networkmap node entity for each node discovery key. node_map: NodeMap, /// Tracks the mapped IP addresses ip_mapped_addrs: IpMappedAddresses, - /// NetReport client - net_reporter: net_report::Addr, - /// The state for an active DiscoKey. - disco_secrets: DiscoSecrets, + /// Local addresses + local_addrs_watch: LocalAddrsWatch, + /// Currently bound IP addresses of all sockets + #[cfg(not(wasm_browser))] + ip_bind_addrs: Vec, + /// The DNS resolver to be used in this magicsock. + #[cfg(not(wasm_browser))] + dns_resolver: DnsResolver, - /// Disco (ping) queue - disco_sender: mpsc::Sender<(SendAddr, PublicKey, disco::Message)>, + /// Disco + disco: DiscoState, + // - Discovery /// Optional discovery service discovery: Option>, - /// Optional user-defined discover data. discovery_user_data: RwLock>, - - /// Our discovered direct addresses. - direct_addrs: DiscoveredDirectAddrs, - - /// Our latest net-report - net_report: Watchable>>, - - /// List of CallMeMaybe disco messages that should be sent out after the next endpoint update - /// completes - pending_call_me_maybes: std::sync::Mutex>, - - /// Indicates the direct addr update state. - direct_addr_update_state: DirectAddrUpdateState, - /// Broadcast channel for listening to discovery updates. discovery_subscribers: DiscoverySubscribers, + /// Metrics pub(crate) metrics: EndpointMetrics, - - local_addrs_watch: LocalAddrsWatch, - #[cfg(not(wasm_browser))] - ip_bind_addrs: Vec, } impl MagicSock { @@ -269,10 +254,6 @@ impl MagicSock { self.closed.load(Ordering::SeqCst) } - fn public_key(&self) -> PublicKey { - self.secret_key.public() - } - /// Get the cached version of addresses. 
pub(crate) fn local_addr(&self) -> Vec { self.local_addrs_watch.get().expect("disconnected") @@ -339,8 +320,11 @@ impl MagicSock { /// /// [`Watcher`]: n0_watcher::Watcher /// [`Watcher::initialized`]: n0_watcher::Watcher::initialized - pub(crate) fn net_report(&self) -> n0_watcher::Direct>> { - self.net_report.watch() + pub(crate) fn net_report(&self) -> impl Watcher> { + self.net_report + .watch() + .map(|(r, _)| r) + .expect("disconnected") } /// Watch for changes to the home relay. @@ -385,7 +369,7 @@ impl MagicSock { } /// Add addresses for a node to the magic socket's addresbook. - #[instrument(skip_all, fields(me = %self.me))] + #[instrument(skip_all)] pub fn add_node_addr(&self, mut addr: NodeAddr, source: node_map::Source) -> Result<()> { let mut pruned = 0; for my_addr in self.direct_addrs.sockaddrs() { @@ -636,15 +620,7 @@ impl MagicSock { // byte of those packets with zero to make Quinn ignore the packet. This // relies on quinn::EndpointConfig::grease_quic_bit being set to `false`, // which we do in Endpoint::bind. - if source_addr.is_ip() && stun::is(datagram) { - trace!(src = ?source_addr, len = %quinn_meta.stride, "UDP recv: stun packet"); - let packet2 = Bytes::copy_from_slice(datagram); - self.net_reporter.receive_stun_packet( - packet2, - source_addr.clone().try_into().expect("checked"), - ); - datagram[0] = 0u8; - } else if let Some((sender, sealed_box)) = disco::source_and_box(datagram) { + if let Some((sender, sealed_box)) = disco::source_and_box(datagram) { trace!(src = ?source_addr, len = %quinn_meta.stride, "UDP recv: disco packet"); self.handle_disco_message(sender, sealed_box, source_addr); datagram[0] = 0u8; @@ -761,17 +737,17 @@ impl MagicSock { if let transports::Addr::Relay(_, node_id) = src { if node_id != &sender { // TODO: return here? - warn!("Received relay disco message from connection for {:?}, but with message from {}", node_id.fmt_short(), sender.fmt_short()); + warn!( + "Received relay disco message from connection for {}, but with message from {}", + node_id.fmt_short(), + sender.fmt_short() + ); } } // We're now reasonably sure we're expecting communication from // this node, do the heavy crypto lifting to see what they want. 
- let dm = match self.disco_secrets.unseal_and_decode( - &self.secret_encryption_key, - sender, - sealed_box.to_vec(), - ) { + let dm = match self.disco.unseal_and_decode(sender, sealed_box) { Ok(dm) => dm, Err(DiscoBoxError::Open(err)) => { warn!(?err, "failed to open disco box"); @@ -879,11 +855,7 @@ impl MagicSock { txn = ?dm.tx_id, ); - if self - .disco_sender - .try_send((addr.clone(), sender, pong)) - .is_err() - { + if self.disco.try_send(addr.clone(), sender, pong).is_err() { warn!(%addr, "failed to queue pong"); } @@ -897,15 +869,6 @@ impl MagicSock { } } - fn encode_disco_message(&self, dst_key: PublicKey, msg: &disco::Message) -> Bytes { - self.disco_secrets.encode_and_seal( - &self.secret_encryption_key, - self.secret_key.public(), - dst_key, - msg, - ) - } - fn send_ping_queued(&self, ping: SendPing) { let SendPing { id, @@ -916,13 +879,9 @@ impl MagicSock { } = ping; let msg = disco::Message::Ping(disco::Ping { tx_id, - node_key: self.public_key(), + node_key: self.public_key, }); - let sent = self - .disco_sender - .try_send((dst.clone(), dst_node, msg)) - .is_ok(); - + let sent = self.disco.try_send(dst.clone(), dst_node, msg).is_ok(); if sent { let msg_sender = self.actor_sender.clone(); trace!(%dst, tx = %HEXLOWER.encode(&tx_id), ?purpose, "ping sent (queued)"); @@ -933,7 +892,7 @@ impl MagicSock { } } - /// Tries to send the ping actions. + /// Send the given ping actions out. async fn send_ping_actions(&self, sender: &UdpSender, msgs: Vec) -> io::Result<()> { for msg in msgs { // Abort sending as soon as we know we are shutting down. @@ -942,20 +901,68 @@ impl MagicSock { } match msg { PingAction::SendCallMeMaybe { - ref relay_url, + relay_url, dst_node, } => { - self.send_or_queue_call_me_maybe(relay_url, dst_node); + // Sends the call-me-maybe DISCO message, queuing if addresses are too stale. + // + // To send the call-me-maybe message, we need to know our current direct addresses. If + // this information is too stale, the call-me-maybe is queued while a net_report run is + // scheduled. Once this run finishes, the call-me-maybe will be sent. + match self.direct_addrs.fresh_enough() { + Ok(()) => { + let msg = disco::Message::CallMeMaybe( + self.direct_addrs.to_call_me_maybe_message(), + ); + if self + .disco + .try_send(SendAddr::Relay(relay_url.clone()), dst_node, msg.clone()) + .is_err() + { + warn!(dstkey = %dst_node.fmt_short(), %relay_url, "relay channel full, dropping call-me-maybe"); + } else { + debug!(dstkey = %dst_node.fmt_short(), %relay_url, "call-me-maybe sent"); + } + } + Err(last_refresh_ago) => { + debug!( + ?last_refresh_ago, + "want call-me-maybe but direct addrs stale; queuing after restun", + ); + self.actor_sender + .try_send(ActorMessage::ScheduleDirectAddrUpdate( + UpdateReason::RefreshForPeering, + Some((dst_node, relay_url)), + )) + .ok(); + } + } } - PingAction::SendPing(ping) => { - self.send_ping(sender, ping).await?; + PingAction::SendPing(SendPing { + id, + dst, + dst_node, + tx_id, + purpose, + }) => { + let msg = disco::Message::Ping(disco::Ping { + tx_id, + node_key: self.public_key, + }); + + self.send_disco_message(sender, dst.clone(), dst_node, msg) + .await?; + debug!(%dst, tx = %HEXLOWER.encode(&tx_id), ?purpose, "ping sent"); + let msg_sender = self.actor_sender.clone(); + self.node_map + .notify_ping_sent(id, dst, tx_id, purpose, msg_sender); } } } Ok(()) } - /// Send a disco message. UDP messages will be polled to send directly on the UDP socket. + /// Sends out a disco message. 
async fn send_disco_message( &self, sender: &UdpSender, @@ -975,7 +982,8 @@ impl MagicSock { "connection closed", )); } - let pkt = self.encode_disco_message(dst_key, &msg); + + let pkt = self.disco.encode_and_seal(self.public_key, dst_key, &msg); let transmit = transports::Transmit { contents: &pkt, @@ -998,90 +1006,6 @@ impl MagicSock { } } - async fn send_ping(&self, sender: &UdpSender, ping: SendPing) -> io::Result<()> { - let SendPing { - id, - dst, - dst_node, - tx_id, - purpose, - } = ping; - let msg = disco::Message::Ping(disco::Ping { - tx_id, - node_key: self.public_key(), - }); - - self.send_disco_message(sender, dst.clone(), dst_node, msg) - .await?; - debug!(%dst, tx = %HEXLOWER.encode(&tx_id), ?purpose, "ping sent"); - let msg_sender = self.actor_sender.clone(); - self.node_map - .notify_ping_sent(id, dst.clone(), tx_id, purpose, msg_sender); - Ok(()) - } - - fn send_queued_call_me_maybes(&self) { - let msg = self.direct_addrs.to_call_me_maybe_message(); - let msg = disco::Message::CallMeMaybe(msg); - for (public_key, url) in self - .pending_call_me_maybes - .lock() - .expect("poisoned") - .drain() - { - if self - .disco_sender - .try_send((SendAddr::Relay(url), public_key, msg.clone())) - .is_err() - { - warn!(node = %public_key.fmt_short(), "relay channel full, dropping call-me-maybe"); - } - } - } - - /// Sends the call-me-maybe DISCO message, queuing if addresses are too stale. - /// - /// To send the call-me-maybe message, we need to know our current direct addresses. If - /// this information is too stale, the call-me-maybe is queued while a net_report run is - /// scheduled. Once this run finishes, the call-me-maybe will be sent. - fn send_or_queue_call_me_maybe(&self, url: &RelayUrl, dst_node: NodeId) { - match self.direct_addrs.fresh_enough() { - Ok(()) => { - let msg = self.direct_addrs.to_call_me_maybe_message(); - let msg = disco::Message::CallMeMaybe(msg); - if self - .disco_sender - .try_send((SendAddr::Relay(url.clone()), dst_node, msg.clone())) - .is_err() - { - warn!(dstkey = %dst_node.fmt_short(), relayurl = %url, - "relay channel full, dropping call-me-maybe"); - } else { - debug!(dstkey = %dst_node.fmt_short(), relayurl = %url, "call-me-maybe sent"); - } - } - Err(last_refresh_ago) => { - self.pending_call_me_maybes - .lock() - .expect("poisoned") - .insert(dst_node, url.clone()); - debug!( - ?last_refresh_ago, - "want call-me-maybe but direct addrs stale; queuing after restun", - ); - self.re_stun("refresh-for-peering"); - } - } - } - - /// Triggers an address discovery. The provided why string is for debug logging only. - #[instrument(skip_all)] - fn re_stun(&self, why: &'static str) { - debug!("re_stun: {}", why); - self.metrics.magicsock.re_stun_calls.inc(); - self.direct_addr_update_state.schedule_run(why); - } - /// Publishes our address to a discovery service, if configured. /// /// Called whenever our addresses or home relay node changes. @@ -1140,63 +1064,135 @@ impl From for MappedAddr { /// and start a new one when the current one has finished #[derive(Debug)] struct DirectAddrUpdateState { - /// If running, set to the reason for the currently the update. - running: sync::watch::Sender>, /// If set, start a new update as soon as the current one is finished. 
- want_update: std::sync::Mutex>, + want_update: Option, + msock: Arc, + /// Configuration for net report + net_report_config: net_report::Options, + #[cfg(not(wasm_browser))] + port_mapper: portmapper::Client, + /// The prober that discovers local network conditions, including the closest relay relay and NAT mappings. + net_reporter: Arc>, + relay_map: RelayMap, + run_done: mpsc::Sender<()>, +} + +#[derive(Default, Debug, PartialEq, Eq, Clone, Copy)] +enum UpdateReason { + /// Initial state + #[default] + None, + RefreshForPeering, + Periodic, + PortmapUpdated, + LinkChangeMajor, + LinkChangeMinor, } impl DirectAddrUpdateState { - fn new() -> Self { - let (running, _) = sync::watch::channel(None); + fn new( + msock: Arc, + net_report_config: net_report::Options, + #[cfg(not(wasm_browser))] port_mapper: portmapper::Client, + net_reporter: Arc>, + relay_map: RelayMap, + run_done: mpsc::Sender<()>, + ) -> Self { DirectAddrUpdateState { - running, want_update: Default::default(), + net_report_config, + #[cfg(not(wasm_browser))] + port_mapper, + net_reporter, + msock, + relay_map, + run_done, } } /// Schedules a new run, either starting it immediately if none is running or /// scheduling it for later. - fn schedule_run(&self, why: &'static str) { - if self.is_running() { - let _ = self.want_update.lock().expect("poisoned").insert(why); - } else { - self.run(why); + fn schedule_run(&mut self, why: UpdateReason, if_state: IfStateDetails) { + match self.net_reporter.clone().try_lock_owned() { + Ok(net_reporter) => { + self.run(why, if_state, net_reporter); + } + Err(_) => { + let _ = self.want_update.insert(why); + } } } - /// Returns `true` if an update is currently in progress. - fn is_running(&self) -> bool { - self.running.borrow().is_some() + /// If another run is needed, triggers this run, otherwise does nothing. + fn try_run(&mut self, if_state: IfStateDetails) { + match self.net_reporter.clone().try_lock_owned() { + Ok(net_reporter) => { + if let Some(why) = self.want_update.take() { + self.run(why, if_state, net_reporter); + } + } + Err(_) => { + // do nothing + } + } } /// Trigger a new run. - fn run(&self, why: &'static str) { - self.running.send(Some(why)).ok(); - } + fn run( + &mut self, + why: UpdateReason, + if_state: IfStateDetails, + mut net_reporter: tokio::sync::OwnedMutexGuard, + ) { + debug!("starting direct addr update ({:?})", why); + #[cfg(not(wasm_browser))] + self.port_mapper.procure_mapping(); + // Don't start a net report probe if we know + // we are shutting down + if self.msock.is_closing() || self.msock.is_closed() { + debug!("skipping net_report, socket is shutting down"); + return; + } + if self.relay_map.is_empty() { + debug!("skipping net_report, empty RelayMap"); + self.msock.net_report.set((None, why)).ok(); + return; + } - /// Clears the current running state. - fn finish_run(&self) { - self.running.send(None).ok(); - } + let relay_map = self.relay_map.clone(); + let opts = self.net_report_config.clone(); + + debug!("requesting net_report report"); + let msock = self.msock.clone(); + + let run_done = self.run_done.clone(); + task::spawn(async move { + let fut = time::timeout( + NET_REPORT_TIMEOUT, + net_reporter.get_report(relay_map, if_state, opts), + ); + match fut.await { + Ok(Ok(report)) => { + msock.net_report.set((Some(report), why)).ok(); + } + Ok(Err(_)) => { + warn!("net_report report not received"); + } + Err(err) => { + warn!("net_report report timeout: {:?}", err); + } + } - /// Returns the next update, if one is set. 
- fn next_update(&self) -> Option<&'static str> { - self.want_update.lock().expect("poisoned").take() + // mark run as finished + debug!("direct addr update done ({:?})", why); + run_done.send(()).await.ok(); + }); } } impl Handle { /// Creates a magic [`MagicSock`] listening on [`Options::addr_v4`] and [`Options::addr_v6`]. async fn new(opts: Options) -> Result { - let me = opts.secret_key.public().fmt_short(); - - Self::with_name(me, opts) - .instrument(error_span!("magicsock")) - .await - } - - async fn with_name(me: String, opts: Options) -> Result { let Options { addr_v4, addr_v6, @@ -1222,21 +1218,6 @@ impl Handle { #[cfg(not(wasm_browser))] let (ip_transports, port_mapper) = bind_ip(addr_v4, addr_v6, &metrics)?; - #[cfg(not(wasm_browser))] - let v4_socket = ip_transports - .iter() - .find(|t| t.bind_addr().is_ipv4()) - .expect("must bind a ipv4 socket") - .socket(); - #[cfg(not(wasm_browser))] - let v6_socket = ip_transports.iter().find_map(|t| { - if t.bind_addr().is_ipv6() { - Some(t.socket()) - } else { - None - } - }); - let ip_mapped_addrs = IpMappedAddresses::default(); let net_reporter = net_report::Client::new( @@ -1250,7 +1231,6 @@ impl Handle { )?; let (actor_sender, actor_receiver) = mpsc::channel(256); - let (disco_sender, mut disco_receiver) = mpsc::channel(256); // load the node data let node_map = node_map.unwrap_or_default(); @@ -1285,26 +1265,21 @@ impl Handle { #[cfg(wasm_browser)] let transports = Transports::new(relay_transports); + let (disco, disco_receiver) = DiscoState::new(secret_encryption_key); + let msock = Arc::new(MagicSock { - me, - secret_key, - secret_encryption_key, + public_key: secret_key.public(), closing: AtomicBool::new(false), closed: AtomicBool::new(false), + disco, actor_sender: actor_sender.clone(), ipv6_reported, - relay_map, - net_reporter: net_reporter.addr(), - disco_secrets: DiscoSecrets::default(), node_map, ip_mapped_addrs, - disco_sender, discovery, discovery_user_data: RwLock::new(discovery_user_data), direct_addrs: Default::default(), - net_report: Default::default(), - pending_call_me_maybes: Default::default(), - direct_addr_update_state: DirectAddrUpdateState::new(), + net_report: Watchable::new((None, UpdateReason::None)), #[cfg(not(wasm_browser))] dns_resolver, discovery_subscribers: DiscoverySubscribers::new(), @@ -1322,8 +1297,7 @@ impl Handle { // the packet if grease_quic_bit is set to false. 
endpoint_config.grease_quic_bit(false); - let sender1 = transports.create_sender(msock.clone()); - let sender2 = transports.create_sender(msock.clone()); + let sender = transports.create_sender(msock.clone()); let local_addrs_watch = transports.local_addrs_watch(); let network_change_sender = transports.create_network_change_sender(); @@ -1340,20 +1314,6 @@ impl Handle { Arc::new(crate::web_runtime::WebRuntime), )?; - let mut actor_tasks = JoinSet::default(); - - #[cfg(not(wasm_browser))] - let _ = actor_tasks.spawn({ - let msock = msock.clone(); - async move { - while let Some((dst, dst_key, msg)) = disco_receiver.recv().await { - if let Err(err) = msock.send_disco_message(&sender1, dst.clone(), dst_key, msg).await { - warn!(%dst, node = %dst_key.fmt_short(), ?err, "failed to send disco message (UDP)"); - } - } - } - }); - let network_monitor = netmon::Monitor::new().await?; let qad_endpoint = endpoint.clone(); @@ -1368,49 +1328,59 @@ impl Handle { let net_report_config = net_report::Options::default(); #[cfg(not(wasm_browser))] - let net_report_config = net_report_config - .stun_v4(Some(v4_socket)) - .stun_v6(v6_socket) - .quic_config(Some(QuicConfig { - ep: qad_endpoint, - client_config, - ipv4: true, - ipv6, - })); + let net_report_config = net_report_config.quic_config(Some(QuicConfig { + ep: qad_endpoint, + client_config, + ipv4: true, + ipv6, + })); #[cfg(any(test, feature = "test-utils"))] let net_report_config = net_report_config.insecure_skip_relay_cert_verify(insecure_skip_relay_cert_verify); + let (direct_addr_done_tx, direct_addr_done_rx) = mpsc::channel(8); + let direct_addr_update_state = DirectAddrUpdateState::new( + msock.clone(), + net_report_config, + #[cfg(not(wasm_browser))] + port_mapper, + Arc::new(Mutex::new(net_reporter)), + relay_map, + direct_addr_done_tx, + ); + + let netmon_watcher = network_monitor.interface_state(); let actor = Actor { msg_receiver: actor_receiver, - msg_sender: actor_sender, msock: msock.clone(), periodic_re_stun_timer: new_re_stun_timer(false), - net_info_last: None, - #[cfg(not(wasm_browser))] - port_mapper, - no_v4_send: false, - net_reporter, network_monitor, - net_report_config, + netmon_watcher, + direct_addr_update_state, network_change_sender, + direct_addr_done_rx, + pending_call_me_maybes: Default::default(), + disco_receiver, }; - actor_tasks.spawn( + let actor_token = CancellationToken::new(); + let token = actor_token.clone(); + let actor_task = task::spawn( async move { - if let Err(err) = actor.run(local_addrs_watch, sender2).await { + if let Err(err) = actor.run(token, local_addrs_watch, sender).await { warn!("relay handler errored: {:?}", err); } } .instrument(info_span!("actor")), ); - let actor_tasks = Arc::new(Mutex::new(actor_tasks)); + let actor_task = Arc::new(Mutex::new(Some(AbortOnDropHandle::new(actor_task)))); Ok(Handle { msock, - actor_tasks, + actor_task, endpoint, + actor_token, }) } @@ -1424,9 +1394,9 @@ impl Handle { /// Only the first close does anything. Any later closes return nil. /// Polling the socket ([`AsyncUdpSocket::poll_recv`]) will return [`Poll::Pending`] /// indefinitely after this call. - #[instrument(skip_all, fields(me = %self.msock.me))] + #[instrument(skip_all)] pub(crate) async fn close(&self) { - trace!("magicsock closing..."); + trace!(me = ?self.public_key, "magicsock closing..."); // Initiate closing all connections, and refuse future connections. 
self.endpoint.close(0u16.into(), b""); @@ -1451,38 +1421,27 @@ impl Handle { return; } self.msock.closing.store(true, Ordering::Relaxed); - // If this fails, then there's no receiver listening for shutdown messages, - // so nothing to shut down anyways. - self.msock - .actor_sender - .send(ActorMessage::Shutdown) - .await - .ok(); - self.msock.closed.store(true, Ordering::SeqCst); + self.actor_token.cancel(); - let mut tasks = self.actor_tasks.lock().await; - - // give the tasks a moment to shutdown cleanly - let tasks_ref = &mut tasks; - let shutdown_done = time::timeout(Duration::from_millis(100), async move { - while let Some(task) = tasks_ref.join_next().await { - if let Err(err) = task { + if let Some(task) = self.actor_task.lock().await.take() { + // give the tasks a moment to shutdown cleanly + let shutdown_done = time::timeout(Duration::from_millis(100), async move { + if let Err(err) = task.await { warn!("unexpected error in task shutdown: {:?}", err); } - } - }) - .await; - match shutdown_done { - Ok(_) => trace!("tasks finished in time, shutdown complete"), - Err(_elapsed) => { - // shutdown all tasks - warn!( - "tasks didn't finish in time, aborting remaining {}/3 tasks", - tasks.len() - ); - tasks.shutdown().await; + }) + .await; + match shutdown_done { + Ok(_) => trace!("tasks finished in time, shutdown complete"), + Err(time::Elapsed { .. }) => { + // Dropping the task will abort itt + warn!("tasks didn't finish in time, aborting"); + } } } + + self.msock.closed.store(true, Ordering::SeqCst); + trace!("magicsock closed"); } } @@ -1500,44 +1459,69 @@ fn default_quic_client_config() -> rustls::ClientConfig { .with_no_client_auth() } -#[derive(Debug, Default)] -struct DiscoSecrets(std::sync::Mutex>); +#[derive(Debug)] +struct DiscoState { + /// Encryption key for this node. + secret_encryption_key: crypto_box::SecretKey, + /// The state for an active DiscoKey. 
+ secrets: std::sync::Mutex>, + /// Disco (ping) queue + sender: mpsc::Sender<(SendAddr, PublicKey, disco::Message)>, +} -impl DiscoSecrets { - fn get(&self, secret: &crypto_box::SecretKey, node_id: PublicKey, cb: F) -> T - where - F: FnOnce(&mut SharedSecret) -> T, - { - let mut inner = self.0.lock().expect("poisoned"); - let x = inner.entry(node_id).or_insert_with(|| { - let public_key = public_ed_box(&node_id.public()); - SharedSecret::new(secret, &public_key) - }); - cb(x) +impl DiscoState { + fn new( + secret_encryption_key: crypto_box::SecretKey, + ) -> (Self, mpsc::Receiver<(SendAddr, PublicKey, disco::Message)>) { + let (disco_sender, disco_receiver) = mpsc::channel(256); + + ( + Self { + secret_encryption_key, + secrets: Default::default(), + sender: disco_sender, + }, + disco_receiver, + ) + } + fn try_send(&self, dst: SendAddr, node_id: PublicKey, msg: disco::Message) -> Result<()> { + self.sender + .try_send((dst, node_id, msg)) + .map_err(|_| anyhow::anyhow!("channel full")) } fn encode_and_seal( &self, - this_secret_key: &crypto_box::SecretKey, this_node_id: NodeId, other_node_id: NodeId, msg: &disco::Message, ) -> Bytes { let mut seal = msg.as_bytes(); - self.get(this_secret_key, other_node_id, |secret| { - secret.seal(&mut seal) - }); + self.get_secret(other_node_id, |secret| secret.seal(&mut seal)); disco::encode_message(&this_node_id, seal).into() } + fn unseal_and_decode( &self, - secret: &crypto_box::SecretKey, node_id: PublicKey, - mut sealed_box: Vec, + sealed_box: &[u8], ) -> Result { - self.get(secret, node_id, |secret| secret.open(&mut sealed_box))?; + let mut sealed_box = sealed_box.to_vec(); + self.get_secret(node_id, |secret| secret.open(&mut sealed_box))?; disco::Message::from_bytes(&sealed_box).map_err(DiscoBoxError::Parse) } + + fn get_secret(&self, node_id: PublicKey, cb: F) -> T + where + F: FnOnce(&mut SharedSecret) -> T, + { + let mut inner = self.secrets.lock().expect("poisoned"); + let x = inner.entry(node_id).or_insert_with(|| { + let public_key = public_ed_box(&node_id.public()); + SharedSecret::new(&self.secret_encryption_key, &public_key) + }); + cb(x) + } } #[derive(Debug, thiserror::Error)] @@ -1610,11 +1594,10 @@ impl AsyncUdpSocket for MagicUdpSocket { #[derive(Debug)] enum ActorMessage { - Shutdown, PingActions(Vec), EndpointPingExpired(usize, stun_rs::TransactionId), - NetReport(Result>>, &'static str), NetworkChange, + ScheduleDirectAddrUpdate(UpdateReason, Option<(NodeId, RelayUrl)>), #[cfg(test)] ForceNetworkChange(bool), } @@ -1622,28 +1605,20 @@ enum ActorMessage { struct Actor { msock: Arc, msg_receiver: mpsc::Receiver, - msg_sender: mpsc::Sender, /// When set, is an AfterFunc timer that will call MagicSock::do_periodic_stun. periodic_re_stun_timer: time::Interval, - /// The `NetInfo` provided in the last call to `net_info_func`. It's used to deduplicate calls to netInfoFunc. - net_info_last: Option, - - #[cfg(not(wasm_browser))] - port_mapper: portmapper::Client, - - /// Configuration for net report - net_report_config: net_report::Options, - - /// Whether IPv4 UDP is known to be unable to transmit - /// at all. This could happen if the socket is in an invalid state - /// (as can happen on darwin after a network link status change). - no_v4_send: bool, - - /// The prober that discovers local network conditions, including the closest relay relay and NAT mappings. 
- net_reporter: net_report::Client, network_monitor: netmon::Monitor, + netmon_watcher: n0_watcher::Direct, network_change_sender: transports::NetworkChangeSender, + /// Indicates the direct addr update state. + direct_addr_update_state: DirectAddrUpdateState, + direct_addr_done_rx: mpsc::Receiver<()>, + + /// List of CallMeMaybe disco messages that should be sent out after + /// the next endpoint update completes + pending_call_me_maybes: HashMap, + disco_receiver: mpsc::Receiver<(SendAddr, PublicKey, disco::Message)>, } #[cfg(not(wasm_browser))] @@ -1699,19 +1674,21 @@ fn bind_ip( impl Actor { async fn run( mut self, + shutdown_token: CancellationToken, mut watcher: impl Watcher> + Send + Sync, sender: UdpSender, ) -> Result<()> { // Setup network monitoring - let mut netmon_watcher = self.network_monitor.interface_state(); - let mut current_netmon_state = netmon_watcher.get()?; + let mut current_netmon_state = self.netmon_watcher.get()?; #[cfg(not(wasm_browser))] let mut direct_addr_heartbeat_timer = time::interval(HEARTBEAT_INTERVAL); - let mut direct_addr_update_receiver = - self.msock.direct_addr_update_state.running.subscribe(); + #[cfg(not(wasm_browser))] - let mut portmap_watcher = self.port_mapper.watch_external_address(); + let mut portmap_watcher = self + .direct_addr_update_state + .port_mapper + .watch_external_address(); let mut discovery_events: BoxStream = Box::pin(n0_future::stream::empty()); if let Some(d) = self.msock.discovery() { @@ -1724,6 +1701,8 @@ impl Actor { #[cfg_attr(wasm_browser, allow(unused_mut))] let mut portmap_watcher_closed = false; + let mut net_report_watcher = self.msock.net_report.watch(); + loop { self.msock.metrics.magicsock.actor_tick_main.inc(); #[cfg(not(wasm_browser))] @@ -1737,6 +1716,10 @@ impl Actor { let direct_addr_heartbeat_timer_tick = n0_future::future::pending(); tokio::select! 
{ + _ = shutdown_token.cancelled() => { + debug!("shutting down"); + return Ok(()); + } msg = self.msg_receiver.recv(), if !receiver_closed => { let Some(msg) = msg else { trace!("tick: magicsock receiver closed"); @@ -1748,14 +1731,12 @@ impl Actor { trace!(?msg, "tick: msg"); self.msock.metrics.magicsock.actor_tick_msg.inc(); - if self.handle_actor_message(msg, &sender).await { - return Ok(()); - } + self.handle_actor_message(msg, &sender).await; } tick = self.periodic_re_stun_timer.tick() => { trace!("tick: re_stun {:?}", tick); self.msock.metrics.magicsock.actor_tick_re_stun.inc(); - self.msock.re_stun("periodic"); + self.re_stun(UpdateReason::Periodic); } new_addr = watcher.updated() => { match new_addr { @@ -1770,6 +1751,32 @@ impl Actor { } } } + report = net_report_watcher.updated() => { + match report { + Ok((report, _)) => { + self.handle_net_report_report(report); + #[cfg(not(wasm_browser))] + { + self.periodic_re_stun_timer = new_re_stun_timer(true); + } + } + Err(_) => { + warn!("net report watcher stopped"); + } + } + } + reason = self.direct_addr_done_rx.recv() => { + match reason { + Some(()) => { + // check if a new run needs to be scheduled + let state = self.netmon_watcher.get().expect("disconnected"); + self.direct_addr_update_state.try_run(state.into()); + } + None => { + warn!("direct addr watcher died"); + } + } + } change = portmap_watcher_changed, if !portmap_watcher_closed => { #[cfg(not(wasm_browser))] { @@ -1785,7 +1792,7 @@ impl Actor { self.msock.metrics.magicsock.actor_tick_portmap_changed.inc(); let new_external_address = *portmap_watcher.borrow(); debug!("external address updated: {new_external_address:?}"); - self.msock.re_stun("portmap_updated"); + self.re_stun(UpdateReason::PortmapUpdated); } #[cfg(wasm_browser)] let _unused_in_browsers = change; @@ -1805,15 +1812,7 @@ impl Actor { self.handle_ping_actions(&sender, msgs).await; } } - _ = direct_addr_update_receiver.changed() => { - let reason = *direct_addr_update_receiver.borrow(); - trace!("tick: direct addr update receiver {:?}", reason); - self.msock.metrics.magicsock.actor_tick_direct_addr_update_receiver.inc(); - if let Some(reason) = reason { - self.refresh_direct_addrs(reason).await; - } - } - state = netmon_watcher.updated() => { + state = self.netmon_watcher.updated() => { let Ok(state) = state else { trace!("tick: link change receiver closed"); self.msock.metrics.magicsock.actor_tick_other.inc(); @@ -1843,6 +1842,11 @@ impl Actor { // Send the discovery item to the subscribers of the discovery broadcast stream. 
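// Editor's sketch (not part of the patch): the shutdown arm above pairs with the
// `Handle::close` hunk earlier in this diff. close() cancels a CancellationToken,
// then waits briefly for the actor task before giving up on it. A minimal
// stand-alone version of that pattern, assuming only tokio and tokio-util:
use std::time::Duration;
use tokio::time::timeout;
use tokio_util::sync::CancellationToken;

#[tokio::main]
async fn main() {
    let token = CancellationToken::new();
    let actor_token = token.clone();

    let actor_task = tokio::spawn(async move {
        loop {
            tokio::select! {
                _ = actor_token.cancelled() => {
                    // shutting down
                    return;
                }
                _ = tokio::time::sleep(Duration::from_millis(10)) => {
                    // regular actor work would go here
                }
            }
        }
    });

    // "close": cancel, then give the task a moment to finish cleanly.
    token.cancel();
    match timeout(Duration::from_millis(100), actor_task).await {
        Ok(res) => println!("actor finished in time: {res:?}"),
        Err(_elapsed) => println!("actor didn't finish in time, giving up on it"),
    }
}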
self.msock.discovery_subscribers.send(discovery_item); } + Some((dst, dst_key, msg)) = self.disco_receiver.recv() => { + if let Err(err) = self.msock.send_disco_message(&sender, dst.clone(), dst_key, msg).await { + warn!(%dst, node = %dst_key.fmt_short(), ?err, "failed to send disco message (UDP)"); + } + } } } } @@ -1857,13 +1861,19 @@ impl Actor { #[cfg(not(wasm_browser))] self.msock.dns_resolver.clear_cache(); - self.msock.re_stun("link-change-major"); + self.re_stun(UpdateReason::LinkChangeMajor); self.reset_endpoint_states(); } else { - self.msock.re_stun("link-change-minor"); + self.re_stun(UpdateReason::LinkChangeMinor); } } + fn re_stun(&mut self, why: UpdateReason) { + let state = self.netmon_watcher.get().expect("disconnected"); + self.direct_addr_update_state + .schedule_run(why, state.into()); + } + #[instrument(skip_all)] async fn handle_ping_actions(&mut self, sender: &UdpSender, msgs: Vec) { if let Err(err) = self.msock.send_ping_actions(sender, msgs).await { @@ -1874,38 +1884,22 @@ impl Actor { /// Processes an incoming actor message. /// /// Returns `true` if it was a shutdown. - async fn handle_actor_message(&mut self, msg: ActorMessage, sender: &UdpSender) -> bool { + async fn handle_actor_message(&mut self, msg: ActorMessage, sender: &UdpSender) { match msg { - ActorMessage::Shutdown => { - debug!("shutting down"); - - self.msock.node_map.notify_shutdown(); - #[cfg(not(wasm_browser))] - self.port_mapper.deactivate(); - - debug!("shutdown complete"); - return true; - } ActorMessage::EndpointPingExpired(id, txid) => { self.msock.node_map.notify_ping_timeout(id, txid); } - ActorMessage::NetReport(report, why) => { - match report { - Ok(report) => { - self.handle_net_report_report(report).await; - } - Err(err) => { - warn!( - "failed to generate net_report report for: {}: {:?}", - why, err - ); - } - } - self.finalize_direct_addrs_update(why); - } ActorMessage::NetworkChange => { self.network_monitor.network_change().await.ok(); } + ActorMessage::ScheduleDirectAddrUpdate(why, data) => { + if let Some((node, url)) = data { + self.pending_call_me_maybes.insert(node, url); + } + let state = self.netmon_watcher.get().expect("disconnected"); + self.direct_addr_update_state + .schedule_run(why, state.into()); + } #[cfg(test)] ActorMessage::ForceNetworkChange(is_major) => { self.handle_network_change(is_major); @@ -1914,25 +1908,6 @@ impl Actor { self.handle_ping_actions(sender, ping_actions).await; } } - - false - } - - /// Refreshes knowledge about our direct addresses. - /// - /// In other words, this triggers a net_report run. - /// - /// Note that invoking this is managed by the [`DirectAddrUpdateState`] and this should - /// never be invoked directly. Some day this will be refactored to not allow this easy - /// mistake to be made. - #[instrument(level = "debug", skip_all)] - async fn refresh_direct_addrs(&mut self, why: &'static str) { - self.msock.metrics.magicsock.update_direct_addrs.inc(); - - debug!("starting direct addr update ({})", why); - #[cfg(not(wasm_browser))] - self.port_mapper.procure_mapping(); - self.update_net_info(why).await; } /// Updates the direct addresses of this magic socket. @@ -1944,8 +1919,11 @@ impl Actor { /// - A net_report report. /// - The local interfaces IP addresses. 
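// Editor's sketch (not part of the patch): `DiscoState::try_send` above queues
// disco messages on a bounded channel and fails fast when the queue is full,
// while the `disco_receiver.recv()` arm in the actor loop drains it. A minimal
// stand-alone version of that send-side behaviour, assuming only tokio:
use tokio::sync::mpsc;

#[tokio::main]
async fn main() {
    let (tx, mut rx) = mpsc::channel::<String>(2);

    // Producer side: never block the caller, drop (and log) when full.
    for i in 0..4 {
        if tx.try_send(format!("disco message {i}")).is_err() {
            println!("channel full, dropping message {i}");
        }
    }

    // Consumer side (the actor loop): drain whatever was queued.
    drop(tx);
    while let Some(msg) = rx.recv().await {
        println!("forwarding {msg}");
    }
}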
#[cfg(not(wasm_browser))] - fn update_direct_addresses(&mut self, net_report_report: Option>) { - let portmap_watcher = self.port_mapper.watch_external_address(); + fn update_direct_addresses(&mut self, net_report_report: Option<&net_report::Report>) { + let portmap_watcher = self + .direct_addr_update_state + .port_mapper + .watch_external_address(); // We only want to have one DirectAddr for each SocketAddr we have. So we store // this as a map of SocketAddr -> DirectAddrType. At the end we will construct a @@ -1958,7 +1936,6 @@ impl Actor { addrs .entry(portmap_ext) .or_insert(DirectAddrType::Portmapped); - self.set_net_info_have_port_map(); } // Next add STUN addresses from the net_report report. @@ -2008,7 +1985,6 @@ impl Actor { .zip(self.msock.ip_local_addrs()) .collect(); - let msock = self.msock.clone(); let has_ipv4_unspecified = local_addrs.iter().find_map(|(_, a)| { if a.is_ipv4() && a.ip().is_unspecified() { Some(a.port()) @@ -2024,248 +2000,96 @@ impl Actor { } }); - // The following code can be slow, we do not want to block the caller since it would - // block the actor loop. - task::spawn( - async move { - // If a socket is bound to the unspecified address, create SocketAddrs for - // each local IP address by pairing it with the port the socket is bound on. - if local_addrs - .iter() - .any(|(_, local)| local.ip().is_unspecified()) - { - // Depending on the OS and network interfaces attached and their state - // enumerating the local interfaces can take a long time. Especially - // Windows is very slow. - let LocalAddresses { - regular: mut ips, - loopback, - } = tokio::task::spawn_blocking(LocalAddresses::new) - .await - .expect("spawn panicked"); - if ips.is_empty() && addrs.is_empty() { - // Include loopback addresses only if there are no other interfaces - // or public addresses, this allows testing offline. - ips = loopback; - } - - for ip in ips { - let port_if_unspecified = match ip { - IpAddr::V4(_) => has_ipv4_unspecified, - IpAddr::V6(_) => has_ipv6_unspecified, - }; - if let Some(port) = port_if_unspecified { - let addr = SocketAddr::new(ip, port); - addrs.entry(addr).or_insert(DirectAddrType::Local); - } - } - } - - // If a socket is bound to a specific address, add it. - for (bound, local) in local_addrs { - if !bound.ip().is_unspecified() { - addrs.entry(local).or_insert(DirectAddrType::Local); - } + // If a socket is bound to the unspecified address, create SocketAddrs for + // each local IP address by pairing it with the port the socket is bound on. + if local_addrs + .iter() + .any(|(_, local)| local.ip().is_unspecified()) + { + let LocalAddresses { + regular: mut ips, + loopback, + } = self + .netmon_watcher + .get() + .expect("netmon disconnected") + .local_addresses; + if ips.is_empty() && addrs.is_empty() { + // Include loopback addresses only if there are no other interfaces + // or public addresses, this allows testing offline. + ips = loopback; + } + + for ip in ips { + let port_if_unspecified = match ip { + IpAddr::V4(_) => has_ipv4_unspecified, + IpAddr::V6(_) => has_ipv6_unspecified, + }; + if let Some(port) = port_if_unspecified { + let addr = SocketAddr::new(ip, port); + addrs.entry(addr).or_insert(DirectAddrType::Local); } - - // Finally create and store store all these direct addresses and send any - // queued call-me-maybe messages. 
- msock.store_direct_addresses( - addrs - .iter() - .map(|(addr, typ)| DirectAddr { - addr: *addr, - typ: *typ, - }) - .collect(), - ); - msock.send_queued_call_me_maybes(); - } - .instrument(Span::current()), - ); - } - - /// Called when a direct addr update is done, no matter if it was successful or not. - fn finalize_direct_addrs_update(&mut self, why: &'static str) { - let new_why = self.msock.direct_addr_update_state.next_update(); - if !self.msock.is_closed() { - if let Some(new_why) = new_why { - self.msock.direct_addr_update_state.run(new_why); - return; - } - #[cfg(not(wasm_browser))] - { - self.periodic_re_stun_timer = new_re_stun_timer(true); - } - } - - self.msock.direct_addr_update_state.finish_run(); - debug!("direct addr update done ({})", why); - } - - /// Updates `NetInfo.HavePortMap` to true. - #[instrument(level = "debug", skip_all)] - fn set_net_info_have_port_map(&mut self) { - if let Some(ref mut net_info_last) = self.net_info_last { - if net_info_last.have_port_map { - // No change. - return; } - net_info_last.have_port_map = true; - self.net_info_last = Some(net_info_last.clone()); } - } - #[instrument(level = "debug", skip_all)] - async fn call_net_info_callback(&mut self, ni: NetInfo) { - if let Some(ref net_info_last) = self.net_info_last { - if ni.basically_equal(net_info_last) { - return; + // If a socket is bound to a specific address, add it. + for (bound, local) in local_addrs { + if !bound.ip().is_unspecified() { + addrs.entry(local).or_insert(DirectAddrType::Local); } } - self.net_info_last = Some(ni); + // Finally create and store store all these direct addresses and send any + // queued call-me-maybe messages. + self.msock.store_direct_addresses( + addrs + .iter() + .map(|(addr, typ)| DirectAddr { + addr: *addr, + typ: *typ, + }) + .collect(), + ); + self.send_queued_call_me_maybes(); } - /// Calls net_report. - /// - /// Note that invoking this is managed by [`DirectAddrUpdateState`] via - /// [`Actor::refresh_direct_addrs`] and this should never be invoked directly. Some day - /// this will be refactored to not allow this easy mistake to be made. 
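// Editor's sketch (not part of the patch): `update_direct_addresses` above relies
// on insertion order plus `entry().or_insert()` so that, for a given socket
// address, a port-mapped classification wins over a STUN/QAD-derived one, which
// in turn wins over a plain local one. A tiny stand-alone illustration (the enum
// and the address are invented for the example):
use std::collections::BTreeMap;
use std::net::SocketAddr;

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Kind {
    Portmapped,
    Stun,
    Local,
}

fn main() {
    let addr: SocketAddr = "192.0.2.1:4433".parse().unwrap();
    let mut addrs: BTreeMap<SocketAddr, Kind> = BTreeMap::new();

    // Highest-priority sources are inserted first...
    addrs.entry(addr).or_insert(Kind::Portmapped);
    // ...so later, lower-priority classifications of the same address are no-ops.
    addrs.entry(addr).or_insert(Kind::Stun);
    addrs.entry(addr).or_insert(Kind::Local);

    assert_eq!(addrs[&addr], Kind::Portmapped);
}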
- #[instrument(level = "debug", skip_all)] - async fn update_net_info(&mut self, why: &'static str) { - // Don't start a net report probe if we know - // we are shutting down - if self.msock.is_closing() || self.msock.is_closed() { - debug!("skipping net_report, socket is shutting down"); - return; - } - if self.msock.relay_map.is_empty() { - debug!("skipping net_report, empty RelayMap"); - self.msg_sender - .send(ActorMessage::NetReport(Ok(None), why)) - .await - .ok(); - return; - } - - let relay_map = self.msock.relay_map.clone(); - let opts = self.net_report_config.clone(); + fn send_queued_call_me_maybes(&mut self) { + let msg = self.msock.direct_addrs.to_call_me_maybe_message(); + let msg = disco::Message::CallMeMaybe(msg); + // allocate, to minimize locking duration - debug!("requesting net_report report"); - match self.net_reporter.get_report_channel(relay_map, opts).await { - Ok(rx) => { - let msg_sender = self.msg_sender.clone(); - task::spawn(async move { - let report = time::timeout(NET_REPORT_TIMEOUT, rx).await; - let report: anyhow::Result<_> = match report { - Ok(Ok(Ok(report))) => Ok(Some(report)), - Ok(Ok(Err(err))) => Err(err), - Ok(Err(_)) => Err(anyhow!("net_report report not received")), - Err(err) => Err(anyhow!("net_report report timeout: {:?}", err)), - }; - msg_sender - .send(ActorMessage::NetReport(report, why)) - .await - .ok(); - // The receiver of the NetReport message will call - // .finalize_direct_addrs_update(). - }); - } - Err(err) => { - warn!("unable to start net_report generation: {:?}", err); - self.finalize_direct_addrs_update(why); + for (public_key, url) in self.pending_call_me_maybes.drain() { + if self + .msock + .disco + .try_send(SendAddr::Relay(url), public_key, msg.clone()) + .is_err() + { + warn!(node = %public_key.fmt_short(), "relay channel full, dropping call-me-maybe"); } } } - async fn handle_net_report_report(&mut self, report: Option>) { - if let Some(ref report) = report { - // only returns Err if the report hasn't changed. - self.msock.net_report.set(Some(report.clone())).ok(); - self.msock - .ipv6_reported - .store(report.ipv6, Ordering::Relaxed); - let r = &report; - trace!( - "setting no_v4_send {} -> {}", - self.no_v4_send, - !r.ipv4_can_send - ); - self.no_v4_send = !r.ipv4_can_send; - - #[cfg(not(wasm_browser))] - let have_port_map = self.port_mapper.watch_external_address().borrow().is_some(); - #[cfg(wasm_browser)] - let have_port_map = false; - - let mut ni = NetInfo { - relay_latency: Default::default(), - mapping_varies_by_dest_ip: r.mapping_varies_by_dest_ip, - hair_pinning: r.hair_pinning, - #[cfg(not(wasm_browser))] - portmap_probe: r.portmap_probe.clone(), - have_port_map, - working_ipv6: Some(r.ipv6), - os_has_ipv6: Some(r.os_has_ipv6), - working_udp: Some(r.udp), - working_icmp_v4: r.icmpv4, - working_icmp_v6: r.icmpv6, - preferred_relay: r.preferred_relay.clone(), - }; - for (rid, d) in r.relay_v4_latency.iter() { - ni.relay_latency - .insert(format!("{rid}-v4"), d.as_secs_f64()); - } - for (rid, d) in r.relay_v6_latency.iter() { - ni.relay_latency - .insert(format!("{rid}-v6"), d.as_secs_f64()); - } - - if ni.preferred_relay.is_none() { - // Perhaps UDP is blocked. Pick a deterministic but arbitrary one. 
- ni.preferred_relay = self.pick_relay_fallback(); + fn handle_net_report_report(&mut self, mut report: Option) { + if let Some(ref mut r) = report { + self.msock.ipv6_reported.store(r.ipv6, Ordering::Relaxed); + if r.preferred_relay.is_none() { + if let Some(my_relay) = self.msock.my_relay() { + r.preferred_relay.replace(my_relay); + } } // Notify all transports - self.network_change_sender.on_network_change(&ni); - - // TODO: set link type - self.call_net_info_callback(ni).await; - } - #[cfg(not(wasm_browser))] - self.update_direct_addresses(report); - } - - /// Returns a deterministic relay node to connect to. This is only used if net_report - /// couldn't find the nearest one, for instance, if UDP is blocked and thus STUN - /// latency checks aren't working. - /// - /// If no the [`RelayMap`] is empty, returns `0`. - fn pick_relay_fallback(&self) -> Option { - // TODO: figure out which relay node most of our nodes are using, - // and use that region as our fallback. - // - // If we already had selected something in the past and it has any - // nodes, we want to stay on it. If there are no nodes at all, - // stay on whatever relay we previously picked. If we need to pick - // one and have no node info, pick a node randomly. - // - // We used to do the above for legacy clients, but never updated it for disco. - - let my_relay = self.msock.my_relay(); - if my_relay.is_some() { - return my_relay; + self.network_change_sender.on_network_change(r); } - let ids = self.msock.relay_map.urls().collect::>(); - let mut rng = rand::rngs::StdRng::seed_from_u64(0); - ids.choose(&mut rng).map(|c| (*c).clone()) + #[cfg(not(wasm_browser))] + self.update_direct_addresses(report.as_ref()); } /// Resets the preferred address for all nodes. /// This is called when connectivity changes enough that we no longer trust the old routes. - #[instrument(skip_all, fields(me = %self.msock.me))] + #[instrument(skip_all)] fn reset_endpoint_states(&mut self) { self.msock.node_map.reset_node_states() } @@ -2538,88 +2362,11 @@ impl Display for DirectAddrType { } } -/// Contains information about the host's network state. -#[derive(Debug, Clone, PartialEq)] -pub(crate) struct NetInfo { - /// Says whether the host's NAT mappings vary based on the destination IP. - mapping_varies_by_dest_ip: Option, - - /// If their router does hairpinning. It reports true even if there's no NAT involved. - hair_pinning: Option, - - /// Whether the host has IPv6 internet connectivity. - working_ipv6: Option, - - /// Whether the OS supports IPv6 at all, regardless of whether IPv6 internet connectivity is available. - os_has_ipv6: Option, - - /// Whether the host has UDP internet connectivity. - working_udp: Option, - - /// Whether ICMPv4 works, `None` means not checked. - working_icmp_v4: Option, - - /// Whether ICMPv6 works, `None` means not checked. - working_icmp_v6: Option, - - /// Whether we have an existing portmap open (UPnP, PMP, or PCP). - have_port_map: bool, - - /// Probe indicating the presence of port mapping protocols on the LAN. - #[cfg(not(wasm_browser))] - portmap_probe: Option, - - /// This node's preferred relay server for incoming traffic. - /// - /// The node might be be temporarily connected to multiple relay servers (to send to - /// other nodes) but this is the relay on which you can always contact this node. Also - /// known as home relay. - preferred_relay: Option, - - /// The fastest recent time to reach various relay STUN servers, in seconds. 
- /// - /// This should only be updated rarely, or when there's a - /// material change, as any change here also gets uploaded to the control plane. - relay_latency: BTreeMap, -} - -impl NetInfo { - /// Checks if this is probably still the same network as *other*. - /// - /// This tries to compare the network situation, without taking into account things - /// expected to change a little like e.g. latency to the relay server. - fn basically_equal(&self, other: &Self) -> bool { - let eq_icmp_v4 = match (self.working_icmp_v4, other.working_icmp_v4) { - (Some(slf), Some(other)) => slf == other, - _ => true, // ignore for comparison if only one report had this info - }; - let eq_icmp_v6 = match (self.working_icmp_v6, other.working_icmp_v6) { - (Some(slf), Some(other)) => slf == other, - _ => true, // ignore for comparison if only one report had this info - }; - - #[cfg(not(wasm_browser))] - let probe_eq = self.portmap_probe == other.portmap_probe; - #[cfg(wasm_browser)] - let probe_eq = true; - - self.mapping_varies_by_dest_ip == other.mapping_varies_by_dest_ip - && self.hair_pinning == other.hair_pinning - && self.working_ipv6 == other.working_ipv6 - && self.os_has_ipv6 == other.os_has_ipv6 - && self.working_udp == other.working_udp - && eq_icmp_v4 - && eq_icmp_v6 - && self.have_port_map == other.have_port_map - && probe_eq - && self.preferred_relay == other.preferred_relay - } -} - #[cfg(test)] mod tests { - use anyhow::Context; + use anyhow::{anyhow, Context}; use rand::RngCore; + use tokio::task::JoinSet; use tokio_util::task::AbortOnDropHandle; use tracing_test::traced_test; diff --git a/iroh/src/magicsock/metrics.rs b/iroh/src/magicsock/metrics.rs index b6d7fe5d44c..803a829bd48 100644 --- a/iroh/src/magicsock/metrics.rs +++ b/iroh/src/magicsock/metrics.rs @@ -8,7 +8,6 @@ use serde::{Deserialize, Serialize}; #[non_exhaustive] #[metrics(name = "magicsock")] pub struct Metrics { - pub re_stun_calls: Counter, pub update_direct_addrs: Counter, // Sends (data or disco) @@ -66,7 +65,6 @@ pub struct Metrics { pub actor_tick_re_stun: Counter, pub actor_tick_portmap_changed: Counter, pub actor_tick_direct_addr_heartbeat: Counter, - pub actor_tick_direct_addr_update_receiver: Counter, pub actor_link_change: Counter, pub actor_tick_other: Counter, diff --git a/iroh/src/magicsock/node_map.rs b/iroh/src/magicsock/node_map.rs index 519519c827e..a4d177cccdf 100644 --- a/iroh/src/magicsock/node_map.rs +++ b/iroh/src/magicsock/node_map.rs @@ -265,13 +265,6 @@ impl NodeMap { Some((public_key, udp_addr, relay_url, ping_actions)) } - pub(super) fn notify_shutdown(&self) { - let mut inner = self.inner.lock().expect("poisoned"); - for (_, ep) in inner.node_states_mut() { - ep.reset(); - } - } - pub(super) fn reset_node_states(&self) { let mut inner = self.inner.lock().expect("poisoned"); for (_, ep) in inner.node_states_mut() { diff --git a/iroh/src/magicsock/node_map/best_addr.rs b/iroh/src/magicsock/node_map/best_addr.rs index 18d9ef960be..5c451a9187a 100644 --- a/iroh/src/magicsock/node_map/best_addr.rs +++ b/iroh/src/magicsock/node_map/best_addr.rs @@ -57,7 +57,6 @@ pub(super) enum State<'a> { #[derive(Debug, Clone, Copy)] pub enum ClearReason { - Reset, Inactive, PongTimeout, MatchesOurLocalAddr, diff --git a/iroh/src/magicsock/node_map/node_state.rs b/iroh/src/magicsock/node_map/node_state.rs index fc67aca4d24..15b0870777f 100644 --- a/iroh/src/magicsock/node_map/node_state.rs +++ b/iroh/src/magicsock/node_map/node_state.rs @@ -6,7 +6,6 @@ use std::{ use data_encoding::HEXLOWER; use iroh_base::{NodeAddr, 
NodeId, PublicKey, RelayUrl}; -use iroh_relay::protos::stun; use n0_future::{ task::{self, AbortOnDropHandle}, time::{self, Duration, Instant}, @@ -67,7 +66,7 @@ pub(in crate::magicsock) struct SendPing { pub id: usize, pub dst: SendAddr, pub dst_node: NodeId, - pub tx_id: stun::TransactionId, + pub tx_id: stun_rs::TransactionId, pub purpose: DiscoPingPurpose, } @@ -114,7 +113,7 @@ pub(super) struct NodeState { /// The fallback/bootstrap path, if non-zero (non-zero for well-behaved clients). relay_url: Option<(RelayUrl, PathState)>, udp_paths: NodeUdpPaths, - sent_pings: HashMap, + sent_pings: HashMap, /// Last time this node was used. /// /// A node is marked as in use when sending datagrams to them, or when having received @@ -429,7 +428,7 @@ impl NodeState { /// Cleanup the expired ping for the passed in txid. #[instrument("disco", skip_all, fields(node = %self.node_id.fmt_short()))] - pub(super) fn ping_timeout(&mut self, txid: stun::TransactionId) { + pub(super) fn ping_timeout(&mut self, txid: stun_rs::TransactionId) { if let Some(sp) = self.sent_pings.remove(&txid) { debug!(tx = %HEXLOWER.encode(&txid), addr = %sp.to, "pong not received in timeout"); match sp.to { @@ -487,7 +486,7 @@ impl NodeState { return None; // Similar to `RelayOnly` mode, we don't send UDP pings for hole-punching. } - let tx_id = stun::TransactionId::default(); + let tx_id = stun_rs::TransactionId::default(); trace!(tx = %HEXLOWER.encode(&tx_id), %dst, ?purpose, dst = %self.node_id.fmt_short(), "start ping"); event!( @@ -511,7 +510,7 @@ impl NodeState { pub(super) fn ping_sent( &mut self, to: SendAddr, - tx_id: stun::TransactionId, + tx_id: stun_rs::TransactionId, purpose: DiscoPingPurpose, sender: mpsc::Sender, ) { @@ -706,19 +705,6 @@ impl NodeState { debug!(new = ?new_addrs , %paths, "added new direct paths for endpoint"); } - /// Clears all the endpoint's p2p state, reverting it to a relay-only endpoint. - #[instrument(skip_all, fields(node = %self.node_id.fmt_short()))] - pub(super) fn reset(&mut self) { - self.last_full_ping = None; - self.udp_paths - .best_addr - .clear(ClearReason::Reset, self.relay_url.is_some()); - - for es in self.udp_paths.paths.values_mut() { - es.last_ping = None; - } - } - /// Handle a received Disco Ping. /// /// - Ensures the paths the ping was received on is a known path for this endpoint. @@ -731,7 +717,7 @@ impl NodeState { pub(super) fn handle_ping( &mut self, path: SendAddr, - tx_id: stun::TransactionId, + tx_id: stun_rs::TransactionId, ) -> PingHandled { let now = Instant::now(); diff --git a/iroh/src/magicsock/node_map/path_state.rs b/iroh/src/magicsock/node_map/path_state.rs index 7241121722a..2d6855cab30 100644 --- a/iroh/src/magicsock/node_map/path_state.rs +++ b/iroh/src/magicsock/node_map/path_state.rs @@ -6,7 +6,6 @@ use std::{ }; use iroh_base::NodeId; -use iroh_relay::protos::stun; use n0_future::time::{Duration, Instant}; use tracing::{debug, event, Level}; @@ -39,7 +38,7 @@ pub(super) struct PathState { /// If non-zero, means that this was an endpoint that we learned about at runtime (from an /// incoming ping). If so, we keep the time updated and use it to discard old candidates. // NOTE: tx_id Originally added in tailscale due to . - last_got_ping: Option<(Instant, stun::TransactionId)>, + last_got_ping: Option<(Instant, stun_rs::TransactionId)>, /// The time this endpoint was last advertised via a call-me-maybe DISCO message. 
pub(super) call_me_maybe_time: Option, @@ -107,7 +106,7 @@ impl PathState { pub(super) fn with_ping( node_id: NodeId, path: SendAddr, - tx_id: stun::TransactionId, + tx_id: stun_rs::TransactionId, source: Source, now: Instant, ) -> Self { @@ -239,7 +238,7 @@ impl PathState { } } - pub(super) fn handle_ping(&mut self, tx_id: stun::TransactionId, now: Instant) -> PingRole { + pub(super) fn handle_ping(&mut self, tx_id: stun_rs::TransactionId, now: Instant) -> PingRole { if Some(&tx_id) == self.last_got_ping.as_ref().map(|(_t, tx_id)| tx_id) { PingRole::Duplicate } else { diff --git a/iroh/src/magicsock/transports.rs b/iroh/src/magicsock/transports.rs index f6d4e735304..ae0a1c5fa5b 100644 --- a/iroh/src/magicsock/transports.rs +++ b/iroh/src/magicsock/transports.rs @@ -21,7 +21,8 @@ pub(crate) use self::ip::IpTransport; #[cfg(not(wasm_browser))] use self::ip::{IpNetworkChangeSender, IpSender}; pub(crate) use self::relay::{RelayActorConfig, RelayTransport}; -use super::{MagicSock, NetInfo}; +use super::MagicSock; +use crate::net_report::Report; /// Manages the different underlying data transports that the magicsock /// can support. @@ -262,14 +263,14 @@ pub(crate) struct NetworkChangeSender { } impl NetworkChangeSender { - pub(crate) fn on_network_change(&self, info: &NetInfo) { + pub(crate) fn on_network_change(&self, report: &Report) { #[cfg(not(wasm_browser))] for ip in &self.ip { - ip.on_network_change(info); + ip.on_network_change(report); } for relay in &self.relay { - relay.on_network_change(info); + relay.on_network_change(report); } } @@ -369,10 +370,6 @@ impl Addr { pub fn is_relay(&self) -> bool { matches!(self, Self::Relay(..)) } - - pub fn is_ip(&self) -> bool { - matches!(self, Self::Ip(..)) - } } #[derive(Debug)] diff --git a/iroh/src/magicsock/transports/ip.rs b/iroh/src/magicsock/transports/ip.rs index cc4b945d541..a941d31402c 100644 --- a/iroh/src/magicsock/transports/ip.rs +++ b/iroh/src/magicsock/transports/ip.rs @@ -87,10 +87,6 @@ impl IpTransport { } } - pub(crate) fn socket(&self) -> Arc { - self.socket.clone() - } - pub(super) fn create_sender(&self) -> IpSender { let sender = self.socket.clone().create_sender(); IpSender { @@ -116,7 +112,7 @@ impl IpNetworkChangeSender { Ok(()) } - pub(super) fn on_network_change(&self, _info: &crate::magicsock::NetInfo) { + pub(super) fn on_network_change(&self, _info: &crate::magicsock::Report) { // Nothing to do for now } } diff --git a/iroh/src/magicsock/transports/relay.rs b/iroh/src/magicsock/transports/relay.rs index 0a9d9ef89c0..9345bed6af2 100644 --- a/iroh/src/magicsock/transports/relay.rs +++ b/iroh/src/magicsock/transports/relay.rs @@ -142,8 +142,10 @@ pub(super) struct RelayNetworkChangeSender { } impl RelayNetworkChangeSender { - pub(super) fn on_network_change(&self, info: &crate::magicsock::NetInfo) { - self.send_relay_actor(RelayActorMessage::NetworkChange { info: info.clone() }); + pub(super) fn on_network_change(&self, report: &crate::magicsock::Report) { + self.send_relay_actor(RelayActorMessage::NetworkChange { + report: report.clone(), + }); } pub(super) fn rebind(&self) -> io::Result<()> { diff --git a/iroh/src/magicsock/transports/relay/actor.rs b/iroh/src/magicsock/transports/relay/actor.rs index 47d5a9c97b0..42ece2b92b8 100644 --- a/iroh/src/magicsock/transports/relay/actor.rs +++ b/iroh/src/magicsock/transports/relay/actor.rs @@ -61,7 +61,8 @@ use url::Url; #[cfg(not(wasm_browser))] use crate::dns::DnsResolver; use crate::{ - magicsock::{Metrics as MagicsockMetrics, NetInfo, RelayContents}, + 
magicsock::{Metrics as MagicsockMetrics, RelayContents}, + net_report::Report, util::MaybeFuture, }; @@ -794,7 +795,7 @@ impl ConnectedRelayState { pub(super) enum RelayActorMessage { MaybeCloseRelaysOnRebind, - NetworkChange { info: NetInfo }, + NetworkChange { report: Report }, } #[derive(Debug, Clone)] @@ -920,8 +921,8 @@ impl RelayActor { async fn handle_msg(&mut self, msg: RelayActorMessage) { match msg { - RelayActorMessage::NetworkChange { info } => { - self.on_network_change(info).await; + RelayActorMessage::NetworkChange { report } => { + self.on_network_change(report).await; } RelayActorMessage::MaybeCloseRelaysOnRebind => { self.maybe_close_relays_on_rebind().await; @@ -957,19 +958,19 @@ impl RelayActor { } } - async fn on_network_change(&mut self, info: NetInfo) { + async fn on_network_change(&mut self, report: Report) { let my_relay = self.config.my_relay.get(); - if info.preferred_relay == my_relay { + if report.preferred_relay == my_relay { // No change. return; } let old_relay = self .config .my_relay - .set(info.preferred_relay.clone()) + .set(report.preferred_relay.clone()) .unwrap_or_else(|e| e); - if let Some(relay_url) = info.preferred_relay { + if let Some(relay_url) = report.preferred_relay { self.config.metrics.relay_home_change.inc(); // On change, notify all currently connected relay servers and diff --git a/iroh/src/net_report.rs b/iroh/src/net_report.rs index fb87d4e58c4..74ae17530e8 100644 --- a/iroh/src/net_report.rs +++ b/iroh/src/net_report.rs @@ -10,42 +10,28 @@ #![cfg_attr(not(test), deny(clippy::unwrap_used))] #![cfg_attr(wasm_browser, allow(unused))] -use std::{ - collections::{BTreeMap, HashMap}, - fmt::{self, Debug}, - net::{SocketAddr, SocketAddrV4, SocketAddrV6}, - sync::Arc, -}; +use std::{collections::BTreeMap, fmt::Debug, sync::Arc}; -use anyhow::{anyhow, Result}; -use bytes::Bytes; -use iroh_base::RelayUrl; +use anyhow::Result; #[cfg(not(wasm_browser))] use iroh_relay::dns::DnsResolver; -use iroh_relay::{protos::stun, RelayMap}; -use n0_future::{ - task::{self, AbortOnDropHandle}, - time::{Duration, Instant}, -}; +use iroh_relay::RelayMap; +use n0_future::time::{self, Duration, Instant}; #[cfg(not(wasm_browser))] use netwatch::UdpSocket; -use tokio::sync::{self, mpsc, oneshot}; -use tracing::{debug, error, info_span, trace, warn, Instrument}; +use reportgen::{ProbeFinished, ProbeReport}; +use tracing::{debug, trace}; mod defaults; #[cfg(not(wasm_browser))] mod dns; mod ip_mapped_addrs; mod metrics; -#[cfg(not(wasm_browser))] -mod ping; +mod report; mod reportgen; mod options; -#[cfg(not(wasm_browser))] -pub use stun_utils::bind_local_stun_socket; - /// We "vendor" what we need of the library in browsers for simplicity. /// /// We could consider making `portmapper` compile to wasm in the future, @@ -66,11 +52,16 @@ pub(crate) mod portmapper { } pub(crate) use ip_mapped_addrs::{IpMappedAddr, IpMappedAddresses}; -pub use metrics::Metrics; -pub use options::Options; -pub use reportgen::QuicConfig; + #[cfg(not(wasm_browser))] -use reportgen::SocketState; +use self::reportgen::SocketState; +pub use self::{ + metrics::Metrics, + options::Options, + report::{RelayLatencies, Report}, + reportgen::{IfStateDetails, QuicConfig}, +}; +use crate::util::MaybeFuture; const FULL_REPORT_INTERVAL: Duration = Duration::from_secs(5 * 60); @@ -83,116 +74,7 @@ const FULL_REPORT_INTERVAL: Duration = Duration::from_secs(5 * 60); /// default which will never be used. const DEFAULT_MAX_LATENCY: Duration = Duration::from_millis(100); -/// A net_report report. 
-/// -/// Can be obtained by calling [`Client::get_report`]. -#[derive(Default, Debug, PartialEq, Eq, Clone)] -pub struct Report { - /// A UDP STUN round trip completed. - pub udp: bool, - /// An IPv6 STUN round trip completed. - pub ipv6: bool, - /// An IPv4 STUN round trip completed. - pub ipv4: bool, - /// An IPv6 packet was able to be sent - pub ipv6_can_send: bool, - /// an IPv4 packet was able to be sent - pub ipv4_can_send: bool, - /// could bind a socket to ::1 - pub os_has_ipv6: bool, - /// An ICMPv4 round trip completed, `None` if not checked. - pub icmpv4: Option, - /// An ICMPv6 round trip completed, `None` if not checked. - pub icmpv6: Option, - /// Whether STUN results depend on which STUN server you're talking to (on IPv4). - pub mapping_varies_by_dest_ip: Option, - /// Whether STUN results depend on which STUN server you're talking to (on IPv6). - /// - /// Note that we don't really expect this to happen and are merely logging this if - /// detecting rather than using it. For now. - pub mapping_varies_by_dest_ipv6: Option, - /// Whether the router supports communicating between two local devices through the NATted - /// public IP address (on IPv4). - pub hair_pinning: Option, - /// Probe indicating the presence of port mapping protocols on the LAN. - pub portmap_probe: Option, - /// `None` for unknown - pub preferred_relay: Option, - /// keyed by relay Url - pub relay_latency: RelayLatencies, - /// keyed by relay Url - pub relay_v4_latency: RelayLatencies, - /// keyed by relay Url - pub relay_v6_latency: RelayLatencies, - /// ip:port of global IPv4 - pub global_v4: Option, - /// `[ip]:port` of global IPv6 - pub global_v6: Option, - /// CaptivePortal is set when we think there's a captive portal that is - /// intercepting HTTP traffic. - pub captive_portal: Option, -} - -impl fmt::Display for Report { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Debug::fmt(&self, f) - } -} - -/// Latencies per relay node. -#[derive(Debug, Default, PartialEq, Eq, Clone)] -pub struct RelayLatencies(BTreeMap); - -impl RelayLatencies { - fn new() -> Self { - Default::default() - } - - /// Updates a relay's latency, if it is faster than before. - fn update_relay(&mut self, url: RelayUrl, latency: Duration) { - let val = self.0.entry(url).or_insert(latency); - if latency < *val { - *val = latency; - } - } - - /// Merges another [`RelayLatencies`] into this one. - /// - /// For each relay the latency is updated using [`RelayLatencies::update_relay`]. - fn merge(&mut self, other: &RelayLatencies) { - for (url, latency) in other.iter() { - self.update_relay(url.clone(), latency); - } - } - - /// Returns the maximum latency for all relays. - /// - /// If there are not yet any latencies this will return [`DEFAULT_MAX_LATENCY`]. - fn max_latency(&self) -> Duration { - self.0 - .values() - .max() - .copied() - .unwrap_or(DEFAULT_MAX_LATENCY) - } - - /// Returns an iterator over all the relays and their latencies. - pub fn iter(&self) -> impl Iterator + '_ { - self.0.iter().map(|(k, v)| (k, *v)) - } - - fn len(&self) -> usize { - self.0.len() - } - - fn is_empty(&self) -> bool { - self.0.is_empty() - } - - fn get(&self, url: &RelayUrl) -> Option { - self.0.get(url).copied() - } -} +const ENOUGH_NODES: usize = 3; /// Client to run net_reports. /// @@ -205,14 +87,25 @@ impl RelayLatencies { /// While running the net_report actor expects to be passed all received stun packets using /// `Addr::receive_stun_packet`. 
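// Editor's sketch (not part of the patch): the RelayLatencies helper removed here
// (and now re-exported from the new `report` module) keeps, per relay, only the
// best latency seen so far, and reports a conservative default while it is still
// empty. A stand-alone rendition of that bookkeeping, with invented names and the
// same 100ms default as above:
use std::collections::BTreeMap;
use std::time::Duration;

const DEFAULT_MAX_LATENCY: Duration = Duration::from_millis(100);

#[derive(Debug, Default)]
struct Latencies(BTreeMap<String, Duration>);

impl Latencies {
    // Record a latency for a relay, keeping it only if it beats the previous best.
    fn update(&mut self, relay: &str, latency: Duration) {
        let best = self.0.entry(relay.to_string()).or_insert(latency);
        if latency < *best {
            *best = latency;
        }
    }

    // Slowest of the per-relay bests, or the default if nothing was measured yet.
    fn max_latency(&self) -> Duration {
        self.0.values().max().copied().unwrap_or(DEFAULT_MAX_LATENCY)
    }
}

fn main() {
    let mut l = Latencies::default();
    assert_eq!(l.max_latency(), DEFAULT_MAX_LATENCY);
    l.update("relay-a", Duration::from_millis(30));
    l.update("relay-a", Duration::from_millis(80)); // slower, ignored
    l.update("relay-b", Duration::from_millis(50));
    assert_eq!(l.max_latency(), Duration::from_millis(50));
}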
#[derive(Debug)] -pub struct Client { - /// Channel to send message to the [`Actor`]. +pub(crate) struct Client { + /// The port mapper client, if those are requested. + /// + /// The port mapper is responsible for talking to routers via UPnP and the like to try + /// and open ports. + #[cfg(not(wasm_browser))] + port_mapper: Option, + /// The DNS resolver to use for probes that need to perform DNS lookups + #[cfg(not(wasm_browser))] + dns_resolver: DnsResolver, + /// The [`IpMappedAddresses`] that allows you to do QAD in iroh + #[cfg(not(wasm_browser))] + ip_mapped_addrs: Option, + metrics: Arc, + + /// A collection of previously generated reports. /// - /// If all senders are dropped, in other words all clones of this struct are dropped, - /// the actor will terminate. - addr: Addr, - /// Ensures the actor is terminated when the client is dropped. - _drop_guard: Arc>, + /// Sometimes it is useful to look at past reports to decide what to do. + reports: Reports, } #[derive(Debug)] @@ -220,9 +113,9 @@ struct Reports { /// Do a full relay scan, even if last is `Some`. next_full: bool, /// Some previous reports. - prev: HashMap>, + prev: BTreeMap, /// Most recent report. - last: Option>, + last: Option, /// Time of last full (non-incremental) report. last_full: Instant, } @@ -230,7 +123,7 @@ struct Reports { impl Default for Reports { fn default() -> Self { Self { - next_full: Default::default(), + next_full: true, prev: Default::default(), last: Default::default(), last_full: Instant::now(), @@ -240,16 +133,13 @@ impl Default for Reports { impl Client { /// Creates a new net_report client. - /// - /// This starts a connected actor in the background. Once the client is dropped it will - /// stop running. - pub fn new( + pub(crate) fn new( #[cfg(not(wasm_browser))] port_mapper: Option, #[cfg(not(wasm_browser))] dns_resolver: DnsResolver, #[cfg(not(wasm_browser))] ip_mapped_addrs: Option, metrics: Arc, ) -> Result { - let mut actor = Actor::new( + Ok(Client { #[cfg(not(wasm_browser))] port_mapper, #[cfg(not(wasm_browser))] @@ -257,333 +147,30 @@ impl Client { #[cfg(not(wasm_browser))] ip_mapped_addrs, metrics, - )?; - let addr = actor.addr(); - let task = task::spawn( - async move { actor.run().await }.instrument(info_span!("net_report.actor")), - ); - let drop_guard = AbortOnDropHandle::new(task); - Ok(Client { - addr, - _drop_guard: Arc::new(drop_guard), + reports: Reports::default(), }) } - /// Returns a new address to send messages to this actor. - /// - /// Unlike the client itself the returned [`Addr`] does not own the actor task, it only - /// allows sending messages to the actor. - pub fn addr(&self) -> Addr { - self.addr.clone() - } - - /// Runs a net_report, returning the report. - /// - /// It may not be called concurrently with itself, `&mut self` takes care of that. - /// - /// The *stun_conn4* and *stun_conn6* endpoints are bound UDP sockets to use to send out - /// STUN packets. This function **will not read from the sockets**, as they may be - /// receiving other traffic as well, normally they are the sockets carrying the real - /// traffic. Thus all stun packets received on those sockets should be passed to - /// `Addr::receive_stun_packet` in order for this function to receive the stun - /// responses and function correctly. - /// - /// If these are not passed in this will bind sockets for STUN itself, though results - /// may not be as reliable. 
- /// - /// The *quic_config* takes a [`QuicConfig`], a combination of a QUIC endpoint and - /// a client configuration that can be use for verifying the relay server connection. - /// When available, the report will attempt to get an observed public address - /// using QUIC address discovery. - /// - /// When `None`, it will disable the QUIC address discovery probes. - /// - /// This will attempt to use *all* probe protocols. - #[cfg(test)] - pub async fn get_report_all( - &mut self, - relay_map: RelayMap, - #[cfg(not(wasm_browser))] stun_sock_v4: Option>, - #[cfg(not(wasm_browser))] stun_sock_v6: Option>, - #[cfg(not(wasm_browser))] quic_config: Option, - ) -> Result> { - #[cfg(not(wasm_browser))] - let opts = Options::default() - .stun_v4(stun_sock_v4) - .stun_v6(stun_sock_v6) - .quic_config(quic_config); - #[cfg(wasm_browser)] - let opts = Options::default(); - let rx = self.get_report_channel(relay_map.clone(), opts).await?; - match rx.await { - Ok(res) => res, - Err(_) => Err(anyhow!("channel closed, actor awol")), - } - } - - /// Runs a net_report, returning the report. - /// - /// It may not be called concurrently with itself, `&mut self` takes care of that. + /// Generates a [`Report`]. /// /// Look at [`Options`] for the different configuration options. - pub async fn get_report(&mut self, relay_map: RelayMap, opts: Options) -> Result> { - let rx = self.get_report_channel(relay_map, opts).await?; - match rx.await { - Ok(res) => res, - Err(_) => Err(anyhow!("channel closed, actor awol")), - } - } - - /// Get report with channel - /// - /// Look at [`Options`] for the different configuration options. - pub(crate) async fn get_report_channel( + pub(crate) async fn get_report( &mut self, relay_map: RelayMap, + if_state: IfStateDetails, opts: Options, - ) -> Result>>> { - let (tx, rx) = oneshot::channel(); - self.addr - .send(Message::RunCheck { - relay_map, - opts, - response_tx: tx, - }) - .await?; - Ok(rx) - } -} - -#[derive(Debug)] -pub(crate) struct Inflight { - /// The STUN transaction ID. - txn: stun::TransactionId, - /// The time the STUN probe was sent. - start: Instant, - /// Response to send STUN results: latency of STUN response and the discovered address. - s: sync::oneshot::Sender<(Duration, SocketAddr)>, -} - -/// Messages to send to the [`Actor`]. -#[derive(Debug)] -#[allow(clippy::large_enum_variant)] -pub(crate) enum Message { - /// Run a net_report. - /// - /// Only one net_report can be run at a time, trying to run multiple concurrently will - /// fail. - RunCheck { - /// The map of relays we want to probe - relay_map: RelayMap, - /// Options for the report - opts: Options, - /// Channel to receive the response. - response_tx: oneshot::Sender>>, - }, - /// A report produced by the [`reportgen`] actor. - ReportReady { report: Box }, - /// The [`reportgen`] actor failed to produce a report. - ReportAborted { err: anyhow::Error }, - /// An incoming STUN packet to parse. - StunPacket { - /// The raw UDP payload. - payload: Bytes, - /// The address this was claimed to be received from. - from_addr: SocketAddr, - }, - /// A probe wants to register an in-flight STUN request. - /// - /// The sender is signalled once the STUN packet is registered with the actor and will - /// correctly accept the STUN response. - InFlightStun(Inflight, oneshot::Sender<()>), -} - -/// Sender to the main service. -/// -/// Unlike [`Client`] this is the raw channel to send messages over. Keeping this alive -/// will not keep the actor alive, which makes this handy to pass to internal tasks. 
-#[derive(Debug, Clone)] -pub struct Addr { - sender: mpsc::Sender, - metrics: Arc, -} - -impl Addr { - /// Pass a received STUN packet to the net_reporter. - /// - /// Normally the UDP sockets to send STUN messages from are passed in so that STUN - /// packets are sent from the sockets that carry the real traffic. However because - /// these sockets carry real traffic they will also receive non-STUN traffic, thus the - /// net_report actor does not read from the sockets directly. If you receive a STUN - /// packet on the socket you should pass it to this method. - /// - /// It is safe to call this even when the net_report actor does not currently have any - /// in-flight STUN probes. The actor will simply ignore any stray STUN packets. - /// - /// There is an implicit queue here which may drop packets if the actor does not keep up - /// consuming them. - pub fn receive_stun_packet(&self, payload: Bytes, src: SocketAddr) { - if let Err(mpsc::error::TrySendError::Full(_)) = self.sender.try_send(Message::StunPacket { - payload, - from_addr: src, - }) { - self.metrics.stun_packets_dropped.inc(); - warn!("dropping stun packet from {}", src); - } - } - - async fn send(&self, msg: Message) -> Result<(), mpsc::error::SendError> { - self.sender.send(msg).await.inspect_err(|_| { - error!("net_report actor lost"); - }) - } -} - -/// The net_report actor. -/// -/// This actor runs for the entire duration there's a [`Client`] connected. -#[derive(Debug)] -struct Actor { - // Actor plumbing. - /// Actor messages channel. - /// - /// If there are no more senders the actor stops. - receiver: mpsc::Receiver, - /// The sender side of the messages channel. - /// - /// This allows creating new [`Addr`]s from the actor. - sender: mpsc::Sender, - /// A collection of previously generated reports. - /// - /// Sometimes it is useful to look at past reports to decide what to do. - reports: Reports, - - // Actor configuration. - /// The port mapper client, if those are requested. - /// - /// The port mapper is responsible for talking to routers via UPnP and the like to try - /// and open ports. - #[cfg(not(wasm_browser))] - port_mapper: Option, - - // Actor state. - /// Information about the currently in-flight STUN requests. - /// - /// This is used to complete the STUN probe when receiving STUN packets. - in_flight_stun_requests: HashMap, - /// The [`reportgen`] actor currently generating a report. - current_report_run: Option, - - /// The DNS resolver to use for probes that need to perform DNS lookups - #[cfg(not(wasm_browser))] - dns_resolver: DnsResolver, - - /// The [`IpMappedAddresses`] that allows you to do QAD in iroh - #[cfg(not(wasm_browser))] - ip_mapped_addrs: Option, - metrics: Arc, -} - -impl Actor { - /// Creates a new actor. - /// - /// This does not start the actor, see [`Actor::run`] for this. You should not - /// normally create this directly but rather create a [`Client`]. - fn new( - #[cfg(not(wasm_browser))] port_mapper: Option, - #[cfg(not(wasm_browser))] dns_resolver: DnsResolver, - #[cfg(not(wasm_browser))] ip_mapped_addrs: Option, - metrics: Arc, - ) -> Result { - // TODO: consider an instrumented flume channel so we have metrics. 
- let (sender, receiver) = mpsc::channel(32); - Ok(Self { - receiver, - sender, - reports: Default::default(), - #[cfg(not(wasm_browser))] - port_mapper, - in_flight_stun_requests: Default::default(), - current_report_run: None, - #[cfg(not(wasm_browser))] - dns_resolver, - #[cfg(not(wasm_browser))] - ip_mapped_addrs, - metrics, - }) - } - - /// Returns the channel to send messages to the actor. - fn addr(&self) -> Addr { - Addr { - sender: self.sender.clone(), - metrics: self.metrics.clone(), - } - } + ) -> Result { + debug!("net_report starting"); - /// Run the actor. - /// - /// It will now run and handle messages. Once the connected [`Client`] (including all - /// its clones) is dropped this will terminate. - async fn run(&mut self) { - debug!("net_report actor starting"); - while let Some(msg) = self.receiver.recv().await { - trace!(?msg, "handling message"); - match msg { - Message::RunCheck { - relay_map, - opts, - response_tx, - } => { - self.handle_run_check(relay_map, opts, response_tx); - } - Message::ReportReady { report } => { - self.handle_report_ready(*report); - } - Message::ReportAborted { err } => { - self.handle_report_aborted(err); - } - Message::StunPacket { payload, from_addr } => { - self.handle_stun_packet(&payload, from_addr); - } - Message::InFlightStun(inflight, response_tx) => { - self.handle_in_flight_stun(inflight, response_tx); - } - } - } - } - - /// Starts a check run as requested by the [`Message::RunCheck`] message. - /// - /// If *stun_sock_v4* or *stun_sock_v6* are not provided this will bind the sockets - /// itself. This is not ideal since really you want to send STUN probes from the - /// sockets you will be using. - fn handle_run_check( - &mut self, - relay_map: RelayMap, - opts: Options, - response_tx: oneshot::Sender>>, - ) { let protocols = opts.to_protocols(); #[cfg(not(wasm_browser))] let socket_state = SocketState { port_mapper: self.port_mapper.clone(), - stun_sock4: opts.stun_sock_v4, - stun_sock6: opts.stun_sock_v6, quic_config: opts.quic_config, dns_resolver: self.dns_resolver.clone(), ip_mapped_addrs: self.ip_mapped_addrs.clone(), }; trace!("Attempting probes for protocols {protocols:#?}"); - if self.current_report_run.is_some() { - response_tx - .send(Err(anyhow!( - "ignoring RunCheck request: reportgen actor already running" - ))) - .ok(); - return; - } - let now = Instant::now(); let mut do_full = self.reports.next_full @@ -605,113 +192,112 @@ impl Actor { } self.metrics.reports.inc(); - let actor = reportgen::Client::new( - self.addr(), + let enough_relays = std::cmp::min(relay_map.len(), ENOUGH_NODES); + #[cfg(wasm_browser)] + let if_state = IfStateDetails::default(); + #[cfg(not(wasm_browser))] + let if_state = IfStateDetails { + have_v4: if_state.have_v4, + have_v6: if_state.have_v6, + }; + + let (actor, mut probe_rx) = reportgen::Client::new( self.reports.last.clone(), relay_map, protocols, - self.metrics.clone(), + if_state, #[cfg(not(wasm_browser))] socket_state, #[cfg(any(test, feature = "test-utils"))] opts.insecure_skip_relay_cert_verify, ); - self.current_report_run = Some(ReportRun { - _reportgen: actor, - report_tx: response_tx, - }); - } - - fn handle_report_ready(&mut self, report: Report) { - let report = self.finish_and_store_report(report); - self.in_flight_stun_requests.clear(); - if let Some(ReportRun { report_tx, .. 
}) = self.current_report_run.take() { - report_tx.send(Ok(report)).ok(); - } - } - - fn handle_report_aborted(&mut self, err: anyhow::Error) { - self.in_flight_stun_requests.clear(); - if let Some(ReportRun { report_tx, .. }) = self.current_report_run.take() { - report_tx.send(Err(err.context("report aborted"))).ok(); - } - } + let mut report = Report { + os_has_ipv6: os_has_ipv6(), + ..Default::default() + }; - /// Handles [`Message::StunPacket`]. - /// - /// If there are currently no in-flight stun requests registered this is dropped, - /// otherwise forwarded to the probe. - fn handle_stun_packet(&mut self, pkt: &[u8], src: SocketAddr) { - trace!(%src, "received STUN packet"); - if self.in_flight_stun_requests.is_empty() { - return; - } + let mut timeout_fut = std::pin::pin!(MaybeFuture::default()); - #[cfg(feature = "metrics")] - match &src { - SocketAddr::V4(_) => { - self.metrics.stun_packets_recv_ipv4.inc(); - } - SocketAddr::V6(_) => { - self.metrics.stun_packets_recv_ipv6.inc(); - } - } + loop { + tokio::select! { + biased; - match stun::parse_response(pkt) { - Ok((txn, addr_port)) => match self.in_flight_stun_requests.remove(&txn) { - Some(inf) => { - debug!(%src, %txn, "received known STUN packet"); - let elapsed = inf.start.elapsed(); - inf.s.send((elapsed, addr_port)).ok(); - } - None => { - debug!(%src, %txn, "received unexpected STUN message response"); + _ = &mut timeout_fut, if timeout_fut.is_some() => { + drop(actor); // shuts down the probes + break; } - }, - Err(err) => { - match stun::parse_binding_request(pkt) { - Ok(txn) => { - // Is this our hairpin request? - match self.in_flight_stun_requests.remove(&txn) { - Some(inf) => { - debug!(%src, %txn, "received our hairpin STUN request"); - let elapsed = inf.start.elapsed(); - inf.s.send((elapsed, src)).ok(); + + maybe_probe = probe_rx.recv() => { + let Some(probe_res) = maybe_probe else { + break; + }; + trace!(?probe_res, "handling probe"); + match probe_res { + ProbeFinished::Regular(probe) => match probe { + Ok(probe) => { + report.update(&probe); + if timeout_fut.is_none() { + if let Some(timeout) = self.have_enough_reports(enough_relays, &report) { + timeout_fut.as_mut().set_future(time::sleep(timeout)); + } + } } - None => { - debug!(%src, %txn, "unknown STUN request"); + Err(err) => { + trace!("probe errored: {:?}", err); } + }, + #[cfg(not(wasm_browser))] + ProbeFinished::CaptivePortal(portal) => { + report.captive_portal = portal; + } + #[cfg(not(wasm_browser))] + ProbeFinished::Portmap(portmap) => { + report.portmap_probe = portmap; } - } - Err(_) => { - debug!(%src, "received invalid STUN response: {err:#}"); } } } } - } - /// Handles [`Message::InFlightStun`]. - /// - /// The in-flight request is added to [`Actor::in_flight_stun_requests`] so that - /// [`Actor::handle_stun_packet`] can forward packets correctly. - /// - /// *response_tx* is to signal the actor message has been handled. - fn handle_in_flight_stun(&mut self, inflight: Inflight, response_tx: oneshot::Sender<()>) { - self.in_flight_stun_requests.insert(inflight.txn, inflight); - response_tx.send(()).ok(); - } - - fn finish_and_store_report(&mut self, report: Report) -> Arc { - let report = self.add_report_history_and_set_preferred_relay(report); + self.add_report_history_and_set_preferred_relay(&mut report); debug!("{report:?}"); - report + + Ok(report) + } + + fn have_enough_reports(&self, enough_relays: usize, report: &Report) -> Option { + // Once we've heard from enough relay servers (3), start a timer to give up on the other + // probes. 
The timer's duration is a function of whether this is our initial full + // probe or an incremental one. For incremental ones, wait for the duration of the + // slowest relay. For initial ones, double that. + let latencies: Vec = report.relay_latency.iter().map(|(_, l)| l).collect(); + let have_enough_latencies = latencies.len() >= enough_relays; + + if have_enough_latencies { + let timeout = latencies + .iter() + .max() + .copied() + .unwrap_or(DEFAULT_MAX_LATENCY); + let timeout = match self.reports.last.is_some() { + true => timeout, + false => timeout * 2, + }; + debug!( + reports=latencies.len(), + delay=?timeout, + "Have enough probe reports, aborting further probes soon", + ); + + Some(timeout) + } else { + None + } } /// Adds `r` to the set of recent Reports and mutates `r.preferred_relay` to contain the best recent one. - /// `r` is stored ref counted and a reference is returned. - fn add_report_history_and_set_preferred_relay(&mut self, mut r: Report) -> Arc { + fn add_report_history_and_set_preferred_relay(&mut self, r: &mut Report) { let mut prev_relay = None; if let Some(ref last) = self.reports.last { prev_relay.clone_from(&last.preferred_relay); @@ -720,15 +306,14 @@ impl Actor { const MAX_AGE: Duration = Duration::from_secs(5 * 60); // relay ID => its best recent latency in last MAX_AGE - let mut best_recent = RelayLatencies::new(); + let mut best_recent = RelayLatencies::default(); // chain the current report as we are still mutating it let prevs_iter = self .reports .prev .iter() - .map(|(a, b)| -> (&Instant, &Report) { (a, b) }) - .chain(std::iter::once((&now, &r))); + .map(|(a, b)| -> (&Instant, &Report) { (a, b) }); let mut to_remove = Vec::new(); for (t, pr) in prevs_iter { @@ -738,6 +323,8 @@ impl Actor { } best_recent.merge(&pr.relay_latency); } + // merge in current run + best_recent.merge(&r.relay_latency); for t in to_remove { self.reports.prev.remove(&t); @@ -772,23 +359,11 @@ impl Actor { } } - let r = Arc::new(r); self.reports.prev.insert(now, r.clone()); self.reports.last = Some(r.clone()); - - r } } -/// State the net_report actor needs for an in-progress report generation. -#[derive(Debug)] -struct ReportRun { - /// The handle of the [`reportgen`] actor, cancels the actor on drop. - _reportgen: reportgen::Client, - /// Where to send the completed report. - report_tx: oneshot::Sender>>, -} - /// Test if IPv6 works at all, or if it's been hard disabled at the OS level. #[cfg(not(wasm_browser))] fn os_has_ipv6() -> bool { @@ -801,81 +376,6 @@ fn os_has_ipv6() -> bool { false } -#[cfg(not(wasm_browser))] -pub(crate) mod stun_utils { - use anyhow::Context as _; - use netwatch::IpFamily; - use tokio_util::sync::CancellationToken; - - use super::*; - - /// Attempts to bind a local socket to send STUN packets from. - /// - /// If successful this returns the bound socket and will forward STUN responses to the - /// provided *actor_addr*. The *cancel_token* serves to stop the packet forwarding when the - /// socket is no longer needed. 
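// Editor's sketch (not part of the patch): `have_enough_reports` above derives a
// give-up timeout for the remaining probes from the latencies gathered so far:
// the slowest relay seen for incremental runs, twice that for the first full run,
// and nothing until enough relays have answered. A stand-alone version of that
// computation (names are invented; the constant mirrors DEFAULT_MAX_LATENCY):
use std::time::Duration;

const DEFAULT_MAX_LATENCY: Duration = Duration::from_millis(100);

fn probe_timeout(latencies: &[Duration], enough: usize, had_report_before: bool) -> Option<Duration> {
    if latencies.len() < enough {
        return None; // keep waiting for more relays to answer
    }
    let slowest = latencies.iter().max().copied().unwrap_or(DEFAULT_MAX_LATENCY);
    Some(if had_report_before { slowest } else { slowest * 2 })
}

fn main() {
    let latencies = [
        Duration::from_millis(20),
        Duration::from_millis(45),
        Duration::from_millis(30),
    ];
    assert_eq!(probe_timeout(&latencies, 3, true), Some(Duration::from_millis(45)));
    assert_eq!(probe_timeout(&latencies, 3, false), Some(Duration::from_millis(90)));
    assert_eq!(probe_timeout(&latencies[..2], 3, true), None);
}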
- pub fn bind_local_stun_socket( - network: IpFamily, - actor_addr: Addr, - cancel_token: CancellationToken, - ) -> Option> { - let sock = match UdpSocket::bind(network, 0) { - Ok(sock) => Arc::new(sock), - Err(err) => { - debug!("failed to bind STUN socket: {}", err); - return None; - } - }; - let span = info_span!( - "stun_udp_listener", - local_addr = sock - .local_addr() - .map(|a| a.to_string()) - .unwrap_or(String::from("-")), - ); - { - let sock = sock.clone(); - task::spawn( - async move { - debug!("udp stun socket listener started"); - // TODO: Can we do better for buffers here? Probably doesn't matter much. - let mut buf = vec![0u8; 64 << 10]; - loop { - tokio::select! { - biased; - _ = cancel_token.cancelled() => break, - res = recv_stun_once(&sock, &mut buf, &actor_addr) => { - if let Err(err) = res { - warn!(%err, "stun recv failed"); - break; - } - } - } - } - debug!("udp stun socket listener stopped"); - } - .instrument(span), - ); - } - Some(sock) - } - - /// Receive STUN response from a UDP socket, pass it to the actor. - async fn recv_stun_once(sock: &UdpSocket, buf: &mut [u8], actor_addr: &Addr) -> Result<()> { - let (count, mut from_addr) = sock - .recv_from(buf) - .await - .context("Error reading from stun socket")?; - let payload = &buf[..count]; - from_addr.set_ip(from_addr.ip().to_canonical()); - let msg = Message::StunPacket { - payload: Bytes::from(payload.to_vec()), - from_addr, - }; - actor_addr.send(msg).await.context("actor stopped") - } -} - #[cfg(test)] mod test_utils { //! Creates a relay server against which to perform tests @@ -891,8 +391,6 @@ mod test_utils { }); let node_desc = RelayNode { url: server.https_url().expect("should work as relay"), - stun_only: false, // the checks above and below guarantee both stun and relay - stun_port: server.stun_addr().expect("server should serve stun").port(), quic, }; @@ -917,165 +415,44 @@ mod test_utils { #[cfg(test)] mod tests { - use std::net::Ipv4Addr; + use std::net::{Ipv4Addr, SocketAddr}; - use bytes::BytesMut; - use netwatch::IpFamily; + use iroh_base::RelayUrl; use tokio_util::sync::CancellationToken; - use tracing::info; use tracing_test::traced_test; use super::*; - use crate::net_report::{dns, ping::Pinger, stun_utils::bind_local_stun_socket}; - - mod stun_utils { - //! Utils for testing that expose a simple stun server. - - use std::{net::IpAddr, sync::Arc}; - - use anyhow::Result; - use iroh_base::RelayUrl; - use iroh_relay::RelayNode; - use tokio::{ - net, - sync::{oneshot, Mutex}, - }; - use tracing::{debug, trace}; - - use super::*; - - /// A drop guard to clean up test infrastructure. - /// - /// After dropping the test infrastructure will asynchronously shutdown and release its - /// resources. - // Nightly sees the sender as dead code currently, but we only rely on Drop of the - // sender. 
- #[derive(Debug)] - pub struct CleanupDropGuard { - _guard: oneshot::Sender<()>, - } - - // (read_ipv4, read_ipv6) - #[derive(Debug, Default, Clone)] - pub struct StunStats(Arc>); - - impl StunStats { - pub async fn total(&self) -> usize { - let s = self.0.lock().await; - s.0 + s.1 - } - } - - pub fn relay_map_of(stun: impl Iterator) -> RelayMap { - relay_map_of_opts(stun.map(|addr| (addr, true))) - } - - pub fn relay_map_of_opts(stun: impl Iterator) -> RelayMap { - let nodes = stun.map(|(addr, stun_only)| { - let host = addr.ip(); - let port = addr.port(); - - let url: RelayUrl = format!("http://{host}:{port}").parse().unwrap(); - RelayNode { - url, - stun_port: port, - stun_only, - quic: None, - } - }); - RelayMap::from_iter(nodes) - } - - /// Sets up a simple STUN server binding to `0.0.0.0:0`. - /// - /// See [`serve`] for more details. - pub(crate) async fn serve_v4() -> Result<(SocketAddr, StunStats, CleanupDropGuard)> { - serve(std::net::Ipv4Addr::UNSPECIFIED.into()).await - } - - /// Sets up a simple STUN server. - pub(crate) async fn serve(ip: IpAddr) -> Result<(SocketAddr, StunStats, CleanupDropGuard)> { - let stats = StunStats::default(); - - let pc = net::UdpSocket::bind((ip, 0)).await?; - let mut addr = pc.local_addr()?; - match addr.ip() { - IpAddr::V4(ip) => { - if ip.octets() == [0, 0, 0, 0] { - addr.set_ip("127.0.0.1".parse().unwrap()); - } - } - _ => unreachable!("using ipv4"), - } - - println!("STUN listening on {}", addr); - let (_guard, r) = oneshot::channel(); - let stats_c = stats.clone(); - tokio::task::spawn(async move { - run_stun(pc, stats_c, r).await; - }); - - Ok((addr, stats, CleanupDropGuard { _guard })) - } - - async fn run_stun(pc: net::UdpSocket, stats: StunStats, mut done: oneshot::Receiver<()>) { - let mut buf = vec![0u8; 64 << 10]; - loop { - trace!("read loop"); - tokio::select! { - _ = &mut done => { - debug!("shutting down"); - break; - } - res = pc.recv_from(&mut buf) => match res { - Ok((n, addr)) => { - trace!("read packet {}bytes from {}", n, addr); - let pkt = &buf[..n]; - if !stun::is(pkt) { - debug!("received non STUN pkt"); - continue; - } - if let Ok(txid) = stun::parse_binding_request(pkt) { - debug!("received binding request"); - let mut s = stats.0.lock().await; - if addr.is_ipv4() { - s.0 += 1; - } else { - s.1 += 1; - } - drop(s); - - let res = stun::response(txid, addr); - if let Err(err) = pc.send_to(&res, addr).await { - eprintln!("STUN server write failed: {:?}", err); - } - } - } - Err(err) => { - eprintln!("failed to read: {:?}", err); - } - } - } - } - } - } + use crate::net_report::{dns, reportgen::ProbeProto}; #[tokio::test] #[traced_test] async fn test_basic() -> Result<()> { - let (stun_addr, stun_stats, _cleanup_guard) = - stun_utils::serve("127.0.0.1".parse().unwrap()).await?; + let (server, relay) = test_utils::relay().await; + let client_config = iroh_relay::client::make_dangerous_client_config(); + let ep = quinn::Endpoint::client(SocketAddr::new(Ipv4Addr::LOCALHOST.into(), 0))?; + let quic_addr_disc = QuicConfig { + ep: ep.clone(), + client_config, + ipv4: true, + ipv6: true, + }; + let relay_map = RelayMap::from(relay); let resolver = dns::tests::resolver(); let mut client = Client::new(None, resolver.clone(), None, Default::default())?; - let dm = stun_utils::relay_map_of([stun_addr].into_iter()); + let if_state = IfStateDetails::fake(); // Note that the ProbePlan will change with each iteration. 
for i in 0..5 { let cancel = CancellationToken::new(); - let sock = bind_local_stun_socket(IpFamily::V4, client.addr(), cancel.clone()); println!("--round {}", i); - let r = client.get_report_all(dm.clone(), sock, None, None).await?; + let r = client + .get_report( + relay_map.clone(), + if_state.clone(), + Options::default().quic_config(Some(quic_addr_disc.clone())), + ) + .await?; assert!(r.udp, "want UDP"); assert_eq!( @@ -1094,63 +471,8 @@ mod tests { cancel.cancel(); } - assert!( - stun_stats.total().await >= 5, - "expected at least 5 stun, got {}", - stun_stats.total().await, - ); - - Ok(()) - } - - #[tokio::test] - #[traced_test] - async fn test_udp_blocked() -> Result<()> { - // Create a "STUN server", which will never respond to anything. This is how UDP to - // the STUN server being blocked will look like from the client's perspective. - let blackhole = tokio::net::UdpSocket::bind("127.0.0.1:0").await?; - let stun_addr = blackhole.local_addr()?; - let dm = stun_utils::relay_map_of_opts([(stun_addr, false)].into_iter()); - - // Now create a client and generate a report. - let resolver = dns::tests::resolver(); - let mut client = Client::new(None, resolver.clone(), None, Default::default())?; - - let r = client.get_report_all(dm, None, None, None).await?; - let mut r: Report = (*r).clone(); - r.portmap_probe = None; - - // This test wants to ensure that the ICMP part of the probe works when UDP is - // blocked. Unfortunately on some systems we simply don't have permissions to - // create raw ICMP pings and we'll have to silently accept this test is useless (if - // we could, this would be a skip instead). - let pinger = Pinger::new(); - let can_ping = pinger.send(Ipv4Addr::LOCALHOST.into(), b"aa").await.is_ok(); - let want_icmpv4 = match can_ping { - true => Some(true), - false => None, - }; - - let want = Report { - // The ICMP probe sets the can_ping flag. - ipv4_can_send: can_ping, - // OS IPv6 test is irrelevant here, accept whatever the current machine has. - os_has_ipv6: r.os_has_ipv6, - // Captive portal test is irrelevant; accept what the current report has. - captive_portal: r.captive_portal, - // If we can ping we expect to have this. - icmpv4: want_icmpv4, - // If we had a pinger, we'll have some latencies filled in and a preferred relay - relay_latency: can_ping - .then(|| r.relay_latency.clone()) - .unwrap_or_default(), - preferred_relay: can_ping - .then_some(r.preferred_relay.clone()) - .unwrap_or_default(), - ..Default::default() - }; - - assert_eq!(r, want); + ep.wait_idle().await; + server.shutdown().await?; Ok(()) } @@ -1162,23 +484,24 @@ mod tests { } // report returns a *Report from (relay host, Duration)+ pairs. 
- fn report(a: impl IntoIterator) -> Option> { + fn report(a: impl IntoIterator) -> Option { let mut report = Report::default(); for (s, d) in a { assert!(s.starts_with('d'), "invalid relay server key"); let id: u16 = s[1..].parse().unwrap(); - report - .relay_latency - .0 - .insert(relay_url(id), Duration::from_secs(d)); + report.relay_latency.update_relay( + relay_url(id), + Duration::from_secs(d), + ProbeProto::QadIpv4, + ); } - Some(Arc::new(report)) + Some(report) } struct Step { /// Delay in seconds after: u64, - r: Option>, + r: Option, } struct Test { name: &'static str, @@ -1317,15 +640,14 @@ mod tests { let resolver = dns::tests::resolver(); for mut tt in tests { println!("test: {}", tt.name); - let mut actor = Actor::new(None, resolver.clone(), None, Default::default()).unwrap(); + let mut client = Client::new(None, resolver.clone(), None, Default::default()).unwrap(); for s in &mut tt.steps { // trigger the timer tokio::time::advance(Duration::from_secs(s.after)).await; - let r = Arc::try_unwrap(s.r.take().unwrap()).unwrap(); - s.r = Some(actor.add_report_history_and_set_preferred_relay(r)); + client.add_report_history_and_set_preferred_relay(s.r.as_mut().unwrap()); } let last_report = tt.steps.last().unwrap().r.clone().unwrap(); - let got = actor.reports.prev.len(); + let got = client.reports.prev.len(); let want = tt.want_prev_len; assert_eq!(got, want, "prev length"); let got = &last_report.preferred_relay; @@ -1335,61 +657,4 @@ mod tests { Ok(()) } - - #[tokio::test] - async fn test_hairpin() -> Result<()> { - // Hairpinning is initiated after we discover our own IPv4 socket address (IP + - // port) via STUN, so the test needs to have a STUN server and perform STUN over - // IPv4 first. Hairpinning detection works by sending a STUN *request* to **our own - // public socket address** (IP + port). If the router supports hairpinning the STUN - // request is returned back to us and received on our public address. This doesn't - // need to be a STUN request, but STUN already has a unique transaction ID which we - // can easily use to identify the packet. - - // Setup STUN server and create relay_map. - let (stun_addr, _stun_stats, _done) = stun_utils::serve_v4().await?; - let dm = stun_utils::relay_map_of([stun_addr].into_iter()); - dbg!(&dm); - - let resolver = dns::tests::resolver().clone(); - let mut client = Client::new(None, resolver, None, Default::default())?; - - // Set up an external socket to send STUN requests from, this will be discovered as - // our public socket address by STUN. We send back any packets received on this - // socket to the net_report client using Client::receive_stun_packet. Once we sent - // the hairpin STUN request (from a different randomly bound socket) we are sending - // it to this socket, which is forwarnding it back to our net_report client, because - // this dumb implementation just forwards anything even if it would be garbage. - // Thus hairpinning detection will declare hairpinning to work. 
- let sock = UdpSocket::bind_local(netwatch::IpFamily::V4, 0)?; - let sock = Arc::new(sock); - info!(addr=?sock.local_addr().unwrap(), "Using local addr"); - let task = { - let sock = sock.clone(); - let addr = client.addr.clone(); - tokio::spawn( - async move { - let mut buf = BytesMut::zeroed(64 << 10); - loop { - let (count, src) = sock.recv_from(&mut buf).await.unwrap(); - info!( - addr=?sock.local_addr().unwrap(), - %count, - "Forwarding payload to net_report client", - ); - let payload = buf.split_to(count).freeze(); - addr.receive_stun_packet(payload, src); - } - } - .instrument(info_span!("pkt-fwd")), - ) - }; - - let r = client.get_report_all(dm, Some(sock), None, None).await?; - dbg!(&r); - assert_eq!(r.hair_pinning, Some(true)); - - task.abort(); - Ok(()) - } } diff --git a/iroh/src/net_report/defaults.rs b/iroh/src/net_report/defaults.rs index 66f27097114..6d63cff4f48 100644 --- a/iroh/src/net_report/defaults.rs +++ b/iroh/src/net_report/defaults.rs @@ -29,10 +29,4 @@ pub(crate) mod timeouts { pub(crate) const CAPTIVE_PORTAL_TIMEOUT: Duration = Duration::from_secs(2); pub(crate) const DNS_TIMEOUT: Duration = Duration::from_secs(3); - - /// The amount of time we wait for a hairpinned packet to come back. - pub(crate) const HAIRPIN_CHECK_TIMEOUT: Duration = Duration::from_millis(100); - - /// Default Pinger timeout - pub(crate) const DEFAULT_PINGER_TIMEOUT: Duration = Duration::from_secs(5); } diff --git a/iroh/src/net_report/ip_mapped_addrs.rs b/iroh/src/net_report/ip_mapped_addrs.rs index 42a07f501bc..61b150fb2d9 100644 --- a/iroh/src/net_report/ip_mapped_addrs.rs +++ b/iroh/src/net_report/ip_mapped_addrs.rs @@ -10,13 +10,13 @@ use std::{ /// Can occur when converting a [`SocketAddr`] to an [`IpMappedAddr`] #[derive(Debug, thiserror::Error)] #[error("Failed to convert")] -pub struct IpMappedAddrError; +pub(crate) struct IpMappedAddrError; /// A map fake Ipv6 address with an actual IP address. /// /// It is essentially a lookup key for an IP that iroh's magicsocket knows about. #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)] -pub struct IpMappedAddr(Ipv6Addr); +pub(crate) struct IpMappedAddr(Ipv6Addr); /// Counter to always generate unique addresses for [`IpMappedAddr`]. static IP_ADDR_COUNTER: AtomicU64 = AtomicU64::new(1); @@ -36,7 +36,7 @@ impl IpMappedAddr { /// /// This generates a new IPv6 address in the Unique Local Address range (RFC 4193) /// which is recognised by iroh as an IP mapped address. - pub fn generate() -> Self { + pub(super) fn generate() -> Self { let mut addr = [0u8; 16]; addr[0] = Self::ADDR_PREFIXL; addr[1..6].copy_from_slice(&Self::ADDR_GLOBAL_ID); @@ -55,7 +55,7 @@ impl IpMappedAddr { /// This uses a made-up, but fixed port number. The [IpMappedAddresses`] map this is /// made for creates a unique [`IpMappedAddr`] for each IP+port and thus does not use /// the port to map back to the original [`SocketAddr`]. 
- pub fn private_socket_addr(&self) -> SocketAddr { + pub(crate) fn private_socket_addr(&self) -> SocketAddr { SocketAddr::new(IpAddr::from(self.0), Self::MAPPED_ADDR_PORT) } } @@ -86,10 +86,10 @@ impl std::fmt::Display for IpMappedAddr { // mechanisms for keeping track of "aliveness" and pruning address, as we do // with the `NodeMap` #[derive(Debug, Clone, Default)] -pub struct IpMappedAddresses(Arc>); +pub(crate) struct IpMappedAddresses(Arc>); #[derive(Debug, Default)] -pub struct Inner { +pub(super) struct Inner { by_mapped_addr: BTreeMap, /// Because [`std::net::SocketAddrV6`] contains extra fields besides the IP /// address and port (ie, flow_info and scope_id), the a [`std::net::SocketAddrV6`] @@ -99,18 +99,13 @@ pub struct Inner { } impl IpMappedAddresses { - /// Creates an empty [`IpMappedAddresses`]. - pub fn new() -> Self { - Self(Arc::new(std::sync::Mutex::new(Inner::default()))) - } - /// Adds a [`SocketAddr`] to the map and returns the generated [`IpMappedAddr`]. /// /// If this [`SocketAddr`] already exists in the map, it returns its /// associated [`IpMappedAddr`]. /// /// Otherwise a new [`IpMappedAddr`] is generated for it and returned. - pub fn get_or_register(&self, socket_addr: SocketAddr) -> IpMappedAddr { + pub(super) fn get_or_register(&self, socket_addr: SocketAddr) -> IpMappedAddr { let ip_port = (socket_addr.ip(), socket_addr.port()); let mut inner = self.0.lock().expect("poisoned"); if let Some(mapped_addr) = inner.by_ip_port.get(&ip_port) { @@ -123,14 +118,14 @@ impl IpMappedAddresses { } /// Returns the [`IpMappedAddr`] for the given [`SocketAddr`]. - pub fn get_mapped_addr(&self, socket_addr: &SocketAddr) -> Option { + pub(crate) fn get_mapped_addr(&self, socket_addr: &SocketAddr) -> Option { let ip_port = (socket_addr.ip(), socket_addr.port()); let inner = self.0.lock().expect("poisoned"); inner.by_ip_port.get(&ip_port).copied() } /// Returns the [`SocketAddr`] for the given [`IpMappedAddr`]. - pub fn get_ip_addr(&self, mapped_addr: &IpMappedAddr) -> Option { + pub(crate) fn get_ip_addr(&self, mapped_addr: &IpMappedAddr) -> Option { let inner = self.0.lock().expect("poisoned"); inner.by_mapped_addr.get(mapped_addr).copied() } diff --git a/iroh/src/net_report/options.rs b/iroh/src/net_report/options.rs index 5540f56e976..15f66c2f817 100644 --- a/iroh/src/net_report/options.rs +++ b/iroh/src/net_report/options.rs @@ -4,48 +4,21 @@ pub use imp::Options; #[cfg(not(wasm_browser))] mod imp { - use std::{collections::BTreeSet, sync::Arc}; - - use netwatch::UdpSocket; + use std::collections::BTreeSet; use crate::net_report::{reportgen::ProbeProto, QuicConfig}; /// Options for running probes /// - /// By default, will run icmp over IPv4, icmp over IPv6, and Https probes. + /// By default, will run Https probes. /// - /// Use [`Options::stun_v4`], [`Options::stun_v6`], and [`Options::quic_config`] - /// to enable STUN over IPv4, STUN over IPv6, and QUIC address discovery. + /// Use [`Options::quic_config`] to enable QUIC address discovery. #[derive(Debug, Clone)] pub struct Options { - /// Socket to send IPv4 STUN probes from. - /// - /// Responses are never read from this socket, they must be passed in via internal - /// messaging since, when used internally in iroh, the socket is also used to receive - /// other packets from in the magicsocket (`MagicSock`). - /// - /// If not provided, STUN probes will not be sent over IPv4. - pub(crate) stun_sock_v4: Option>, - /// Socket to send IPv6 STUN probes from. 
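// Sketch of the intended round-trip through `IpMappedAddresses` after the
// visibility changes in ip_mapped_addrs.rs above (crate-internal); the
// address below is made up for illustration.
fn example_ip_mapping(map: &IpMappedAddresses) {
    let real: std::net::SocketAddr = "203.0.113.7:7842".parse().unwrap();
    // The first call generates a fresh mapped address, later calls return the same one.
    let mapped = map.get_or_register(real);
    assert_eq!(map.get_or_register(real), mapped);
    // The mapped address can be resolved back to the original socket address...
    assert_eq!(map.get_ip_addr(&mapped), Some(real));
    // ...and is exposed as a fake IPv6 socket address with a fixed, made-up port.
    let _fake = mapped.private_socket_addr();
}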
- /// - /// Responses are never read from this socket, they must be passed in via internal - /// messaging since, when used internally in iroh, the socket is also used to receive - /// other packets from in the magicsocket (`MagicSock`). - /// - /// If not provided, STUN probes will not be sent over IPv6. - pub(crate) stun_sock_v6: Option>, /// The configuration needed to launch QUIC address discovery probes. /// /// If not provided, will not run QUIC address discovery. pub(crate) quic_config: Option, - /// Enable icmp_v4 probes - /// - /// On by default - pub(crate) icmp_v4: bool, - /// Enable icmp_v6 probes - /// - /// On by default - pub(crate) icmp_v6: bool, /// Enable https probes /// /// On by default @@ -58,11 +31,7 @@ mod imp { impl Default for Options { fn default() -> Self { Self { - stun_sock_v4: None, - stun_sock_v6: None, quic_config: None, - icmp_v4: true, - icmp_v6: true, https: true, #[cfg(any(test, feature = "test-utils"))] insecure_skip_relay_cert_verify: false, @@ -74,47 +43,19 @@ mod imp { /// Create an [`Options`] that disables all probes pub fn disabled() -> Self { Self { - stun_sock_v4: None, - stun_sock_v6: None, quic_config: None, - icmp_v4: false, - icmp_v6: false, https: false, #[cfg(any(test, feature = "test-utils"))] insecure_skip_relay_cert_verify: false, } } - /// Set the ipv4 stun socket and enable ipv4 stun probes - pub fn stun_v4(mut self, sock: Option>) -> Self { - self.stun_sock_v4 = sock; - self - } - - /// Set the ipv6 stun socket and enable ipv6 stun probes - pub fn stun_v6(mut self, sock: Option>) -> Self { - self.stun_sock_v6 = sock; - self - } - /// Enable quic probes pub fn quic_config(mut self, quic_config: Option) -> Self { self.quic_config = quic_config; self } - /// Enable or disable icmp_v4 probe - pub fn icmp_v4(mut self, enable: bool) -> Self { - self.icmp_v4 = enable; - self - } - - /// Enable or disable icmp_v6 probe - pub fn icmp_v6(mut self, enable: bool) -> Self { - self.icmp_v6 = enable; - self - } - /// Enable or disable https probe pub fn https(mut self, enable: bool) -> Self { self.https = enable; @@ -131,26 +72,14 @@ mod imp { /// Turn the options into set of valid protocols pub(crate) fn to_protocols(&self) -> BTreeSet { let mut protocols = BTreeSet::new(); - if self.stun_sock_v4.is_some() { - protocols.insert(ProbeProto::StunIpv4); - } - if self.stun_sock_v6.is_some() { - protocols.insert(ProbeProto::StunIpv6); - } if let Some(ref quic) = self.quic_config { if quic.ipv4 { - protocols.insert(ProbeProto::QuicIpv4); + protocols.insert(ProbeProto::QadIpv4); } if quic.ipv6 { - protocols.insert(ProbeProto::QuicIpv6); + protocols.insert(ProbeProto::QadIpv6); } } - if self.icmp_v4 { - protocols.insert(ProbeProto::IcmpV4); - } - if self.icmp_v6 { - protocols.insert(ProbeProto::IcmpV6); - } if self.https { protocols.insert(ProbeProto::Https); } diff --git a/iroh/src/net_report/ping.rs b/iroh/src/net_report/ping.rs deleted file mode 100644 index 6ec3dff73c8..00000000000 --- a/iroh/src/net_report/ping.rs +++ /dev/null @@ -1,163 +0,0 @@ -//! Allows sending ICMP echo requests to a host in order to determine network latency. - -use std::{ - fmt::Debug, - net::IpAddr, - sync::{Arc, Mutex}, -}; - -use anyhow::{Context, Result}; -use n0_future::time::Duration; -use surge_ping::{Client, Config, IcmpPacket, PingIdentifier, PingSequence, ICMP}; -use tracing::debug; - -use crate::net_report::defaults::timeouts::DEFAULT_PINGER_TIMEOUT as DEFAULT_TIMEOUT; - -/// Whether this error was because we couldn't create a client or a send error. 
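// Sketch of how the slimmed-down probe Options map to protocols after this
// change (non-browser build, crate-internal). The QuicConfig value is assumed
// to be supplied by the caller with both `ipv4` and `ipv6` enabled, e.g. built
// as in the test above.
fn example_probe_options(quic: QuicConfig) {
    // Default: HTTPS probing only.
    let default = Options::default().to_protocols();
    assert_eq!(default.len(), 1);
    assert!(default.contains(&ProbeProto::Https));
    // A QUIC config with both address families adds QUIC address discovery
    // probes over IPv4 and IPv6.
    let with_quic = Options::default().quic_config(Some(quic)).to_protocols();
    assert!(with_quic.contains(&ProbeProto::QadIpv4));
    assert!(with_quic.contains(&ProbeProto::QadIpv6));
    // `disabled()` turns everything off.
    assert!(Options::disabled().to_protocols().is_empty());
}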
-#[derive(Debug, thiserror::Error)] -pub enum PingError { - /// Could not create client, probably bind error. - #[error("Error creating ping client")] - Client(#[from] anyhow::Error), - /// Could not send ping. - #[error("Error sending ping")] - Ping(#[from] surge_ping::SurgeError), -} - -/// Allows sending ICMP echo requests to a host in order to determine network latency. -/// Will gracefully handle both IPv4 and IPv6. -#[derive(Debug, Clone, Default)] -pub struct Pinger(Arc); - -impl Debug for Inner { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Inner").finish() - } -} - -#[derive(Default)] -struct Inner { - client_v6: Mutex>, - client_v4: Mutex>, -} - -impl Pinger { - /// Create a new [Pinger]. - pub fn new() -> Self { - Default::default() - } - - /// Lazily create the ping client. - /// - /// We do this because it means we do not bind a socket until we really try to send a - /// ping. It makes it more transparent to use the pinger. - fn get_client(&self, kind: ICMP) -> Result { - let client = match kind { - ICMP::V4 => { - let mut opt_client = self.0.client_v4.lock().expect("poisoned"); - match *opt_client { - Some(ref client) => client.clone(), - None => { - let cfg = Config::builder().kind(kind).build(); - let client = Client::new(&cfg).context("failed to create IPv4 pinger")?; - *opt_client = Some(client.clone()); - client - } - } - } - ICMP::V6 => { - let mut opt_client = self.0.client_v6.lock().expect("poisoned"); - match *opt_client { - Some(ref client) => client.clone(), - None => { - let cfg = Config::builder().kind(kind).build(); - let client = Client::new(&cfg).context("failed to create IPv6 pinger")?; - *opt_client = Some(client.clone()); - client - } - } - } - }; - Ok(client) - } - - /// Send a ping request with associated data, returning the perceived latency. - pub async fn send(&self, addr: IpAddr, data: &[u8]) -> Result { - let client = match addr { - IpAddr::V4(_) => self.get_client(ICMP::V4).map_err(PingError::Client)?, - IpAddr::V6(_) => self.get_client(ICMP::V6).map_err(PingError::Client)?, - }; - let ident = PingIdentifier(rand::random()); - debug!(%addr, %ident, "Creating pinger"); - let mut pinger = client.pinger(addr, ident).await; - pinger.timeout(DEFAULT_TIMEOUT); // todo: timeout too large for net_report - match pinger.ping(PingSequence(0), data).await? { - (IcmpPacket::V4(packet), dur) => { - debug!( - "{} bytes from {}: icmp_seq={} ttl={:?} time={:0.2?}", - packet.get_size(), - packet.get_source(), - packet.get_sequence(), - packet.get_ttl(), - dur - ); - Ok(dur) - } - - (IcmpPacket::V6(packet), dur) => { - debug!( - "{} bytes from {}: icmp_seq={} hlim={} time={:0.2?}", - packet.get_size(), - packet.get_source(), - packet.get_sequence(), - packet.get_max_hop_limit(), - dur - ); - Ok(dur) - } - } - } -} - -#[cfg(test)] -mod tests { - use std::net::{Ipv4Addr, Ipv6Addr}; - - use tracing::error; - use tracing_test::traced_test; - - use super::*; - - // See net_report::reportgen::tests::test_icmp_probe_eu_relay for permissions to ping. - #[tokio::test] - #[traced_test] - async fn test_ping_localhost() { - let pinger = Pinger::new(); - - match pinger.send(Ipv4Addr::LOCALHOST.into(), b"data").await { - Ok(duration) => { - assert!(!duration.is_zero()); - } - Err(PingError::Client(err)) => { - // We don't have permission, too bad. 
- error!("no ping permissions: {err:#}"); - } - Err(PingError::Ping(err)) => { - panic!("ping failed: {err:#}"); - } - } - - match pinger.send(Ipv6Addr::LOCALHOST.into(), b"data").await { - Ok(duration) => { - assert!(!duration.is_zero()); - } - Err(PingError::Client(err)) => { - // We don't have permission, too bad. - error!("no ping permissions: {err:#}"); - } - Err(PingError::Ping(err)) => { - error!("ping failed, probably no IPv6 stack: {err:#}"); - } - } - } -} diff --git a/iroh/src/net_report/report.rs b/iroh/src/net_report/report.rs new file mode 100644 index 00000000000..e41231afdc9 --- /dev/null +++ b/iroh/src/net_report/report.rs @@ -0,0 +1,210 @@ +use std::{ + collections::BTreeMap, + fmt, + net::{SocketAddr, SocketAddrV4, SocketAddrV6}, + time::Duration, +}; + +use iroh_base::RelayUrl; +use tracing::warn; + +use super::{reportgen::ProbeProto, ProbeReport}; + +/// A net_report report. +#[derive(Default, Debug, PartialEq, Eq, Clone)] +pub struct Report { + /// A UDP STUN round trip completed. + pub udp: bool, + /// An IPv6 round trip completed. + pub ipv6: bool, + /// An IPv4 round trip completed. + pub ipv4: bool, + /// An IPv6 packet was able to be sent + pub ipv6_can_send: bool, + /// an IPv4 packet was able to be sent + pub ipv4_can_send: bool, + /// could bind a socket to ::1 + pub os_has_ipv6: bool, + /// Whether STUN results depend on which STUN server you're talking to (on IPv4). + pub mapping_varies_by_dest_ip: Option, + /// Whether STUN results depend on which STUN server you're talking to (on IPv6). + /// + /// Note that we don't really expect this to happen and are merely logging this if + /// detecting rather than using it. For now. + pub mapping_varies_by_dest_ipv6: Option, + /// Probe indicating the presence of port mapping protocols on the LAN. + #[cfg(not(wasm_browser))] + pub portmap_probe: Option, + /// `None` for unknown + pub preferred_relay: Option, + /// keyed by relay Url + pub relay_latency: RelayLatencies, + /// ip:port of global IPv4 + pub global_v4: Option, + /// `[ip]:port` of global IPv6 + pub global_v6: Option, + /// CaptivePortal is set when we think there's a captive portal that is + /// intercepting HTTP traffic. + pub captive_portal: Option, +} + +impl fmt::Display for Report { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&self, f) + } +} + +impl Report { + /// Updates a net_report [`Report`] with a new [`ProbeReport`]. 
+ pub(super) fn update(&mut self, probe_report: &ProbeReport) { + let relay_node = probe_report.probe.node(); + if let Some(latency) = probe_report.latency { + self.relay_latency.update_relay( + relay_node.url.clone(), + latency, + probe_report.probe.proto(), + ); + + #[cfg(not(wasm_browser))] + if matches!( + probe_report.probe.proto(), + ProbeProto::QadIpv4 | ProbeProto::QadIpv6 + ) { + self.udp = true; + + match probe_report.addr { + Some(SocketAddr::V4(ipp)) => { + self.ipv4 = true; + if self.global_v4.is_none() { + self.global_v4 = Some(ipp); + } else if self.global_v4 != Some(ipp) { + self.mapping_varies_by_dest_ip = Some(true); + } else if self.mapping_varies_by_dest_ip.is_none() { + self.mapping_varies_by_dest_ip = Some(false); + } + } + Some(SocketAddr::V6(ipp)) => { + self.ipv6 = true; + if self.global_v6.is_none() { + self.global_v6 = Some(ipp); + } else if self.global_v6 != Some(ipp) { + self.mapping_varies_by_dest_ipv6 = Some(true); + warn!("IPv6 Address detected by STUN varies by destination"); + } else if self.mapping_varies_by_dest_ipv6.is_none() { + self.mapping_varies_by_dest_ipv6 = Some(false); + } + } + None => { + // If we are here we had a relay server latency reported from a STUN probe. + // Thus we must have a reported address. + debug_assert!(probe_report.addr.is_some()); + } + } + } + } + self.ipv4_can_send |= probe_report.ipv4_can_send; + self.ipv6_can_send |= probe_report.ipv6_can_send; + } +} + +/// Latencies per relay node. +#[derive(Debug, Default, PartialEq, Eq, Clone)] +pub struct RelayLatencies { + #[cfg(not(wasm_browser))] + ipv4: BTreeMap, + #[cfg(not(wasm_browser))] + ipv6: BTreeMap, + https: BTreeMap, +} + +impl RelayLatencies { + /// Updates a relay's latency, if it is faster than before. + pub(super) fn update_relay(&mut self, url: RelayUrl, latency: Duration, probe: ProbeProto) { + let list = match probe { + ProbeProto::Https => &mut self.https, + #[cfg(not(wasm_browser))] + ProbeProto::QadIpv4 => &mut self.ipv4, + #[cfg(not(wasm_browser))] + ProbeProto::QadIpv6 => &mut self.ipv6, + }; + let old_latency = list.entry(url).or_insert(latency); + if latency < *old_latency { + *old_latency = latency; + } + } + + /// Merges another [`RelayLatencies`] into this one. + /// + /// For each relay the latency is updated using [`RelayLatencies::update_relay`]. + pub(super) fn merge(&mut self, other: &RelayLatencies) { + for (url, latency) in other.https.iter() { + self.update_relay(url.clone(), *latency, ProbeProto::Https); + } + #[cfg(not(wasm_browser))] + for (url, latency) in other.ipv4.iter() { + self.update_relay(url.clone(), *latency, ProbeProto::QadIpv4); + } + #[cfg(not(wasm_browser))] + for (url, latency) in other.ipv6.iter() { + self.update_relay(url.clone(), *latency, ProbeProto::QadIpv6); + } + } + + /// Returns an iterator over all the relays and their latencies. + #[cfg(not(wasm_browser))] + pub fn iter(&self) -> impl Iterator + '_ { + self.https + .iter() + .chain(self.ipv4.iter()) + .chain(self.ipv6.iter()) + .map(|(k, v)| (k, *v)) + } + + /// Returns an iterator over all the relays and their latencies. 
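// Sketch of the bookkeeping above (crate-internal): per relay and per probe
// protocol only the fastest observed latency is kept, and `get` (defined just
// below) returns the minimum across protocols. The relay URL is made up.
fn example_relay_latencies() {
    use std::time::Duration;
    let url: iroh_base::RelayUrl = "https://relay.example".parse().unwrap();
    let mut latencies = RelayLatencies::default();
    latencies.update_relay(url.clone(), Duration::from_millis(80), ProbeProto::QadIpv4);
    // A slower sample for the same relay and protocol is ignored...
    latencies.update_relay(url.clone(), Duration::from_millis(120), ProbeProto::QadIpv4);
    // ...while a faster HTTPS sample is tracked separately and wins overall.
    latencies.update_relay(url.clone(), Duration::from_millis(60), ProbeProto::Https);
    assert_eq!(latencies.get(&url), Some(Duration::from_millis(60)));
}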
+ #[cfg(wasm_browser)] + pub fn iter(&self) -> impl Iterator + '_ { + self.https.iter().map(|(k, v)| (k, *v)) + } + + #[cfg(not(wasm_browser))] + pub(super) fn is_empty(&self) -> bool { + self.https.is_empty() && self.ipv4.is_empty() && self.ipv6.is_empty() + } + + #[cfg(wasm_browser)] + pub(super) fn is_empty(&self) -> bool { + self.https.is_empty() + } + + #[cfg(test)] + pub(super) fn len(&self) -> usize { + self.https.len() + self.ipv4.len() + self.ipv6.len() + } + + /// Returns the lowest latency across records. + pub(super) fn get(&self, url: &RelayUrl) -> Option { + let mut list = Vec::with_capacity(3); + if let Some(val) = self.https.get(url) { + list.push(*val); + } + #[cfg(not(wasm_browser))] + if let Some(val) = self.ipv4.get(url) { + list.push(*val); + } + #[cfg(not(wasm_browser))] + if let Some(val) = self.ipv6.get(url) { + list.push(*val); + } + list.into_iter().min() + } + + #[cfg(not(wasm_browser))] + pub(super) fn ipv4(&self) -> &BTreeMap { + &self.ipv4 + } + + #[cfg(not(wasm_browser))] + pub(super) fn ipv6(&self) -> &BTreeMap { + &self.ipv6 + } +} diff --git a/iroh/src/net_report/reportgen.rs b/iroh/src/net_report/reportgen.rs index 4d2407517a0..df8e44bc08c 100644 --- a/iroh/src/net_report/reportgen.rs +++ b/iroh/src/net_report/reportgen.rs @@ -6,7 +6,6 @@ //! messages from the client. It follows roughly these steps: //! //! - Determines host IPv6 support. -//! - Creates hairpin actor. //! - Creates portmapper future. //! - Creates captive portal detection future. //! - Creates Probe Set futures. @@ -18,23 +17,15 @@ use std::{ collections::BTreeSet, - future::Future, net::{IpAddr, SocketAddr}, - pin::Pin, sync::Arc, - task::{Context, Poll}, }; use anyhow::{anyhow, bail, Context as _, Result}; use iroh_base::RelayUrl; #[cfg(not(wasm_browser))] use iroh_relay::dns::DnsResolver; -use iroh_relay::{ - defaults::{DEFAULT_RELAY_QUIC_PORT, DEFAULT_STUN_PORT}, - http::RELAY_PROBE_PATH, - protos::stun, - RelayMap, RelayNode, -}; +use iroh_relay::{defaults::DEFAULT_RELAY_QUIC_PORT, http::RELAY_PROBE_PATH, RelayMap, RelayNode}; #[cfg(wasm_browser)] use n0_future::future::Pending; use n0_future::{ @@ -42,26 +33,20 @@ use n0_future::{ time::{self, Duration, Instant}, StreamExt as _, }; -#[cfg(not(wasm_browser))] -use netwatch::{interfaces, UdpSocket}; use rand::seq::IteratorRandom; -use tokio::sync::{mpsc, oneshot}; -use tracing::{debug, debug_span, error, info_span, trace, warn, Instrument, Span}; +use tokio::sync::mpsc; +use tokio_util::sync::CancellationToken; +use tracing::{debug, debug_span, error, info_span, trace, warn, Instrument}; use url::Host; #[cfg(wasm_browser)] use crate::net_report::portmapper; // We stub the library -use crate::net_report::{self, Metrics, Report}; +use crate::net_report::Report; #[cfg(not(wasm_browser))] use crate::net_report::{ - defaults::timeouts::DNS_TIMEOUT, - dns::DNS_STAGGERING_MS, - ip_mapped_addrs::IpMappedAddresses, - ping::{PingError, Pinger}, + defaults::timeouts::DNS_TIMEOUT, dns::DNS_STAGGERING_MS, ip_mapped_addrs::IpMappedAddresses, }; -#[cfg(not(wasm_browser))] -mod hairpin; mod probes; pub use probes::ProbeProto; @@ -71,17 +56,42 @@ use crate::net_report::defaults::timeouts::{ CAPTIVE_PORTAL_DELAY, CAPTIVE_PORTAL_TIMEOUT, OVERALL_REPORT_TIMEOUT, PROBES_TIMEOUT, }; -const ENOUGH_NODES: usize = 3; - -/// Holds the state for a single invocation of [`net_report::Client::get_report`]. +/// Holds the state for a single report generation. /// /// Dropping this will cancel the actor and stop the report generation. 
#[derive(Debug)] pub(super) struct Client { - // Addr is currently only used by child actors, so not yet exposed here. _drop_guard: AbortOnDropHandle<()>, } +/// Some details required from the interface state of the device. +#[derive(Debug, Clone, Default)] +pub struct IfStateDetails { + /// Do we have IPv4 capbilities + pub have_v4: bool, + /// Do we have IPv6 capbilities + pub have_v6: bool, +} + +impl IfStateDetails { + #[cfg(test)] + pub(super) fn fake() -> Self { + IfStateDetails { + have_v4: true, + have_v6: true, + } + } +} + +impl From for IfStateDetails { + fn from(value: netwatch::netmon::State) -> Self { + IfStateDetails { + have_v4: value.have_v4, + have_v6: value.have_v6, + } + } +} + /// Any state that depends on sockets being available in the current environment. /// /// Factored out so it can be disabled easily in browsers. @@ -90,10 +100,6 @@ pub(super) struct Client { pub(crate) struct SocketState { /// The portmapper client, if there is one. pub(crate) port_mapper: Option, - /// Socket to send IPv4 STUN requests from. - pub(crate) stun_sock4: Option>, - /// Socket so send IPv6 STUN requests from. - pub(crate) stun_sock6: Option>, /// QUIC configuration to do QUIC address Discovery pub(crate) quic_config: Option, /// The DNS resolver to use for probes that need to resolve DNS records. @@ -108,104 +114,49 @@ impl Client { /// The actor starts running immediately and only generates a single report, after which /// it shuts down. Dropping this handle will abort the actor. pub(super) fn new( - net_report: net_report::Addr, - last_report: Option>, + last_report: Option, relay_map: RelayMap, protocols: BTreeSet, - metrics: Arc, + if_state: IfStateDetails, #[cfg(not(wasm_browser))] socket_state: SocketState, #[cfg(any(test, feature = "test-utils"))] insecure_skip_relay_cert_verify: bool, - ) -> Self { + ) -> (Self, mpsc::Receiver) { let (msg_tx, msg_rx) = mpsc::channel(32); - let addr = Addr { - sender: msg_tx.clone(), - }; - let mut actor = Actor { + let actor = Actor { msg_tx, - msg_rx, - net_report: net_report.clone(), last_report, relay_map, - report: Report::default(), - outstanding_tasks: OutstandingTasks::default(), protocols, #[cfg(not(wasm_browser))] socket_state, - #[cfg(not(wasm_browser))] - hairpin_actor: hairpin::Client::new(net_report, addr), - metrics, #[cfg(any(test, feature = "test-utils"))] insecure_skip_relay_cert_verify, + if_state, }; - let task = - task::spawn(async move { actor.run().await }.instrument(info_span!("reportgen.actor"))); - Self { - _drop_guard: AbortOnDropHandle::new(task), - } - } -} - -/// The address of the reportstate [`Actor`]. -/// -/// Unlike the [`Client`] struct itself this is the raw channel to send message over. -/// Keeping this alive will not keep the actor alive, which makes this handy to pass to -/// internal tasks. -#[derive(Debug, Clone)] -pub(super) struct Addr { - sender: mpsc::Sender, -} - -impl Addr { - /// Blocking send to the actor, to be used from a non-actor future. - async fn send(&self, msg: Message) -> Result<(), mpsc::error::SendError> { - trace!( - "sending {:?} to channel with cap {}", - msg, - self.sender.capacity() - ); - self.sender.send(msg).await + let task = task::spawn(actor.run().instrument(info_span!("reportgen.actor"))); + ( + Self { + _drop_guard: AbortOnDropHandle::new(task), + }, + msg_rx, + ) } } -/// Messages to send to the reportstate [`Actor`]. -#[derive(Debug)] -enum Message { - /// Set the hairpinning availability in the report. 
- HairpinResult(bool), - /// Check whether executing a probe would still help. - // TODO: Ideally we remove the need for this message and the logic is inverted: once we - // get a probe result we cancel all probes that are no longer needed. But for now it's - // this way around to ease conversion. - ProbeWouldHelp(Probe, Arc, oneshot::Sender), - /// Abort all remaining probes. - AbortProbes, -} - /// The reportstate actor. /// /// This actor starts, generates a single report and exits. #[derive(Debug)] struct Actor { - /// The sender of the message channel, so we can give out [`Addr`]. - msg_tx: mpsc::Sender, - /// The receiver of the message channel. - msg_rx: mpsc::Receiver, - /// The address of the net_report actor. - net_report: super::Addr, + msg_tx: mpsc::Sender, // Provided state /// The previous report, if it exists. - last_report: Option>, + last_report: Option, /// The relay configuration. relay_map: RelayMap, // Internal state. - /// The report being built. - report: Report, - /// Which tasks the [`Actor`] is still waiting on. - /// - /// This is essentially the summary of all the work the [`Actor`] is doing. - outstanding_tasks: OutstandingTasks, /// Protocols we should attempt to create probes for, if we have the correct /// configuration for that protocol. protocols: BTreeSet, @@ -213,29 +164,29 @@ struct Actor { /// Any socket-related state that doesn't exist/work in browsers #[cfg(not(wasm_browser))] socket_state: SocketState, - /// The hairpin actor. - #[cfg(not(wasm_browser))] - hairpin_actor: hairpin::Client, - metrics: Arc, #[cfg(any(test, feature = "test-utils"))] insecure_skip_relay_cert_verify: bool, + if_state: IfStateDetails, } -impl Actor { - fn addr(&self) -> Addr { - Addr { - sender: self.msg_tx.clone(), - } - } +#[derive(Debug)] +pub(super) enum ProbeFinished { + Regular(Result), + #[cfg(not(wasm_browser))] + Portmap(Option), + #[cfg(not(wasm_browser))] + CaptivePortal(Option), +} - async fn run(&mut self) { - match self.run_inner().await { - Ok(_) => debug!("reportgen actor finished"), - Err(err) => { - self.net_report - .send(net_report::Message::ReportAborted { err }) - .await - .ok(); +impl Actor { + async fn run(self) { + match time::timeout(OVERALL_REPORT_TIMEOUT, self.run_inner()).await { + Ok(Ok(())) => debug!("reportgen actor finished"), + Ok(Err(err)) => { + warn!("reportgen failed: {:?}", err); + } + Err(time::Elapsed { .. }) => { + warn!("reportgen timed out"); } } } @@ -244,7 +195,6 @@ impl Actor { /// /// This actor runs by: /// - /// - Creates a hairpin actor. /// - Creates a captive portal future. /// - Creates ProbeSet futures in a group of futures. /// - Runs a main loop: @@ -252,263 +202,96 @@ impl Actor { /// - Receives actor messages (sent by those futures). /// - Updates the report, cancels unneeded futures. /// - Sends the report to the net_report actor. 
- async fn run_inner(&mut self) -> Result<()> { - #[cfg(not(wasm_browser))] - let port_mapper = self.socket_state.port_mapper.is_some(); - #[cfg(wasm_browser)] - let port_mapper = false; - debug!(%port_mapper, "reportstate actor starting"); + async fn run_inner(self) -> Result<()> { + debug!("reportstate actor starting"); - self.report.os_has_ipv6 = super::os_has_ipv6(); - - let mut port_mapping = self.prepare_portmapper_task(); - let mut captive_task = self.prepare_captive_portal_task(); - let mut probes = self.spawn_probes_task().await?; - - let total_timer = time::sleep(OVERALL_REPORT_TIMEOUT); - tokio::pin!(total_timer); - let probe_timer = time::sleep(PROBES_TIMEOUT); - tokio::pin!(probe_timer); + let mut probes = JoinSet::default(); - loop { - trace!(awaiting = ?self.outstanding_tasks, "tick; awaiting tasks"); - if self.outstanding_tasks.all_done() { - debug!("all tasks done"); - break; - } - tokio::select! { - biased; - _ = &mut total_timer => { - trace!("tick: total_timer expired"); - bail!("report timed out"); - } + let _probes_token = self.spawn_probes_task(self.if_state.clone(), &mut probes); + let mut num_probes = probes.len(); - _ = &mut probe_timer => { - warn!("tick: probes timed out"); - // Set new timeout to not go into this branch multiple times. We need - // the abort to finish all probes normally. PROBES_TIMEOUT is - // sufficiently far in the future. - probe_timer.as_mut().reset(Instant::now() + PROBES_TIMEOUT); - probes.abort_all(); - self.handle_abort_probes(); - } + let port_token = self.prepare_portmapper_task(&mut probes); + let captive_token = self.prepare_captive_portal_task(&mut probes); - // Drive the portmapper. - pm = &mut port_mapping, if self.outstanding_tasks.port_mapper => { - debug!(report=?pm, "tick: portmapper probe report"); - self.report.portmap_probe = pm; - port_mapping.inner = None; - self.outstanding_tasks.port_mapper = false; - } + // any reports of working UDP/QUIC? + let mut have_udp = false; - // Check for probes finishing. - set_result = probes.join_next(), if self.outstanding_tasks.probes => { - trace!("tick: probes done: {:?}", set_result); - match set_result { - Some(Ok(Ok(report))) => self.handle_probe_report(report), - Some(Ok(Err(_))) => (), - Some(Err(e)) => { - warn!("probes task error: {:?}", e); - } - None => { - self.handle_abort_probes(); + // Check for probes finishing. + while let Some(probe_result) = probes.join_next().await { + trace!(?probe_result, num_probes, "processing finished probe"); + match probe_result { + Ok(report) => { + #[cfg_attr(wasm_browser, allow(irrefutable_let_patterns))] + if let ProbeFinished::Regular(report) = &report { + have_udp |= report + .as_ref() + .map(|r| r.probe.is_udp()) + .unwrap_or_default(); + num_probes -= 1; + + // If all probes are done & we have_udp cancel portmapper and captive + if num_probes == 0 { + debug!("all regular probes done"); + debug_assert!(probes.len() <= 2, "{} probes", probes.len()); + + if have_udp { + port_token.cancel(); + captive_token.cancel(); + } } } - trace!("tick: probes handled"); + self.msg_tx.send(report).await.ok(); } - - // Drive the captive task. - found = &mut captive_task, if self.outstanding_tasks.captive_task => { - trace!("tick: captive portal task done"); - self.report.captive_portal = found; - captive_task.inner = None; - self.outstanding_tasks.captive_task = false; - } - - // Handle actor messages. 
- msg = self.msg_rx.recv() => { - trace!("tick: msg recv: {:?}", msg); - match msg { - Some(msg) => self.handle_message(msg), - None => bail!("msg_rx closed, reportgen client must be dropped"), + Err(e) => { + if e.is_panic() { + error!("Task panicked {:?}", e); + break; } + warn!("probes task join error: {:?}", e); } } } - if !probes.is_empty() { - debug!( - "aborting {} probe sets, already have enough reports", - probes.len() - ); - drop(probes); - } - - debug!("Sending report to net_report actor"); - self.net_report - .send(net_report::Message::ReportReady { - report: Box::new(self.report.clone()), - }) - .await?; - Ok(()) } - /// Handles an actor message. - /// - /// Returns `true` if all the probes need to be aborted. - fn handle_message(&mut self, msg: Message) { - trace!(?msg, "handling message"); - match msg { - Message::HairpinResult(works) => { - self.report.hair_pinning = Some(works); - self.outstanding_tasks.hairpin = false; - } - Message::ProbeWouldHelp(probe, relay_node, response_tx) => { - let res = self.probe_would_help(probe, relay_node); - if response_tx.send(res).is_err() { - debug!("probe dropped before ProbeWouldHelp response sent"); - } - } - Message::AbortProbes => { - self.handle_abort_probes(); - } - } - } - - fn handle_probe_report(&mut self, probe_report: ProbeReport) { - debug!(?probe_report, "finished probe"); - update_report(&mut self.report, probe_report); - - // When we discover the first IPv4 address we want to start the hairpin actor. - #[cfg(not(wasm_browser))] - if let Some(ref addr) = self.report.global_v4 { - if !self.hairpin_actor.has_started() { - self.hairpin_actor.start_check(*addr); - self.outstanding_tasks.hairpin = true; - } - } - - // Once we've heard from enough relay servers (3), start a timer to give up on the other - // probes. The timer's duration is a function of whether this is our initial full - // probe or an incremental one. For incremental ones, wait for the duration of the - // slowest relay. For initial ones, double that. - let enough_relays = std::cmp::min(self.relay_map.len(), ENOUGH_NODES); - if self.report.relay_latency.len() == enough_relays { - let timeout = self.report.relay_latency.max_latency(); - let timeout = match self.last_report.is_some() { - true => timeout, - false => timeout * 2, - }; - let reportcheck = self.addr(); - debug!( - reports=self.report.relay_latency.len(), - delay=?timeout, - "Have enough probe reports, aborting further probes soon", - ); - task::spawn( - async move { - time::sleep(timeout).await; - // Because we do this after a timeout it is entirely normal that the - // actor is no longer there by the time we send this message. - reportcheck - .send(Message::AbortProbes) - .await - .map_err(|err| trace!("Failed to abort all probes: {err:#}")) - .ok(); - } - .instrument(Span::current()), - ); - } - } - - /// Whether running this probe would still improve our report. - fn probe_would_help(&mut self, probe: Probe, relay_node: Arc) -> bool { - // If the probe is for a relay we don't yet know about, that would help. - if self.report.relay_latency.get(&relay_node.url).is_none() { - return true; - } - - // If the probe is for IPv6 and we don't yet have an IPv6 report, that would help. 
- #[cfg(not(wasm_browser))] - if probe.proto() == ProbeProto::StunIpv6 && self.report.relay_v6_latency.is_empty() { - return true; - } - - // For IPv4, we need at least two IPv4 results overall to - // determine whether we're behind a NAT that shows us as - // different source IPs and/or ports depending on who we're - // talking to. If we don't yet have two results yet - // (`mapping_varies_by_dest_ip` is blank), then another IPv4 probe - // would be good. - #[cfg(not(wasm_browser))] - if probe.proto() == ProbeProto::StunIpv4 && self.report.mapping_varies_by_dest_ip.is_none() - { - return true; - } - - // Otherwise not interesting. - false - } - - /// Stops further probes. - /// - /// This makes sure that no further probes are run and also cancels the captive portal - /// and portmapper tasks if there were successful probes. Be sure to only handle this - /// after all the required [`ProbeReport`]s have been processed. - fn handle_abort_probes(&mut self) { - trace!("handle abort probes"); - self.outstanding_tasks.probes = false; - if self.report.udp { - self.outstanding_tasks.port_mapper = false; - self.outstanding_tasks.captive_task = false; - } - } - /// Creates the future which will perform the portmapper task. /// /// The returned future will run the portmapper, if enabled, resolving to it's result. - fn prepare_portmapper_task( - &mut self, - ) -> MaybeFuture>>>> { - // In the browser, the compiler struggles to infer the type of future inside, because it's never set. - #[cfg(wasm_browser)] - let port_mapping: MaybeFuture>>>> = - MaybeFuture::default(); - - #[cfg(not(wasm_browser))] - let mut port_mapping = MaybeFuture::default(); - + fn prepare_portmapper_task(&self, tasks: &mut JoinSet) -> CancellationToken { + let token = CancellationToken::new(); #[cfg(not(wasm_browser))] if let Some(port_mapper) = self.socket_state.port_mapper.clone() { - port_mapping.inner = Some(Box::pin(async move { - match port_mapper.probe().await { - Ok(Ok(res)) => Some(res), - Ok(Err(err)) => { - debug!("skipping port mapping: {err:?}"); - None - } - Err(recv_err) => { - warn!("skipping port mapping: {recv_err:?}"); - None - } + let token = token.clone(); + tasks.spawn( + async move { + let res = token.run_until_cancelled_owned(port_mapper.probe()).await; + let res = match res { + Some(Ok(Ok(res))) => Some(res), + Some(Ok(Err(err))) => { + debug!("skipping port mapping: {err:?}"); + None + } + Some(Err(recv_err)) => { + warn!("probe failed: {recv_err:?}"); + None + } + None => { + trace!("probe cancelled"); + None + } + }; + ProbeFinished::Portmap(res) } - })); - self.outstanding_tasks.port_mapper = true; + .instrument(debug_span!("port-mapper")), + ); } - port_mapping + token } /// Creates the future which will perform the captive portal check. - fn prepare_captive_portal_task( - &mut self, - ) -> MaybeFuture>>>> { - // In the browser case the compiler cannot infer the type of the future, because it's never set: - #[cfg(wasm_browser)] - let captive_task: MaybeFuture>>>> = MaybeFuture::default(); - - #[cfg(not(wasm_browser))] - let mut captive_task = MaybeFuture::default(); + fn prepare_captive_portal_task(&self, tasks: &mut JoinSet) -> CancellationToken { + let token = CancellationToken::new(); // If we're doing a full probe, also check for a captive portal. 
We // delay by a bit to wait for UDP STUN to finish, to avoid the probe if @@ -524,38 +307,48 @@ impl Actor { let dns_resolver = self.socket_state.dns_resolver.clone(); let dm = self.relay_map.clone(); - self.outstanding_tasks.captive_task = true; - captive_task.inner = Some(Box::pin(async move { - time::sleep(CAPTIVE_PORTAL_DELAY).await; - debug!("Captive portal check started after {CAPTIVE_PORTAL_DELAY:?}"); - let captive_portal_check = time::timeout( - CAPTIVE_PORTAL_TIMEOUT, - check_captive_portal(&dns_resolver, &dm, preferred_relay) - .instrument(debug_span!("captive-portal")), - ); - match captive_portal_check.await { - Ok(Ok(found)) => Some(found), - Ok(Err(err)) => { - let err: Result = err.downcast(); - match err { - Ok(req_err) if req_err.is_connect() => { - debug!("check_captive_portal failed: {req_err:#}"); + let token = token.clone(); + tasks.spawn( + async move { + let res = token + .run_until_cancelled_owned(async move { + time::sleep(CAPTIVE_PORTAL_DELAY).await; + trace!("check started after {CAPTIVE_PORTAL_DELAY:?}"); + time::timeout( + CAPTIVE_PORTAL_TIMEOUT, + check_captive_portal(&dns_resolver, &dm, preferred_relay), + ) + .await + }) + .await; + let res = match res { + Some(Ok(Ok(found))) => Some(found), + Some(Ok(Err(err))) => { + let err: Result = err.downcast(); + match err { + Ok(req_err) if req_err.is_connect() => { + debug!("check failed: {req_err:#}"); + } + Ok(req_err) => warn!("error: {req_err:#}"), + Err(any_err) => warn!("error: {any_err:#}"), } - Ok(req_err) => warn!("check_captive_portal error: {req_err:#}"), - Err(any_err) => warn!("check_captive_portal error: {any_err:#}"), + None } - None - } - Err(_) => { - warn!("check_captive_portal timed out"); - None - } + Some(Err(time::Elapsed { .. })) => { + warn!("probe timed out"); + None + } + None => { + trace!("probe cancelled"); + None + } + }; + ProbeFinished::CaptivePortal(res) } - })); + .instrument(debug_span!("captive-portal")), + ); } - - self.outstanding_tasks.captive_task = false; - captive_task + token } /// Prepares the future which will run all the probes as per generated ProbePlan. @@ -576,11 +369,12 @@ impl Actor { /// failure permanent. Probes in a probe set are essentially retries. /// - Once there are [`ProbeReport`]s from enough nodes, all remaining probes are /// aborted. That is, the main actor loop stops polling them. - async fn spawn_probes_task(&mut self) -> Result>> { - #[cfg(not(wasm_browser))] - let if_state = interfaces::State::new().await; - #[cfg(not(wasm_browser))] - debug!(%if_state, "Local interfaces"); + fn spawn_probes_task( + &self, + if_state: IfStateDetails, + probes: &mut JoinSet, + ) -> CancellationToken { + debug!(?if_state, "local interface details"); let plan = match self.last_report { Some(ref report) => ProbePlan::with_last_report( &self.relay_map, @@ -598,118 +392,68 @@ impl Actor { }; trace!(%plan, "probe plan"); - // The pinger is created here so that any sockets that might be bound for it are - // shared between the probes that use it. It binds sockets lazily, so we can always - // create it. - #[cfg(not(wasm_browser))] - let pinger = Pinger::new(); + let token = CancellationToken::new(); - // A collection of futures running probe sets. 
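// Sketch of the cancellation pattern used by the port-mapper and captive-portal
// helpers above: each side task runs under its own CancellationToken via
// `run_until_cancelled_owned` and resolves to `None` if it is cancelled early
// (for example once enough UDP probes have succeeded). Plain tokio types are
// used here purely for illustration.
async fn example_cancellable_side_task() {
    use tokio_util::sync::CancellationToken;
    let token = CancellationToken::new();
    let guarded = token.clone().run_until_cancelled_owned(async {
        // Stand-in for port_mapper.probe() or the captive portal check.
        tokio::time::sleep(std::time::Duration::from_secs(60)).await;
        "done"
    });
    let handle = tokio::spawn(guarded);
    token.cancel(); // e.g. all regular probes finished and UDP works
    assert!(handle.await.unwrap().is_none()); // cancelled before completion
}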
- let mut probes = JoinSet::default(); for probe_set in plan.iter() { - let mut set = JoinSet::default(); + let set_token = token.child_token(); for probe in probe_set { - let reportstate = self.addr(); let relay_node = probe.node().clone(); let probe = probe.clone(); - let net_report = self.net_report.clone(); - - #[cfg(not(wasm_browser))] - let pinger = pinger.clone(); - #[cfg(not(wasm_browser))] - let socket_state = self.socket_state.clone(); + let probe_token = set_token.child_token(); + let set_token = set_token.clone(); - let metrics = self.metrics.clone(); - set.spawn( + let fut = probe_token.run_until_cancelled_owned(time::timeout( + PROBES_TIMEOUT, run_probe( - reportstate, relay_node, probe.clone(), - net_report, - metrics, - #[cfg(not(wasm_browser))] - pinger, #[cfg(not(wasm_browser))] - socket_state, + self.socket_state.clone(), #[cfg(any(test, feature = "test-utils"))] self.insecure_skip_relay_cert_verify, - ) - .instrument(debug_span!("run_probe", %probe)), - ); - } - - // Add the probe set to all futures of probe sets. Handle aborting a probe set - // if needed, only normal errors means the set continues. - probes.spawn( - async move { - // Hack because ProbeSet is not it's own type yet. - let mut probe_proto = None; - while let Some(res) = set.join_next().await { - match res { - Ok(Ok(report)) => return Ok(report), - Ok(Err(ProbeError::Error(err, probe))) => { - probe_proto = Some(probe.proto()); - warn!(?probe, "probe failed: {:#}", err); - continue; + ), + )); + probes.spawn( + async move { + let res = fut.await; + let res = match res { + Some(Ok(Ok(report))) => Ok(report), + Some(Ok(Err(ProbeError::Error(err)))) => { + warn!("probe failed: {:#}", err); + Err(err) } - Ok(Err(ProbeError::AbortSet(err, probe))) => { - debug!(?probe, "probe set aborted: {:#}", err); - set.abort_all(); - return Err(err); + Some(Ok(Err(ProbeError::AbortSet(err)))) => { + debug!("probe set aborted: {:#}", err); + set_token.cancel(); + Err(err) } - Err(err) => { - warn!("fatal probe set error, aborting: {:#}", err); - continue; - } - } + Some(Err(time::Elapsed { .. })) => Err(anyhow!("probe timed out")), + None => Err(anyhow!("probe cancelled")), + }; + ProbeFinished::Regular(res) } - warn!(?probe_proto, "no successful probes in ProbeSet"); - Err(anyhow!("All probes in ProbeSet failed")) - } - .instrument(info_span!("probe")), - ); + .instrument(debug_span!("run-probe", %probe)), + ); + } } - self.outstanding_tasks.probes = true; - - Ok(probes) - } -} - -/// Tasks on which the reportgen [`Actor`] is still waiting. -/// -/// There is no particular progression, e.g. hairpin starts `false`, moves to `true` when a -/// check is started and then becomes `false` again once it is finished. -#[derive(Debug, Default)] -struct OutstandingTasks { - probes: bool, - port_mapper: bool, - captive_task: bool, - hairpin: bool, -} -impl OutstandingTasks { - fn all_done(&self) -> bool { - !(self.probes || self.port_mapper || self.captive_task || self.hairpin) + token } } /// The success result of [`run_probe`]. #[derive(Debug, Clone)] -struct ProbeReport { +pub(super) struct ProbeReport { /// Whether we can send IPv4 UDP packets. - ipv4_can_send: bool, + pub(super) ipv4_can_send: bool, /// Whether we can send IPv6 UDP packets. - ipv6_can_send: bool, - /// Whether we can send ICMPv4 packets, `None` if not checked. - icmpv4: Option, - /// Whether we can send ICMPv6 packets, `None` if not checked. - icmpv6: Option, + pub(super) ipv6_can_send: bool, /// The latency to the relay node. 
- latency: Option, + pub(super) latency: Option, /// The probe that generated this report. - probe: Probe, + pub(super) probe: Probe, /// The discovered public address. - addr: Option, + pub(super) addr: Option, } impl ProbeReport { @@ -718,8 +462,6 @@ impl ProbeReport { probe, ipv4_can_send: false, ipv6_can_send: false, - icmpv4: None, - icmpv6: None, latency: None, addr: None, } @@ -736,9 +478,9 @@ impl ProbeReport { #[derive(Debug)] enum ProbeError { /// Abort the current set. - AbortSet(anyhow::Error, Probe), + AbortSet(anyhow::Error), /// Continue the other probes in the set. - Error(anyhow::Error, Probe), + Error(anyhow::Error), } /// Pieces needed to do QUIC address discovery. @@ -756,16 +498,10 @@ pub struct QuicConfig { } /// Executes a particular [`Probe`], including using a delayed start if needed. -/// -/// If *stun_sock4* and *stun_sock6* are `None` the STUN probes are disabled. #[allow(clippy::too_many_arguments)] async fn run_probe( - reportstate: Addr, relay_node: Arc, probe: Probe, - net_report: net_report::Addr, - metrics: Arc, - #[cfg(not(wasm_browser))] pinger: Pinger, #[cfg(not(wasm_browser))] socket_state: SocketState, #[cfg(any(test, feature = "test-utils"))] insecure_skip_relay_cert_verify: bool, ) -> Result { @@ -775,65 +511,8 @@ async fn run_probe( } debug!("starting probe"); - let (would_help_tx, would_help_rx) = oneshot::channel(); - if let Err(err) = reportstate - .send(Message::ProbeWouldHelp( - probe.clone(), - relay_node.clone(), - would_help_tx, - )) - .await - { - // this happens on shutdown or if the report is already finished - debug!("Failed to check if probe would help: {err:#}"); - return Err(ProbeError::AbortSet(err.into(), probe.clone())); - } - - if !would_help_rx.await.map_err(|_| { - ProbeError::AbortSet( - anyhow!("ReportCheck actor dropped sender while waiting for ProbeWouldHelp response"), - probe.clone(), - ) - })? { - return Err(ProbeError::AbortSet( - anyhow!("ReportCheck says probe set no longer useful"), - probe, - )); - } - - #[cfg(not(wasm_browser))] - let relay_addr = get_relay_addr(&socket_state.dns_resolver, &relay_node, probe.proto()) - .await - .context("no relay node addr") - .map_err(|e| ProbeError::AbortSet(e, probe.clone()))?; - - let mut result = ProbeReport::new(probe.clone()); match probe { - #[cfg(not(wasm_browser))] - Probe::StunIpv4 { .. } | Probe::StunIpv6 { .. } => { - let maybe_sock = if matches!(probe, Probe::StunIpv4 { .. }) { - socket_state.stun_sock4.as_ref() - } else { - socket_state.stun_sock6.as_ref() - }; - match maybe_sock { - Some(sock) => { - result = run_stun_probe(sock, relay_addr, net_report, probe, &metrics).await?; - } - None => { - return Err(ProbeError::AbortSet( - anyhow!("No socket for {}, aborting probeset", probe.proto()), - probe.clone(), - )); - } - } - } - #[cfg(not(wasm_browser))] - Probe::IcmpV4 { .. } | Probe::IcmpV6 { .. } => { - result = run_icmp_probe(probe, relay_addr, pinger).await? - } Probe::Https { ref node, .. } => { - debug!("sending probe HTTPS"); match measure_https_latency( #[cfg(not(wasm_browser))] &socket_state.dns_resolver, @@ -844,31 +523,37 @@ async fn run_probe( .await { Ok((latency, ip)) => { - debug!(?latency, "latency"); - result.latency = Some(latency); - // We set these IPv4 and IPv6 but they're not really used - // and we don't necessarily set them both. If UDP is blocked - // and both IPv4 and IPv6 are available over TCP, it's basically - // random which fields end up getting set here. - // Since they're not needed, that's fine for now. 
+ debug!(?latency, "https latency"); + let mut report = ProbeReport::new(probe); + report.latency = Some(latency); match ip { - IpAddr::V4(_) => result.ipv4_can_send = true, - IpAddr::V6(_) => result.ipv6_can_send = true, + IpAddr::V4(_) => report.ipv4_can_send = true, + IpAddr::V6(_) => report.ipv6_can_send = true, } + Ok(report) } - Err(err) => { - warn!("https latency measurement failed: {:?}", err); - } + Err(err) => Err(ProbeError::Error(err)), } } #[cfg(not(wasm_browser))] - Probe::QuicIpv4 { ref node, .. } | Probe::QuicIpv6 { ref node, .. } => { - debug!("sending QUIC address discovery probe"); - let url = node.url.clone(); + Probe::QadIpv4 { ref node, .. } | Probe::QadIpv6 { ref node, .. } => { match socket_state.quic_config { Some(quic_config) => { - result = run_quic_probe( + let relay_addr = match probe.proto() { + ProbeProto::QadIpv4 => { + get_relay_addr_ipv4(&socket_state.dns_resolver, &relay_node).await + } + ProbeProto::QadIpv6 => { + get_relay_addr_ipv6(&socket_state.dns_resolver, &relay_node).await + } + _ => unreachable!(), + } + .context("no relay node addr") + .map_err(ProbeError::AbortSet)?; + + let url = node.url.clone(); + let report = run_quic_probe( quic_config, url, relay_addr, @@ -876,99 +561,12 @@ async fn run_probe( socket_state.ip_mapped_addrs, ) .await?; + Ok(report) } - None => { - return Err(ProbeError::AbortSet( - anyhow!("No QUIC endpoint for {}", probe.proto()), - probe.clone(), - )); - } - } - } - } - - trace!("probe successful"); - Ok(result) -} - -/// Run a STUN IPv4 or IPv6 probe. -#[cfg(not(wasm_browser))] -async fn run_stun_probe( - sock: &Arc, - relay_addr: SocketAddr, - net_report: net_report::Addr, - probe: Probe, - metrics: &Metrics, -) -> Result { - match probe.proto() { - ProbeProto::StunIpv4 => debug_assert!(relay_addr.is_ipv4()), - ProbeProto::StunIpv6 => debug_assert!(relay_addr.is_ipv6()), - _ => debug_assert!(false, "wrong probe"), - } - let txid = stun::TransactionId::default(); - let req = stun::request(txid); - - // Setup net_report to give us back the incoming STUN response. - let (stun_tx, stun_rx) = oneshot::channel(); - let (inflight_ready_tx, inflight_ready_rx) = oneshot::channel(); - net_report - .send(net_report::Message::InFlightStun( - net_report::Inflight { - txn: txid, - start: Instant::now(), - s: stun_tx, - }, - inflight_ready_tx, - )) - .await - .map_err(|e| ProbeError::Error(e.into(), probe.clone()))?; - inflight_ready_rx - .await - .map_err(|e| ProbeError::Error(e.into(), probe.clone()))?; - - // Send the probe. - match sock.send_to(&req, relay_addr).await { - Ok(n) if n == req.len() => { - debug!(%relay_addr, %txid, "sending {} probe", probe.proto()); - let mut result = ProbeReport::new(probe.clone()); - - if matches!(probe, Probe::StunIpv4 { .. 
}) { - result.ipv4_can_send = true; - metrics.stun_packets_sent_ipv4.inc(); - } else { - result.ipv6_can_send = true; - metrics.stun_packets_sent_ipv6.inc(); - } - let (delay, addr) = stun_rx - .await - .map_err(|e| ProbeError::Error(e.into(), probe.clone()))?; - result.latency = Some(delay); - result.addr = Some(addr); - Ok(result) - } - Ok(n) => { - let err = anyhow!("Failed to send full STUN request: {}", probe.proto()); - error!(%relay_addr, sent_len=n, req_len=req.len(), "{err:#}"); - Err(ProbeError::Error(err, probe.clone())) - } - Err(err) => { - let kind = err.kind(); - let err = anyhow::Error::new(err) - .context(format!("Failed to send STUN request: {}", probe.proto())); - - // It is entirely normal that we are on a dual-stack machine with no - // routed IPv6 network. So silence that case. - // NetworkUnreachable and HostUnreachable are still experimental (io_error_more - // #86442) but it is already emitted. So hack around this. - match format!("{kind:?}").as_str() { - "NetworkUnreachable" | "HostUnreachable" => { - debug!(%relay_addr, "{err:#}"); - Err(ProbeError::AbortSet(err, probe.clone())) - } - _ => { - // No need to log this, our caller does already log this. - Err(ProbeError::Error(err, probe.clone())) - } + None => Err(ProbeError::AbortSet(anyhow!( + "No QUIC endpoint for {}", + probe.proto() + ))), } } } @@ -994,29 +592,29 @@ async fn run_quic_probe( probe: Probe, ip_mapped_addrs: Option, ) -> Result { + trace!("QAD probe start"); match probe.proto() { - ProbeProto::QuicIpv4 => debug_assert!(relay_addr.is_ipv4()), - ProbeProto::QuicIpv6 => debug_assert!(relay_addr.is_ipv6()), + ProbeProto::QadIpv4 => debug_assert!(relay_addr.is_ipv4()), + ProbeProto::QadIpv6 => debug_assert!(relay_addr.is_ipv6()), _ => debug_assert!(false, "wrong probe"), } let relay_addr = maybe_to_mapped_addr(ip_mapped_addrs, relay_addr); let host = match url.host_str() { Some(host) => host, None => { - return Err(ProbeError::Error( - anyhow!("URL must have 'host' to use QUIC address discovery probes"), - probe.clone(), - )); + return Err(ProbeError::Error(anyhow!( + "URL must have 'host' to use QUIC address discovery probes" + ))); } }; let quic_client = iroh_relay::quic::QuicClient::new(quic_config.ep, quic_config.client_config) - .map_err(|e| ProbeError::Error(e, probe.clone()))?; + .map_err(ProbeError::Error)?; let (addr, latency) = quic_client .get_addr_and_latency(relay_addr, host) .await - .map_err(|e| ProbeError::Error(e, probe.clone()))?; + .map_err(ProbeError::Error)?; let mut result = ProbeReport::new(probe.clone()); - if matches!(probe, Probe::QuicIpv4 { .. }) { + if matches!(probe, Probe::QadIpv4 { .. }) { result.ipv4_can_send = true; } else { result.ipv6_can_send = true; @@ -1039,19 +637,12 @@ async fn check_captive_portal( ) -> Result { // If we have a preferred relay node and we can use it for non-STUN requests, try that; // otherwise, pick a random one suitable for non-STUN requests. 
- let preferred_relay = preferred_relay.and_then(|url| match dm.get_node(&url) { - Some(node) if node.stun_only => Some(url), - _ => None, - }); + let preferred_relay = preferred_relay.and_then(|url| dm.get_node(&url).map(|_| url)); let url = match preferred_relay { Some(url) => url, None => { - let urls: Vec<_> = dm - .nodes() - .filter(|n| !n.stun_only) - .map(|n| n.url.clone()) - .collect(); + let urls: Vec<_> = dm.nodes().map(|n| n.url.clone()).collect(); if urls.is_empty() { debug!("No suitable relay node for captive portal check"); return Ok(false); @@ -1114,62 +705,35 @@ async fn check_captive_portal( } /// Returns the proper port based on the protocol of the probe. -fn get_port(relay_node: &RelayNode, proto: &ProbeProto) -> Result { - match proto { - #[cfg(not(wasm_browser))] - ProbeProto::QuicIpv4 | ProbeProto::QuicIpv6 => { - if let Some(ref quic) = relay_node.quic { - if quic.port == 0 { - Ok(DEFAULT_RELAY_QUIC_PORT) - } else { - Ok(quic.port) - } - } else { - bail!("Relay node not suitable for QUIC address discovery probes"); - } - } - _ => { - if relay_node.stun_port == 0 { - Ok(DEFAULT_STUN_PORT) - } else { - Ok(relay_node.stun_port) - } +fn get_quic_port(relay_node: &RelayNode) -> Result { + if let Some(ref quic) = relay_node.quic { + if quic.port == 0 { + Ok(DEFAULT_RELAY_QUIC_PORT) + } else { + Ok(quic.port) } + } else { + bail!("Relay node not suitable for QUIC address discovery probes"); } } -/// Returns the IP address to use to communicate to this relay node. -/// -/// *proto* specifies the protocol of the probe. Depending on the protocol we may return -/// different results. Obviously IPv4 vs IPv6 but a [`RelayNode`] may also have disabled -/// some protocols. -/// -/// If the protocol is `QuicIpv4` or `QuicIpv6`, and `IpMappedAddresses` is not `None`, we -/// assume that we are running this net report with `iroh`, and need to provide mapped -/// addresses to the probe in order for it to function in the specialize iroh-quinn -/// endpoint that expects mapped addresses. +/// Returns the IP address to use to communicate to this relay node for quic. 
#[cfg(not(wasm_browser))] -async fn get_relay_addr( +async fn get_relay_addr_ipv4( dns_resolver: &DnsResolver, relay_node: &RelayNode, - proto: ProbeProto, ) -> Result { - if relay_node.stun_only && !matches!(proto, ProbeProto::StunIpv4 | ProbeProto::StunIpv6) { - bail!("Relay node not suitable for non-STUN probes"); - } - let port = get_port(relay_node, &proto)?; - - match proto { - ProbeProto::StunIpv4 | ProbeProto::IcmpV4 | ProbeProto::QuicIpv4 => { - relay_lookup_ipv4_staggered(dns_resolver, relay_node, port).await - } - - ProbeProto::StunIpv6 | ProbeProto::IcmpV6 | ProbeProto::QuicIpv6 => { - relay_lookup_ipv6_staggered(dns_resolver, relay_node, port).await - } + let port = get_quic_port(relay_node)?; + relay_lookup_ipv4_staggered(dns_resolver, relay_node, port).await +} - ProbeProto::Https => Err(anyhow!("Not implemented")), - } +#[cfg(not(wasm_browser))] +async fn get_relay_addr_ipv6( + dns_resolver: &DnsResolver, + relay_node: &RelayNode, +) -> Result { + let port = get_quic_port(relay_node)?; + relay_lookup_ipv6_staggered(dns_resolver, relay_node, port).await } /// Do a staggared ipv4 DNS lookup based on [`RelayNode`] @@ -1183,7 +747,7 @@ async fn relay_lookup_ipv4_staggered( ) -> Result { match relay.url.host() { Some(url::Host::Domain(hostname)) => { - debug!(%hostname, "Performing DNS A lookup for relay addr"); + trace!(%hostname, "Performing DNS A lookup for relay addr"); match dns_resolver .lookup_ipv4_staggered(hostname, DNS_TIMEOUT, DNS_STAGGERING_MS) .await @@ -1213,7 +777,7 @@ async fn relay_lookup_ipv6_staggered( ) -> Result { match relay.url.host() { Some(url::Host::Domain(hostname)) => { - debug!(%hostname, "Performing DNS AAAA lookup for relay addr"); + trace!(%hostname, "Performing DNS AAAA lookup for relay addr"); match dns_resolver .lookup_ipv6_staggered(hostname, DNS_TIMEOUT, DNS_STAGGERING_MS) .await @@ -1232,50 +796,6 @@ async fn relay_lookup_ipv6_staggered( } } -/// Runs an ICMP IPv4 or IPv6 probe. -/// -/// The `pinger` is passed in so the ping sockets are only bound once -/// for the probe set. -#[cfg(not(wasm_browser))] -async fn run_icmp_probe( - probe: Probe, - relay_addr: SocketAddr, - pinger: Pinger, -) -> Result { - match probe.proto() { - ProbeProto::IcmpV4 => debug_assert!(relay_addr.is_ipv4()), - ProbeProto::IcmpV6 => debug_assert!(relay_addr.is_ipv6()), - _ => debug_assert!(false, "wrong probe"), - } - const DATA: &[u8; 15] = b"iroh icmp probe"; - debug!(dst = %relay_addr, len = DATA.len(), "ICMP Ping started"); - let latency = pinger - .send(relay_addr.ip(), DATA) - .await - .map_err(|err| match err { - PingError::Client(err) => ProbeError::AbortSet( - anyhow!("Failed to create pinger ({err:#}), aborting probeset"), - probe.clone(), - ), - #[cfg(not(wasm_browser))] - PingError::Ping(err) => ProbeError::Error(err.into(), probe.clone()), - })?; - debug!(dst = %relay_addr, len = DATA.len(), ?latency, "ICMP ping done"); - let mut report = ProbeReport::new(probe); - report.latency = Some(latency); - match relay_addr { - SocketAddr::V4(_) => { - report.ipv4_can_send = true; - report.icmpv4 = Some(true); - } - SocketAddr::V6(_) => { - report.ipv6_can_send = true; - report.icmpv6 = Some(true); - } - } - Ok(report) -} - /// Executes an HTTPS probe. 
/// /// If `certs` is provided they will be added to the trusted root certificates, allowing the @@ -1286,6 +806,7 @@ async fn measure_https_latency( node: &RelayNode, #[cfg(any(test, feature = "test-utils"))] insecure_skip_relay_cert_verify: bool, ) -> Result<(Duration, IpAddr)> { + debug!(%node, "measure https latency"); let url = node.url.join(RELAY_PROBE_PATH)?; // This should also use same connection establishment as relay client itself, which @@ -1354,100 +875,9 @@ async fn measure_https_latency( } } -/// Updates a net_report [`Report`] with a new [`ProbeReport`]. -fn update_report(report: &mut Report, probe_report: ProbeReport) { - let relay_node = probe_report.probe.node(); - if let Some(latency) = probe_report.latency { - report - .relay_latency - .update_relay(relay_node.url.clone(), latency); - - #[cfg(not(wasm_browser))] - if matches!( - probe_report.probe.proto(), - ProbeProto::StunIpv4 - | ProbeProto::StunIpv6 - | ProbeProto::QuicIpv4 - | ProbeProto::QuicIpv6 - ) { - report.udp = true; - - match probe_report.addr { - Some(SocketAddr::V4(ipp)) => { - report.ipv4 = true; - report - .relay_v4_latency - .update_relay(relay_node.url.clone(), latency); - if report.global_v4.is_none() { - report.global_v4 = Some(ipp); - } else if report.global_v4 != Some(ipp) { - report.mapping_varies_by_dest_ip = Some(true); - } else if report.mapping_varies_by_dest_ip.is_none() { - report.mapping_varies_by_dest_ip = Some(false); - } - } - Some(SocketAddr::V6(ipp)) => { - report.ipv6 = true; - report - .relay_v6_latency - .update_relay(relay_node.url.clone(), latency); - if report.global_v6.is_none() { - report.global_v6 = Some(ipp); - } else if report.global_v6 != Some(ipp) { - report.mapping_varies_by_dest_ipv6 = Some(true); - warn!("IPv6 Address detected by STUN varies by destination"); - } else if report.mapping_varies_by_dest_ipv6.is_none() { - report.mapping_varies_by_dest_ipv6 = Some(false); - } - } - None => { - // If we are here we had a relay server latency reported from a STUN probe. - // Thus we must have a reported address. - debug_assert!(probe_report.addr.is_some()); - } - } - } - } - report.ipv4_can_send |= probe_report.ipv4_can_send; - report.ipv6_can_send |= probe_report.ipv6_can_send; - report.icmpv4 = report - .icmpv4 - .map(|val| val || probe_report.icmpv4.unwrap_or_default()) - .or(probe_report.icmpv4); - report.icmpv6 = report - .icmpv6 - .map(|val| val || probe_report.icmpv6.unwrap_or_default()) - .or(probe_report.icmpv6); -} - -/// Resolves to pending if the inner is `None`. -#[derive(Debug)] -pub(crate) struct MaybeFuture { - /// Future to be polled. 
- pub inner: Option, -} - -// NOTE: explicit implementation to bypass derive unnecessary bounds -impl Default for MaybeFuture { - fn default() -> Self { - MaybeFuture { inner: None } - } -} - -impl Future for MaybeFuture { - type Output = T::Output; - - fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - match self.inner { - Some(ref mut t) => Pin::new(t).poll(cx), - None => Poll::Pending, - } - } -} - #[cfg(test)] mod tests { - use std::net::{Ipv4Addr, Ipv6Addr}; + use std::net::Ipv4Addr; use testresult::TestResult; use tracing_test::traced_test; @@ -1455,256 +885,6 @@ mod tests { use super::{super::test_utils, *}; use crate::net_report::dns; - #[tokio::test] - #[traced_test] - async fn test_update_report_stun_working() { - let (_server_a, relay_a) = test_utils::relay().await; - let (_server_b, relay_b) = test_utils::relay().await; - - let mut report = Report::default(); - let relay_a = Arc::new(relay_a); - let relay_b = Arc::new(relay_b); - - // A STUN IPv4 probe from the the first relay server. - let probe_report_a = ProbeReport { - ipv4_can_send: true, - ipv6_can_send: false, - icmpv4: None, - icmpv6: None, - latency: Some(Duration::from_millis(5)), - probe: Probe::StunIpv4 { - delay: Duration::ZERO, - node: relay_a.clone(), - }, - addr: Some((Ipv4Addr::new(203, 0, 113, 1), 1234).into()), - }; - update_report(&mut report, probe_report_a.clone()); - - assert!(report.udp); - assert_eq!( - report.relay_latency.get(&relay_a.url).unwrap(), - Duration::from_millis(5) - ); - assert_eq!( - report.relay_v4_latency.get(&relay_a.url).unwrap(), - Duration::from_millis(5) - ); - assert!(report.ipv4_can_send); - assert!(!report.ipv6_can_send); - - // A second STUN IPv4 probe, same external IP detected but slower. - let probe_report_b = ProbeReport { - latency: Some(Duration::from_millis(8)), - probe: Probe::StunIpv4 { - delay: Duration::ZERO, - node: relay_b.clone(), - }, - ..probe_report_a - }; - update_report(&mut report, probe_report_b); - - assert!(report.udp); - assert_eq!( - report.relay_latency.get(&relay_a.url).unwrap(), - Duration::from_millis(5) - ); - assert_eq!( - report.relay_v4_latency.get(&relay_a.url).unwrap(), - Duration::from_millis(5) - ); - assert!(report.ipv4_can_send); - assert!(!report.ipv6_can_send); - - // A STUN IPv6 probe, this one is faster. - let probe_report_a_ipv6 = ProbeReport { - ipv4_can_send: false, - ipv6_can_send: true, - icmpv4: None, - icmpv6: None, - latency: Some(Duration::from_millis(4)), - probe: Probe::StunIpv6 { - delay: Duration::ZERO, - node: relay_a.clone(), - }, - addr: Some((Ipv6Addr::new(2001, 0xdb8, 0, 0, 0, 0, 0, 1), 1234).into()), - }; - update_report(&mut report, probe_report_a_ipv6); - - assert!(report.udp); - assert_eq!( - report.relay_latency.get(&relay_a.url).unwrap(), - Duration::from_millis(4) - ); - assert_eq!( - report.relay_v6_latency.get(&relay_a.url).unwrap(), - Duration::from_millis(4) - ); - assert!(report.ipv4_can_send); - assert!(report.ipv6_can_send); - } - - #[tokio::test] - #[traced_test] - async fn test_update_report_icmp() { - let (_server_a, relay_a) = test_utils::relay().await; - let (_server_b, relay_b) = test_utils::relay().await; - let relay_a = Arc::new(relay_a); - let relay_b = Arc::new(relay_b); - - let mut report = Report::default(); - - // An ICMPv4 probe from the EU relay server. 
- let probe_report_eu = ProbeReport { - ipv4_can_send: true, - ipv6_can_send: false, - icmpv4: Some(true), - icmpv6: None, - latency: Some(Duration::from_millis(5)), - probe: Probe::IcmpV4 { - delay: Duration::ZERO, - node: relay_a.clone(), - }, - addr: Some((Ipv4Addr::new(203, 0, 113, 1), 1234).into()), - }; - update_report(&mut report, probe_report_eu.clone()); - - assert!(!report.udp); - assert!(report.ipv4_can_send); - assert_eq!(report.icmpv4, Some(true)); - - // A second ICMPv4 probe which did not work. - let probe_report_na = ProbeReport { - ipv4_can_send: false, - ipv6_can_send: false, - icmpv4: Some(false), - icmpv6: None, - latency: None, - probe: Probe::IcmpV4 { - delay: Duration::ZERO, - node: relay_b.clone(), - }, - addr: None, - }; - update_report(&mut report, probe_report_na); - - assert_eq!(report.icmpv4, Some(true)); - - // Behold, a STUN probe arrives! - let probe_report_eu_stun = ProbeReport { - ipv4_can_send: true, - ipv6_can_send: false, - icmpv4: None, - icmpv6: None, - latency: Some(Duration::from_millis(5)), - probe: Probe::StunIpv4 { - delay: Duration::ZERO, - node: relay_a.clone(), - }, - addr: Some((Ipv4Addr::new(203, 0, 113, 1), 1234).into()), - }; - update_report(&mut report, probe_report_eu_stun); - - assert!(report.udp); - assert_eq!(report.icmpv4, Some(true)); - } - - // # ICMP permissions on Linux - // - // ## Using capabilities: CAP_NET_RAW - // - // To run ICMP tests on Linux you need CAP_NET_RAW capabilities. When running tests - // this means you first need to build the binary, set the capabilities and finally run - // the tests. - // - // Build the test binary: - // - // cargo nextest run -p iroh net_report::reportgen::tests --no-run - // - // Find out the test binary location: - // - // cargo nextest list --message-format json -p iroh net_report::reportgen::tests \ - // | jq '."rust-suites"."iroh"."binary-path"' | tr -d \" - // - // Set the CAP_NET_RAW permission, note that nextest runs each test in a child process - // so the capabilities need to be inherited: - // - // sudo setcap CAP_NET_RAW=eip target/debug/deps/iroh-abc123 - // - // Finally run the test: - // - // cargo nextest run -p iroh net_report::reportgen::tests - // - // This allows the pinger to create a SOCK_RAW socket for IPPROTO_ICMP. - // - // - // ## Using sysctl - // - // Now you know the hard way, you can also get this permission a little easier, but - // slightly less secure, by allowing any process running with your group ID to create a - // SOCK_DGRAM for IPPROTO_ICMP. - // - // First find out your group ID: - // - // id --group - // - // Then allow this group to send pings. Note that this is an inclusive range: - // - // sudo sysctl net.ipv4.ping_group_range="1234 1234" - // - // Note that this does not survive a reboot usually, commonly you need to edit - // /etc/sysctl.conf or /etc/sysctl.d/* to persist this across reboots. - // - // TODO: Not sure what about IPv6 pings using sysctl. - #[tokio::test] - #[traced_test] - async fn test_icmpk_probe() { - let pinger = Pinger::new(); - let (server, node) = test_utils::relay().await; - let addr = server.stun_addr().expect("test relay serves stun"); - let probe = Probe::IcmpV4 { - delay: Duration::from_secs(0), - node: Arc::new(node), - }; - - // A single ICMP packet might get lost. Try several and take the first. 
- let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel(); - let mut tasks = JoinSet::new(); - for i in 0..8 { - let probe = probe.clone(); - let pinger = pinger.clone(); - let tx = tx.clone(); - tasks.spawn(async move { - time::sleep(Duration::from_millis(i * 100)).await; - let res = run_icmp_probe(probe, addr, pinger).await; - tx.send(res).ok(); - }); - } - let mut last_err = None; - while let Some(res) = rx.recv().await { - match res { - Ok(report) => { - dbg!(&report); - assert_eq!(report.icmpv4, Some(true)); - assert!( - report.latency.expect("should have a latency") > Duration::from_secs(0) - ); - break; - } - Err(ProbeError::Error(err, _probe)) => { - last_err = Some(err); - } - Err(ProbeError::AbortSet(_err, _probe)) => { - // We don't have permission, too bad. - // panic!("no ping permission: {err:#}"); - break; - } - } - } - if let Some(err) = last_err { - panic!("Ping error: {err:#}"); - } - } - #[tokio::test] async fn test_measure_https_latency() -> TestResult { let (_server, relay) = test_utils::relay().await; @@ -1739,7 +919,7 @@ mod tests { }; let url = relay.url.clone(); let port = server.quic_addr().unwrap().port(); - let probe = Probe::QuicIpv4 { + let probe = Probe::QadIpv4 { delay: Duration::from_secs(0), node: relay, }; @@ -1754,7 +934,7 @@ mod tests { { Ok(probe) => probe, Err(e) => match e { - ProbeError::AbortSet(err, _) | ProbeError::Error(err, _) => { + ProbeError::AbortSet(err) | ProbeError::Error(err) => { return Err(err.into()); } }, diff --git a/iroh/src/net_report/reportgen/hairpin.rs b/iroh/src/net_report/reportgen/hairpin.rs deleted file mode 100644 index 7e62e3dd726..00000000000 --- a/iroh/src/net_report/reportgen/hairpin.rs +++ /dev/null @@ -1,308 +0,0 @@ -//! Actor to run hairpinning check. -//! -//! This actor works as follows: -//! -//! - After starting prepares the haircheck: -//! - binds socket -//! - sends traffic from it's socket to trick some routers -//! - When requested performs the hairpin probe. -//! - result is sent to net_report actor addr. -//! - Shuts down -//! -//! Note it will only perform a single hairpin check before shutting down. Any further -//! requests to it will fail which is intentional. - -use std::net::{Ipv4Addr, SocketAddr, SocketAddrV4}; - -use anyhow::{bail, Context, Result}; -use iroh_relay::protos::stun; -use n0_future::{ - task::{self, AbortOnDropHandle}, - time::{self, Instant}, -}; -use netwatch::UdpSocket; -use tokio::sync::oneshot; -use tracing::{debug, error, info_span, trace, warn, Instrument}; - -use crate::net_report::{self, defaults::timeouts::HAIRPIN_CHECK_TIMEOUT, reportgen, Inflight}; - -/// Handle to the hairpin actor. -/// -/// Dropping it will abort the actor. -#[derive(Debug)] -pub(super) struct Client { - addr: Option>, - _drop_guard: AbortOnDropHandle<()>, -} - -impl Client { - pub(super) fn new(net_report: net_report::Addr, reportgen: reportgen::Addr) -> Self { - let (addr, msg_rx) = oneshot::channel(); - - let actor = Actor { - msg_rx, - net_report, - reportgen, - }; - - let task = - task::spawn(async move { actor.run().await }.instrument(info_span!("hairpin.actor"))); - Self { - addr: Some(addr), - _drop_guard: AbortOnDropHandle::new(task), - } - } - - /// Returns `true` if we have started a hairpin check before. - pub(super) fn has_started(&self) -> bool { - self.addr.is_none() - } - - /// Starts the hairpin check. - /// - /// *dst* should be our own address as discovered by STUN. 
Hairpin detection works by - /// sending a new STUN request to our own public address, if we receive this request - /// back then hairpinning works, otherwise it does not. - /// - /// Will do nothing if this actor is already finished or a check has already started. - pub(super) fn start_check(&mut self, dst: SocketAddrV4) { - if let Some(addr) = self.addr.take() { - addr.send(Message::StartCheck(dst)).ok(); - } - } -} - -#[derive(Debug)] -enum Message { - /// Performs the hairpin check. - /// - /// The STUN request will be sent to the provided [`SocketAddrV4`] which should be our - /// own address discovered using STUN. - StartCheck(SocketAddrV4), -} - -#[derive(Debug)] -struct Actor { - msg_rx: oneshot::Receiver, - net_report: net_report::Addr, - reportgen: reportgen::Addr, -} - -impl Actor { - async fn run(self) { - match self.run_inner().await { - Ok(_) => trace!("hairpin actor finished successfully"), - Err(err) => error!("Hairpin actor failed: {err:#}"), - } - } - - async fn run_inner(self) -> Result<()> { - let socket = UdpSocket::bind_v4(0).context("Failed to bind hairpin socket on 0.0.0.0:0")?; - - if let Err(err) = Self::prepare_hairpin(&socket).await { - warn!("unable to send hairpin prep: {err:#}"); - // Continue anyway, most routers are fine. - } - - // We only have one message to handle - let Ok(Message::StartCheck(dst)) = self.msg_rx.await else { - return Ok(()); - }; - - let txn = stun::TransactionId::default(); - trace!(%txn, "Sending hairpin with transaction ID"); - let (stun_tx, stun_rx) = oneshot::channel(); - let inflight = Inflight { - txn, - start: Instant::now(), // ignored by hairping probe - s: stun_tx, - }; - let (msg_response_tx, msg_response_rx) = oneshot::channel(); - self.net_report - .send(net_report::Message::InFlightStun(inflight, msg_response_tx)) - .await - .context("net_report actor gone")?; - msg_response_rx.await.context("net_report actor died")?; - - if let Err(err) = socket.send_to(&stun::request(txn), dst.into()).await { - warn!(%dst, "failed to send hairpin check"); - return Err(err.into()); - } - - let now = Instant::now(); - let hairpinning_works = match time::timeout(HAIRPIN_CHECK_TIMEOUT, stun_rx).await { - Ok(Ok(_)) => true, - Ok(Err(_)) => bail!("net_report actor dropped stun response channel"), - Err(_) => false, // Elapsed - }; - debug!( - "hairpinning done in {:?}, res: {:?}", - now.elapsed(), - hairpinning_works - ); - - self.reportgen - .send(super::Message::HairpinResult(hairpinning_works)) - .await - .context("Failed to send hairpin result to reportgen actor")?; - - trace!("reportgen notified"); - - Ok(()) - } - - async fn prepare_hairpin(socket: &UdpSocket) -> Result<()> { - // At least the Apple Airport Extreme doesn't allow hairpin - // sends from a private socket until it's seen traffic from - // that src IP:port to something else out on the internet. - // - // See https://github.com/tailscale/tailscale/issues/188#issuecomment-600728643 - // - // And it seems that even sending to a likely-filtered RFC 5737 - // documentation-only IPv4 range is enough to set up the mapping. - // So do that for now. In the future we might want to classify networks - // that do and don't require this separately. But for now help it. 
- let documentation_ip = SocketAddr::from((Ipv4Addr::new(203, 0, 113, 1), 12345)); - - socket - .send_to( - b"net_report; see https://github.com/tailscale/tailscale/issues/188", - documentation_ip, - ) - .await?; - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use std::time::Duration; - - use bytes::BytesMut; - use tokio::sync::mpsc; - use tracing::info; - use tracing_test::traced_test; - - use super::*; - - #[tokio::test] - #[traced_test] - async fn test_hairpin_success() { - for i in 0..100 { - let now = Instant::now(); - test_hairpin(true).await; - println!("done round {} in {:?}", i + 1, now.elapsed()); - } - } - - #[tokio::test] - #[traced_test] - async fn test_hairpin_failure() { - test_hairpin(false).await; - } - - async fn test_hairpin(hairpinning_works: bool) { - // Setup fake net_report and reportstate actors, hairpinning interacts with them. - let (net_report_tx, mut net_report_rx) = mpsc::channel(32); - let net_report_addr = net_report::Addr { - sender: net_report_tx, - metrics: Default::default(), - }; - let (reportstate_tx, mut reportstate_rx) = mpsc::channel(32); - let reportstate_addr = reportgen::Addr { - sender: reportstate_tx, - }; - - // Create hairpin actor - let mut actor = Client::new(net_report_addr, reportstate_addr); - - // Hairpinning works by asking the hairpin actor to send a STUN request to our - // discovered public address. If the router returns it hairpinning works. We - // emulate this by binding a random socket which we pretend is our publicly - // discovered address. The hairpin actor will send it a request and we return it - // via the inflight channel. - let public_sock = UdpSocket::bind_local_v4(0).unwrap(); - let ipp_v4 = match public_sock.local_addr().unwrap() { - SocketAddr::V4(ipp) => ipp, - SocketAddr::V6(_) => unreachable!(), - }; - actor.start_check(ipp_v4); - - // This bit is our dummy net_report actor: it handles the inflight request and sends - // back the STUN request once it arrives. - let dummy_net_report = tokio::spawn( - async move { - let net_report::Message::InFlightStun(inflight, resp_tx) = - net_report_rx.recv().await.unwrap() - else { - panic!("Wrong message received"); - }; - resp_tx.send(()).unwrap(); - - let mut buf = BytesMut::zeroed(64 << 10); - let (count, addr) = public_sock.recv_from(&mut buf).await.unwrap(); - info!( - addr=?public_sock.local_addr().unwrap(), - %count, - "Forwarding payload to hairpin actor", - ); - let payload = buf.split_to(count).freeze(); - let txn = stun::parse_binding_request(&payload).unwrap(); - assert_eq!(txn, inflight.txn); - - if hairpinning_works { - // We want hairpinning to work, send back the STUN request. - inflight.s.send((Duration::new(0, 1), addr)).unwrap(); - } else { - // We want hairpinning to fail, just wait but do not drop the STUN response - // channel because that would make the hairpin actor detect an error. - info!("Received hairpin request, not sending response"); - tokio::time::sleep(HAIRPIN_CHECK_TIMEOUT * 8).await; - } - } - .instrument(info_span!("dummy-net_report")), - ); - - // Next we expect our dummy reportstate to receive the result. 
- match reportstate_rx.recv().await { - Some(reportgen::Message::HairpinResult(val)) => assert_eq!(val, hairpinning_works), - Some(msg) => panic!("Unexpected reportstate message: {msg:?}"), - None => panic!("reportstate mpsc has no senders"), - } - - // Cleanup: our dummy net_report actor should finish - dummy_net_report - .await - .expect("error in dummy net_report actor"); - } - - #[tokio::test] - #[traced_test] - async fn test_client_drop() { - // Setup fake net_report and reportstate actors, hairpinning interacts with them. - let (net_report_tx, _net_report_rx) = mpsc::channel(32); - let net_report_addr = net_report::Addr { - sender: net_report_tx, - metrics: Default::default(), - }; - let (reportstate_tx, _reportstate_rx) = mpsc::channel(32); - let reportstate_addr = reportgen::Addr { - sender: reportstate_tx, - }; - - // Create hairpin actor - let mut client = Client::new(net_report_addr, reportstate_addr); - - // Save the addr, drop the client - let addr = client.addr.take(); - drop(client); - tokio::task::yield_now().await; - - // Check the actor is gone - let ipp_v4 = SocketAddrV4::new(Ipv4Addr::LOCALHOST, 10); - match addr.unwrap().send(Message::StartCheck(ipp_v4)) { - Err(_) => (), - _ => panic!("actor still running"), - } - } -} diff --git a/iroh/src/net_report/reportgen/probes.rs b/iroh/src/net_report/reportgen/probes.rs index 3169e474373..2788e70c811 100644 --- a/iroh/src/net_report/reportgen/probes.rs +++ b/iroh/src/net_report/reportgen/probes.rs @@ -10,8 +10,6 @@ use anyhow::{ensure, Result}; use iroh_base::RelayUrl; use iroh_relay::{RelayMap, RelayNode}; use n0_future::time::Duration; -#[cfg(not(wasm_browser))] -use netwatch::interfaces; use crate::net_report::Report; @@ -47,73 +45,32 @@ const NUM_INCREMENTAL_RELAYS: usize = 3; #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, derive_more::Display)] #[repr(u8)] pub enum ProbeProto { - /// STUN IPv4 - #[cfg(not(wasm_browser))] - StunIpv4, - /// STUN IPv6 - #[cfg(not(wasm_browser))] - StunIpv6, /// HTTPS Https, - /// ICMP IPv4 - #[cfg(not(wasm_browser))] - IcmpV4, - /// ICMP IPv6 - #[cfg(not(wasm_browser))] - IcmpV6, /// QUIC Address Discovery Ipv4 #[cfg(not(wasm_browser))] - QuicIpv4, + QadIpv4, /// QUIC Address Discovery Ipv6 #[cfg(not(wasm_browser))] - QuicIpv6, + QadIpv6, } #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, derive_more::Display)] -pub(super) enum Probe { - #[display("STUN Ipv4 after {delay:?} to {node}")] - #[cfg(not(wasm_browser))] - StunIpv4 { - /// When the probe is started, relative to the time that `get_report` is called. - /// One probe in each `ProbePlan` should have a delay of 0. Non-zero values - /// are for retries on UDP loss or timeout. - delay: Duration, - - /// The relay server to send this probe to. 
- node: Arc, - }, - #[display("STUN Ipv6 after {delay:?} to {node}")] - #[cfg(not(wasm_browser))] - StunIpv6 { - delay: Duration, - node: Arc, - }, +pub(crate) enum Probe { #[display("HTTPS after {delay:?} to {node}")] Https { delay: Duration, node: Arc, }, - #[display("ICMPv4 after {delay:?} to {node}")] - #[cfg(not(wasm_browser))] - IcmpV4 { - delay: Duration, - node: Arc, - }, - #[display("ICMPv6 after {delay:?} to {node}")] - #[cfg(not(wasm_browser))] - IcmpV6 { - delay: Duration, - node: Arc, - }, #[display("QAD Ipv4 after {delay:?} to {node}")] #[cfg(not(wasm_browser))] - QuicIpv4 { + QadIpv4 { delay: Duration, node: Arc, }, #[display("QAD Ipv6 after {delay:?} to {node}")] #[cfg(not(wasm_browser))] - QuicIpv6 { + QadIpv6 { delay: Duration, node: Arc, }, @@ -123,50 +80,44 @@ impl Probe { pub(super) fn delay(&self) -> Duration { match self { #[cfg(not(wasm_browser))] - Probe::StunIpv4 { delay, .. } - | Probe::StunIpv6 { delay, .. } - | Probe::Https { delay, .. } - | Probe::IcmpV4 { delay, .. } - | Probe::IcmpV6 { delay, .. } - | Probe::QuicIpv4 { delay, .. } - | Probe::QuicIpv6 { delay, .. } => *delay, + Probe::Https { delay, .. } + | Probe::QadIpv4 { delay, .. } + | Probe::QadIpv6 { delay, .. } => *delay, #[cfg(wasm_browser)] Probe::Https { delay, .. } => *delay, } } - pub(super) fn proto(&self) -> ProbeProto { + pub(crate) fn proto(&self) -> ProbeProto { match self { - #[cfg(not(wasm_browser))] - Probe::StunIpv4 { .. } => ProbeProto::StunIpv4, - #[cfg(not(wasm_browser))] - Probe::StunIpv6 { .. } => ProbeProto::StunIpv6, Probe::Https { .. } => ProbeProto::Https, #[cfg(not(wasm_browser))] - Probe::IcmpV4 { .. } => ProbeProto::IcmpV4, - #[cfg(not(wasm_browser))] - Probe::IcmpV6 { .. } => ProbeProto::IcmpV6, + Probe::QadIpv4 { .. } => ProbeProto::QadIpv4, #[cfg(not(wasm_browser))] - Probe::QuicIpv4 { .. } => ProbeProto::QuicIpv4, - #[cfg(not(wasm_browser))] - Probe::QuicIpv6 { .. } => ProbeProto::QuicIpv6, + Probe::QadIpv6 { .. } => ProbeProto::QadIpv6, } } - pub(super) fn node(&self) -> &Arc { + pub(crate) fn node(&self) -> &Arc { match self { #[cfg(not(wasm_browser))] - Probe::StunIpv4 { node, .. } - | Probe::StunIpv6 { node, .. } - | Probe::Https { node, .. } - | Probe::IcmpV4 { node, .. } - | Probe::IcmpV6 { node, .. } - | Probe::QuicIpv4 { node, .. } - | Probe::QuicIpv6 { node, .. } => node, + Probe::Https { node, .. } + | Probe::QadIpv4 { node, .. } + | Probe::QadIpv6 { node, .. } => node, #[cfg(wasm_browser)] Probe::Https { node, .. } => node, } } + + #[cfg(not(wasm_browser))] + pub(super) fn is_udp(&self) -> bool { + matches!(self, Self::QadIpv4 { .. } | Self::QadIpv6 { .. }) + } + + #[cfg(wasm_browser)] + pub(super) fn is_udp(&self) -> bool { + false + } } /// A probe set is a sequence of similar [`Probe`]s with delays between them. 
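As the doc comment above says, a probe set is a sequence of similar probes separated by delays: the members are retries of the same protocol, the first success decides the set, and an abort cancels the remaining members. A minimal, self-contained sketch of that pattern, with made-up probe logic and using `tokio`/`tokio-util` directly rather than the crate's own re-exports:

use std::time::Duration;

use tokio::{task::JoinSet, time::sleep};
use tokio_util::sync::CancellationToken;

async fn run_probe_set(set_token: CancellationToken) -> Option<&'static str> {
    let mut set = JoinSet::new();
    // Retries of the same probe, started with increasing delays.
    for (i, delay_ms) in [0u64, 100, 200].into_iter().enumerate() {
        let token = set_token.child_token();
        set.spawn(async move {
            token
                .run_until_cancelled_owned(async move {
                    sleep(Duration::from_millis(delay_ms)).await;
                    // Pretend only the second retry succeeds.
                    (i == 1).then_some("probe report")
                })
                .await
                .flatten()
        });
    }
    while let Some(res) = set.join_next().await {
        if let Ok(Some(report)) = res {
            // First success wins: cancel the remaining retries via their child tokens.
            set_token.cancel();
            return Some(report);
        }
    }
    None
}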
@@ -250,7 +201,7 @@ impl ProbePlan { pub(super) fn initial( relay_map: &RelayMap, protocols: &BTreeSet, - if_state: &interfaces::State, + if_state: &super::IfStateDetails, ) -> Self { let mut plan = Self { set: BTreeSet::new(), @@ -262,52 +213,34 @@ impl ProbePlan { let mut max_high_prio_delay: Option = None; for relay_node in relay_map.nodes() { - let mut stun_ipv4_probes = ProbeSet::new(ProbeProto::StunIpv4); - let mut stun_ipv6_probes = ProbeSet::new(ProbeProto::StunIpv6); - let mut quic_ipv4_probes = ProbeSet::new(ProbeProto::QuicIpv4); - let mut quic_ipv6_probes = ProbeSet::new(ProbeProto::QuicIpv6); + let mut quic_ipv4_probes = ProbeSet::new(ProbeProto::QadIpv4); + let mut quic_ipv6_probes = ProbeSet::new(ProbeProto::QadIpv6); for attempt in 0..3 { let delay = DEFAULT_INITIAL_RETRANSMIT * attempt as u32; if if_state.have_v4 { - stun_ipv4_probes - .push(Probe::StunIpv4 { - delay, - node: relay_node.clone(), - }) - .expect("adding StunIpv4 probe to a StunIpv4 probe set"); quic_ipv4_probes - .push(Probe::QuicIpv4 { + .push(Probe::QadIpv4 { delay, node: relay_node.clone(), }) .expect("adding QuicIpv4 probe to a QuicIpv4 probe set"); } if if_state.have_v6 { - stun_ipv6_probes - .push(Probe::StunIpv6 { - delay, - node: relay_node.clone(), - }) - .expect("adding StunIpv6 probe to a StunIpv6 probe set"); quic_ipv6_probes - .push(Probe::QuicIpv6 { + .push(Probe::QadIpv6 { delay, node: relay_node.clone(), }) .expect("adding QuicIpv6 probe to a QuicAddrIpv6 probe set"); } } - plan.add_if_enabled(stun_ipv4_probes); - plan.add_if_enabled(stun_ipv6_probes); plan.add_if_enabled(quic_ipv4_probes); plan.add_if_enabled(quic_ipv6_probes); - // The HTTP and ICMP probes only start after the STUN probes have had a chance. + // The HTTP probes only start after the QUAD probes have had a chance. let mut https_probes = ProbeSet::new(ProbeProto::Https); - let mut icmp_v4_probes = ProbeSet::new(ProbeProto::IcmpV4); - let mut icmp_v6_probes = ProbeSet::new(ProbeProto::IcmpV6); for attempt in 0..3 { let mut start = *max_high_prio_delay.get_or_insert_with(|| plan.max_delay()); @@ -324,27 +257,9 @@ impl ProbePlan { node: relay_node.clone(), }) .expect("adding Https probe to a Https probe set"); - if if_state.have_v4 { - icmp_v4_probes - .push(Probe::IcmpV4 { - delay, - node: relay_node.clone(), - }) - .expect("adding Icmp probe to an Icmp probe set"); - } - if if_state.have_v6 { - icmp_v6_probes - .push(Probe::IcmpV6 { - delay, - node: relay_node.clone(), - }) - .expect("adding IcmpIpv6 probe to and IcmpIpv6 probe set"); - } } plan.add_if_enabled(https_probes); - plan.add_if_enabled(icmp_v4_probes); - plan.add_if_enabled(icmp_v6_probes); } plan } @@ -385,7 +300,7 @@ impl ProbePlan { relay_map: &RelayMap, last_report: &Report, protocols: &BTreeSet, - if_state: &interfaces::State, + if_state: &super::IfStateDetails, ) -> Self { if last_report.relay_latency.is_empty() { return Self::initial(relay_map, protocols, if_state); @@ -395,12 +310,12 @@ impl ProbePlan { protocols: protocols.clone(), }; - // The first time we need add probes after the STUN we record this delay, so that + // The first time we need to add probes after the STUN we record this delay, so that // further relay servers can reuse this delay. 
let mut max_stun_delay: Option = None; - let had_stun_ipv4 = !last_report.relay_v4_latency.is_empty(); - let had_stun_ipv6 = !last_report.relay_v6_latency.is_empty(); + let had_stun_ipv4 = !last_report.relay_latency.ipv4().is_empty(); + let had_stun_ipv6 = !last_report.relay_latency.ipv6().is_empty(); let had_both = if_state.have_v6 && had_stun_ipv4 && had_stun_ipv6; let sorted_relays = sort_relays(relay_map, last_report); for (ri, (url, relay_node)) in sorted_relays.into_iter().enumerate() { @@ -440,52 +355,34 @@ impl ProbePlan { .map(|l| l * 120 / 100) // increases latency by 20%, why? .unwrap_or(DEFAULT_ACTIVE_RETRANSMIT_DELAY); - let mut stun_ipv4_probes = ProbeSet::new(ProbeProto::StunIpv4); - let mut stun_ipv6_probes = ProbeSet::new(ProbeProto::StunIpv6); - let mut quic_ipv4_probes = ProbeSet::new(ProbeProto::QuicIpv4); - let mut quic_ipv6_probes = ProbeSet::new(ProbeProto::QuicIpv6); + let mut quic_ipv4_probes = ProbeSet::new(ProbeProto::QadIpv4); + let mut quic_ipv6_probes = ProbeSet::new(ProbeProto::QadIpv6); for attempt in 0..attempts { let delay = (retransmit_delay * attempt as u32) + (ACTIVE_RETRANSMIT_EXTRA_DELAY * attempt as u32); if do4 { - stun_ipv4_probes - .push(Probe::StunIpv4 { - delay, - node: relay_node.clone(), - }) - .expect("Pushing StunIpv4 Probe to StunIpv4 ProbeSet"); quic_ipv4_probes - .push(Probe::QuicIpv4 { + .push(Probe::QadIpv4 { delay, node: relay_node.clone(), }) .expect("adding QuicIpv4 probe to a QuicAddrIpv4 probe set"); } if do6 { - stun_ipv6_probes - .push(Probe::StunIpv6 { - delay, - node: relay_node.clone(), - }) - .expect("Pushing StunIpv6 Probe to StunIpv6 ProbeSet"); quic_ipv6_probes - .push(Probe::QuicIpv6 { + .push(Probe::QadIpv6 { delay, node: relay_node.clone(), }) .expect("adding QuicIpv6 probe to a QuicAddrIpv6 probe set"); } } - plan.add_if_enabled(stun_ipv4_probes); - plan.add_if_enabled(stun_ipv6_probes); plan.add_if_enabled(quic_ipv4_probes); plan.add_if_enabled(quic_ipv6_probes); // The HTTP and ICMP probes only start after the STUN probes have had a chance. let mut https_probes = ProbeSet::new(ProbeProto::Https); - let mut icmp_v4_probes = ProbeSet::new(ProbeProto::IcmpV4); - let mut icmp_v6_probes = ProbeSet::new(ProbeProto::IcmpV6); let start = *max_stun_delay.get_or_insert_with(|| plan.max_delay()); for attempt in 0..attempts { let delay = start @@ -497,27 +394,9 @@ impl ProbePlan { node: relay_node.clone(), }) .expect("Pushing Https Probe to an Https ProbeSet"); - if do4 { - icmp_v4_probes - .push(Probe::IcmpV4 { - delay, - node: relay_node.clone(), - }) - .expect("Pushing IcmpV4 Probe to an Icmp ProbeSet"); - } - if do6 { - icmp_v6_probes - .push(Probe::IcmpV6 { - delay, - node: relay_node.clone(), - }) - .expect("Pusying IcmpV6 Probe to an IcmpV6 ProbeSet"); - } } plan.add_if_enabled(https_probes); - plan.add_if_enabled(icmp_v4_probes); - plan.add_if_enabled(icmp_v6_probes); } plan } @@ -603,13 +482,7 @@ impl ProbePlan { fn has_priority_probes(&self) -> bool { #[cfg(not(wasm_browser))] for probe in &self.set { - if matches!( - probe.proto, - ProbeProto::StunIpv4 - | ProbeProto::StunIpv6 - | ProbeProto::QuicIpv4 - | ProbeProto::QuicIpv6 - ) { + if matches!(probe.proto, ProbeProto::QadIpv4 | ProbeProto::QadIpv6) { return true; } } @@ -681,7 +554,7 @@ mod tests { use tracing_test::traced_test; use super::*; - use crate::net_report::{test_utils, RelayLatencies}; + use crate::net_report::{reportgen::IfStateDetails, test_utils, RelayLatencies}; /// Shorthand which declares a new ProbeSet. 
/// @@ -704,15 +577,7 @@ mod tests { } fn default_protocols() -> BTreeSet { - BTreeSet::from([ - ProbeProto::StunIpv4, - ProbeProto::StunIpv6, - ProbeProto::QuicIpv4, - ProbeProto::QuicIpv6, - ProbeProto::IcmpV4, - ProbeProto::IcmpV6, - ProbeProto::Https, - ]) + BTreeSet::from([ProbeProto::QadIpv4, ProbeProto::QadIpv6, ProbeProto::Https]) } #[tokio::test] @@ -720,33 +585,19 @@ mod tests { let (_servers, relay_map) = test_utils::relay_map(2).await; let relay_node_1 = relay_map.nodes().next().unwrap(); let relay_node_2 = relay_map.nodes().nth(1).unwrap(); - let if_state = interfaces::State::fake(); + let if_state = IfStateDetails::fake(); let plan = ProbePlan::initial(&relay_map, &default_protocols(), &if_state); let mut expected_plan: ProbePlan = [ probeset! { - proto: ProbeProto::StunIpv4, - relay: relay_node_1.clone(), - delays: [Duration::ZERO, - Duration::from_millis(100), - Duration::from_millis(200)], - }, - probeset! { - proto: ProbeProto::StunIpv6, + proto: ProbeProto::QadIpv4, relay: relay_node_1.clone(), delays: [Duration::ZERO, Duration::from_millis(100), Duration::from_millis(200)], }, probeset! { - proto: ProbeProto::QuicIpv4, - relay: relay_node_1.clone(), - delays: [Duration::ZERO, - Duration::from_millis(100), - Duration::from_millis(200)], - }, - probeset! { - proto: ProbeProto::QuicIpv6, + proto: ProbeProto::QadIpv6, relay: relay_node_1.clone(), delays: [Duration::ZERO, Duration::from_millis(100), @@ -760,42 +611,14 @@ mod tests { Duration::from_millis(500)], }, probeset! { - proto: ProbeProto::IcmpV4, - relay: relay_node_1.clone(), - delays: [Duration::from_millis(300), - Duration::from_millis(400), - Duration::from_millis(500)], - }, - probeset! { - proto: ProbeProto::IcmpV6, - relay: relay_node_1.clone(), - delays: [Duration::from_millis(300), - Duration::from_millis(400), - Duration::from_millis(500)], - }, - probeset! { - proto: ProbeProto::StunIpv4, + proto: ProbeProto::QadIpv4, relay: relay_node_2.clone(), delays: [Duration::ZERO, Duration::from_millis(100), Duration::from_millis(200)], }, probeset! { - proto: ProbeProto::StunIpv6, - relay: relay_node_2.clone(), - delays: [Duration::ZERO, - Duration::from_millis(100), - Duration::from_millis(200)], - }, - probeset! { - proto: ProbeProto::QuicIpv4, - relay: relay_node_2.clone(), - delays: [Duration::ZERO, - Duration::from_millis(100), - Duration::from_millis(200)], - }, - probeset! { - proto: ProbeProto::QuicIpv6, + proto: ProbeProto::QadIpv6, relay: relay_node_2.clone(), delays: [Duration::ZERO, Duration::from_millis(100), @@ -808,20 +631,6 @@ mod tests { Duration::from_millis(400), Duration::from_millis(500)], }, - probeset! { - proto: ProbeProto::IcmpV4, - relay: relay_node_2.clone(), - delays: [Duration::from_millis(300), - Duration::from_millis(400), - Duration::from_millis(500)], - }, - probeset! 
{ - proto: ProbeProto::IcmpV6, - relay: relay_node_2.clone(), - delays: [Duration::from_millis(300), - Duration::from_millis(400), - Duration::from_millis(500)], - }, ] .into_iter() .collect(); @@ -842,12 +651,8 @@ mod tests { let (_servers, relay_map) = test_utils::relay_map(2).await; let relay_node_1 = relay_map.nodes().next().unwrap(); let relay_node_2 = relay_map.nodes().nth(1).unwrap(); - let if_state = interfaces::State::fake(); - let plan = ProbePlan::initial( - &relay_map, - &BTreeSet::from([ProbeProto::Https, ProbeProto::IcmpV4, ProbeProto::IcmpV6]), - &if_state, - ); + let if_state = IfStateDetails::fake(); + let plan = ProbePlan::initial(&relay_map, &BTreeSet::from([ProbeProto::Https]), &if_state); let mut expected_plan: ProbePlan = [ probeset! { @@ -857,20 +662,6 @@ mod tests { Duration::from_millis(100), Duration::from_millis(200)], }, - probeset! { - proto: ProbeProto::IcmpV4, - relay: relay_node_1.clone(), - delays: [Duration::ZERO, - Duration::from_millis(100), - Duration::from_millis(200)], - }, - probeset! { - proto: ProbeProto::IcmpV6, - relay: relay_node_1.clone(), - delays: [Duration::ZERO, - Duration::from_millis(100), - Duration::from_millis(200)], - }, probeset! { proto: ProbeProto::Https, relay: relay_node_2.clone(), @@ -878,25 +669,10 @@ mod tests { Duration::from_millis(100), Duration::from_millis(200)], }, - probeset! { - proto: ProbeProto::IcmpV4, - relay: relay_node_2.clone(), - delays: [Duration::ZERO, - Duration::from_millis(100), - Duration::from_millis(200)], - }, - probeset! { - proto: ProbeProto::IcmpV6, - relay: relay_node_2.clone(), - delays: [Duration::ZERO, - Duration::from_millis(100), - Duration::from_millis(200)], - }, ] .into_iter() .collect(); - expected_plan.protocols = - BTreeSet::from([ProbeProto::Https, ProbeProto::IcmpV4, ProbeProto::IcmpV6]); + expected_plan.protocols = BTreeSet::from([ProbeProto::Https]); println!("expected:"); println!("{expected_plan}"); @@ -914,13 +690,21 @@ mod tests { let (_servers, relay_map) = test_utils::relay_map(2).await; let relay_node_1 = relay_map.nodes().next().unwrap().clone(); let relay_node_2 = relay_map.nodes().nth(1).unwrap().clone(); - let if_state = interfaces::State::fake(); + let if_state = IfStateDetails::fake(); for i in 0..10 { println!("round {}", i); - let mut latencies = RelayLatencies::new(); - latencies.update_relay(relay_node_1.url.clone(), Duration::from_millis(2)); - latencies.update_relay(relay_node_2.url.clone(), Duration::from_millis(2)); + let mut latencies = RelayLatencies::default(); + latencies.update_relay( + relay_node_1.url.clone(), + Duration::from_millis(2), + ProbeProto::QadIpv4, + ); + latencies.update_relay( + relay_node_2.url.clone(), + Duration::from_millis(2), + ProbeProto::QadIpv4, + ); let last_report = Report { udp: true, ipv6: true, @@ -928,16 +712,11 @@ mod tests { ipv6_can_send: true, ipv4_can_send: true, os_has_ipv6: true, - icmpv4: None, - icmpv6: None, mapping_varies_by_dest_ip: Some(false), mapping_varies_by_dest_ipv6: Some(false), - hair_pinning: Some(true), portmap_probe: None, preferred_relay: Some(relay_node_1.url.clone()), relay_latency: latencies.clone(), - relay_v4_latency: latencies.clone(), - relay_v6_latency: latencies.clone(), global_v4: None, global_v6: None, captive_portal: None, @@ -950,23 +729,7 @@ mod tests { ); let mut expected_plan: ProbePlan = [ probeset! 
{ - proto: ProbeProto::StunIpv4, - relay: relay_node_1.clone(), - delays: [Duration::ZERO, - Duration::from_micros(52_400), - Duration::from_micros(104_800), - Duration::from_micros(157_200)], - }, - probeset! { - proto: ProbeProto::StunIpv6, - relay: relay_node_1.clone(), - delays: [Duration::ZERO, - Duration::from_micros(52_400), - Duration::from_micros(104_800), - Duration::from_micros(157_200)], - }, - probeset! { - proto: ProbeProto::QuicIpv4, + proto: ProbeProto::QadIpv4, relay: relay_node_1.clone(), delays: [Duration::ZERO, Duration::from_micros(52_400), @@ -974,7 +737,7 @@ mod tests { Duration::from_micros(157_200)], }, probeset! { - proto: ProbeProto::QuicIpv6, + proto: ProbeProto::QadIpv6, relay: relay_node_1.clone(), delays: [Duration::ZERO, Duration::from_micros(52_400), @@ -990,41 +753,13 @@ mod tests { Duration::from_micros(364_400)], }, probeset! { - proto: ProbeProto::IcmpV4, - relay: relay_node_1.clone(), - delays: [Duration::from_micros(207_200), - Duration::from_micros(259_600), - Duration::from_micros(312_000), - Duration::from_micros(364_400)], - }, - probeset! { - proto: ProbeProto::IcmpV6, - relay: relay_node_1.clone(), - delays: [Duration::from_micros(207_200), - Duration::from_micros(259_600), - Duration::from_micros(312_000), - Duration::from_micros(364_400)], - }, - probeset! { - proto: ProbeProto::StunIpv4, - relay: relay_node_2.clone(), - delays: [Duration::ZERO, - Duration::from_micros(52_400)], - }, - probeset! { - proto: ProbeProto::StunIpv6, + proto: ProbeProto::QadIpv4, relay: relay_node_2.clone(), delays: [Duration::ZERO, Duration::from_micros(52_400)], }, probeset! { - proto: ProbeProto::QuicIpv4, - relay: relay_node_2.clone(), - delays: [Duration::ZERO, - Duration::from_micros(52_400)], - }, - probeset! { - proto: ProbeProto::QuicIpv6, + proto: ProbeProto::QadIpv6, relay: relay_node_2.clone(), delays: [Duration::ZERO, Duration::from_micros(52_400)], @@ -1035,18 +770,6 @@ mod tests { delays: [Duration::from_micros(207_200), Duration::from_micros(259_600)], }, - probeset! { - proto: ProbeProto::IcmpV4, - relay: relay_node_2.clone(), - delays: [Duration::from_micros(207_200), - Duration::from_micros(259_600)], - }, - probeset! 
{ - proto: ProbeProto::IcmpV6, - relay: relay_node_2.clone(), - delays: [Duration::from_micros(207_200), - Duration::from_micros(259_600)], - }, ] .into_iter() .collect(); @@ -1070,12 +793,12 @@ mod tests { url_2: &RelayUrl, latency_2: Option, ) -> Report { - let mut latencies = RelayLatencies::new(); + let mut latencies = RelayLatencies::default(); if let Some(latency_1) = latency_1 { - latencies.update_relay(url_1.clone(), latency_1); + latencies.update_relay(url_1.clone(), latency_1, ProbeProto::QadIpv4); } if let Some(latency_2) = latency_2 { - latencies.update_relay(url_2.clone(), latency_2); + latencies.update_relay(url_2.clone(), latency_2, ProbeProto::QadIpv4); } Report { udp: true, @@ -1084,16 +807,11 @@ mod tests { ipv6_can_send: true, ipv4_can_send: true, os_has_ipv6: true, - icmpv4: None, - icmpv6: None, mapping_varies_by_dest_ip: Some(false), mapping_varies_by_dest_ipv6: Some(false), - hair_pinning: Some(true), portmap_probe: None, preferred_relay: Some(url_1.clone()), - relay_latency: latencies.clone(), - relay_v4_latency: latencies.clone(), - relay_v6_latency: latencies.clone(), + relay_latency: latencies, global_v4: None, global_v6: None, captive_portal: None, diff --git a/iroh/src/test_utils.rs b/iroh/src/test_utils.rs index c41f9de5285..a10d9354948 100644 --- a/iroh/src/test_utils.rs +++ b/iroh/src/test_utils.rs @@ -5,16 +5,11 @@ use anyhow::Result; pub use dns_and_pkarr_servers::DnsPkarrServer; use iroh_base::RelayUrl; use iroh_relay::{ - server::{ - AccessConfig, CertConfig, QuicConfig, RelayConfig, Server, ServerConfig, StunConfig, - TlsConfig, - }, + server::{AccessConfig, CertConfig, QuicConfig, RelayConfig, Server, ServerConfig, TlsConfig}, RelayMap, RelayNode, RelayQuicConfig, }; use tokio::sync::oneshot; -use crate::defaults::DEFAULT_STUN_PORT; - /// A drop guard to clean up test infrastructure. /// /// After dropping the test infrastructure will asynchronously shutdown and release its @@ -25,32 +20,12 @@ use crate::defaults::DEFAULT_STUN_PORT; #[allow(dead_code)] pub struct CleanupDropGuard(pub(crate) oneshot::Sender<()>); -/// Runs a relay server with STUN and QUIC enabled suitable for tests. +/// Runs a relay server with QUIC enabled suitable for tests. /// /// The returned `Url` is the url of the relay server in the returned [`RelayMap`]. /// When dropped, the returned [`Server`] does will stop running. pub async fn run_relay_server() -> Result<(RelayMap, RelayUrl, Server)> { - run_relay_server_with( - Some(StunConfig { - bind_addr: (Ipv4Addr::LOCALHOST, 0).into(), - }), - true, - ) - .await -} - -/// Runs a relay server with STUN enabled suitable for tests. -/// -/// The returned `Url` is the url of the relay server in the returned [`RelayMap`]. -/// When dropped, the returned [`Server`] does will stop running. -pub async fn run_relay_server_with_stun() -> Result<(RelayMap, RelayUrl, Server)> { - run_relay_server_with( - Some(StunConfig { - bind_addr: (Ipv4Addr::LOCALHOST, 0).into(), - }), - false, - ) - .await + run_relay_server_with(true).await } /// Runs a relay server. @@ -62,10 +37,7 @@ pub async fn run_relay_server_with_stun() -> Result<(RelayMap, RelayUrl, Server) /// /// /// The return value is similar to [`run_relay_server`]. 
-pub async fn run_relay_server_with( - stun: Option, - quic: bool, -) -> Result<(RelayMap, RelayUrl, Server)> { +pub async fn run_relay_server_with(quic: bool) -> Result<(RelayMap, RelayUrl, Server)> { let (certs, server_config) = iroh_relay::server::testing::self_signed_tls_certs_and_config(); let tls = TlsConfig { @@ -91,7 +63,6 @@ pub async fn run_relay_server_with( access: AccessConfig::Everyone, }), quic, - stun, #[cfg(feature = "metrics")] metrics_addr: None, }; @@ -103,8 +74,6 @@ pub async fn run_relay_server_with( .map(|addr| RelayQuicConfig { port: addr.port() }); let n: RelayMap = RelayNode { url: url.clone(), - stun_only: false, - stun_port: server.stun_addr().map_or(DEFAULT_STUN_PORT, |s| s.port()), quic, } .into(); diff --git a/iroh/src/util.rs b/iroh/src/util.rs index 21b5a85f7aa..d442c1c5aa4 100644 --- a/iroh/src/util.rs +++ b/iroh/src/util.rs @@ -73,7 +73,7 @@ impl Future for MaybeFuture { mod tests { use std::pin::pin; - use tokio::time::Duration; + use n0_future::time::Duration; use super::*; diff --git a/iroh/tests/integration.rs b/iroh/tests/integration.rs index 0ccac01cb2f..847b6f42c25 100644 --- a/iroh/tests/integration.rs +++ b/iroh/tests/integration.rs @@ -34,6 +34,8 @@ const ECHO_ALPN: &[u8] = b"echo"; #[test] async fn simple_node_id_based_connection_transfer() -> TestResult { + std::panic::set_hook(Box::new(console_error_panic_hook::hook)); + setup_logging(); let client = Endpoint::builder().discovery_n0().bind().await?;
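With the STUN parameter gone, the test helper takes a single flag for the QUIC address-discovery endpoint. A hypothetical test using the updated signature shown above:

// Hypothetical usage; `run_relay_server_with` is the helper from iroh's test_utils.
#[tokio::test]
async fn relay_without_stun() -> anyhow::Result<()> {
    // `true` enables the QUIC address-discovery endpoint; there is no STUN listener anymore.
    let (_relay_map, _relay_url, _server) = run_relay_server_with(true).await?;
    // ... bind an Endpoint against `_relay_url` and exercise it here ...
    Ok(())
}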