diff --git a/Cargo.lock b/Cargo.lock index d3b66a81de5..f20b7645137 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -671,6 +671,16 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "console_error_panic_hook" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc" +dependencies = [ + "cfg-if", + "wasm-bindgen", +] + [[package]] name = "const-oid" version = "0.9.6" @@ -1163,7 +1173,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "976dd42dc7e85965fe702eb8164f21f450704bdde31faefd6471dba214cb594e" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -2225,6 +2235,7 @@ dependencies = [ "bytes", "cfg_aliases", "clap", + "console_error_panic_hook", "crypto_box", "data-encoding", "der", @@ -2249,7 +2260,7 @@ dependencies = [ "n0-watcher", "nested_enum_utils", "netdev", - "netwatch 0.6.0", + "netwatch", "parse-size", "pin-project", "pkarr", @@ -2463,7 +2474,7 @@ dependencies = [ "once_cell", "socket2", "tracing", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -2517,7 +2528,6 @@ dependencies = [ "simdutf8", "snafu", "strum", - "stun-rs", "time", "tokio", "tokio-rustls", @@ -2542,7 +2552,7 @@ checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9" dependencies = [ "hermit-abi", "libc", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -2945,39 +2955,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "netwatch" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67eeaa5f7505c93c5a9b35ba84fd21fb8aa3f24678c76acfe8716af7862fb07a" -dependencies = [ - "atomic-waker", - "bytes", - "cfg_aliases", - "derive_more", - "iroh-quinn-udp", - "js-sys", - "libc", - "n0-future", - "nested_enum_utils", - "netdev", - "netlink-packet-core", - "netlink-packet-route 0.23.0", - "netlink-proto", - "netlink-sys", - 
"serde", - "snafu", - "socket2", - "time", - "tokio", - "tokio-util", - "tracing", - "web-sys", - "windows 0.59.0", - "windows-result 0.3.2", - "wmi", -] - [[package]] name = "netwatch" version = "0.6.0" @@ -3478,9 +3455,9 @@ checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" [[package]] name = "portmapper" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d6db66007eac4a0ec8331d0d20c734bd64f6445d64bbaf0d0a27fea7a054e36" +checksum = "f651ba57abd6d766deb1b86f45b50c189db69204f20126e84f033168c1bf0853" dependencies = [ "base64", "bytes", @@ -3492,7 +3469,7 @@ dependencies = [ "iroh-metrics", "libc", "nested_enum_utils", - "netwatch 0.5.0", + "netwatch", "num_enum", "rand 0.8.5", "serde", @@ -3711,7 +3688,7 @@ dependencies = [ "once_cell", "socket2", "tracing", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -4050,7 +4027,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -4152,7 +4129,7 @@ dependencies = [ "security-framework", "security-framework-sys", "webpki-root-certs 0.26.11", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -4748,7 +4725,7 @@ dependencies = [ "getrandom 0.3.2", "once_cell", "rustix", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -5573,7 +5550,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.48.0", ] [[package]] diff --git a/docker/Dockerfile b/docker/Dockerfile index 2a945877821..662f3b00179 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -2,7 +2,7 @@ FROM rust:alpine AS chef RUN update-ca-certificates RUN apk add --no-cache musl-dev openssl-dev pkgconfig -RUN cargo install cargo-chef +RUN cargo install cargo-chef WORKDIR /iroh FROM chef AS planner @@ 
-40,7 +40,7 @@ RUN chmod +x /iroh-relay WORKDIR / # expose the default ports -# http, https, stun, metrics +# http, https, metrics EXPOSE 80 443 3478/udp 9090 ENTRYPOINT ["/iroh-relay"] CMD [""] @@ -62,4 +62,4 @@ WORKDIR / # dns, metrics EXPOSE 53/udp 9090 ENTRYPOINT ["/iroh-dns-server"] -CMD [""] \ No newline at end of file +CMD [""] diff --git a/example.config.toml b/example.config.toml index 04d5350b9f3..f36385f79d3 100644 --- a/example.config.toml +++ b/example.config.toml @@ -1,5 +1,2 @@ [[relay_nodes]] url = "https://foo.bar" -stun_only = false -stun_port = 1244 - diff --git a/iroh-relay/Cargo.toml b/iroh-relay/Cargo.toml index 4b66b094d5b..6dfeb2615de 100644 --- a/iroh-relay/Cargo.toml +++ b/iroh-relay/Cargo.toml @@ -51,7 +51,6 @@ reqwest = { version = "0.12", default-features = false, features = [ rustls = { version = "0.23", default-features = false, features = ["ring"] } serde = { version = "1", features = ["derive", "rc"] } strum = { version = "0.26", features = ["derive"] } -stun-rs = "0.1.11" tokio = { version = "1", features = [ "io-util", "macros", diff --git a/iroh-relay/README.md b/iroh-relay/README.md index a87a573af00..e6711b636c5 100644 --- a/iroh-relay/README.md +++ b/iroh-relay/README.md @@ -15,10 +15,10 @@ relays, including: - Relay Protocol: The protocol used to communicate between relay servers and clients - Relay Server: A fully-fledged iroh-relay server over HTTP or HTTPS. - Optionally will also expose a stun endpoint and metrics. + Optionally will also expose a QAD endpoint and metrics. - Relay Client: A client for establishing connections to the relay. - Server Binary: A CLI for running your own relay server. It can be configured - to also offer STUN support and expose metrics. + to also expose metrics. Used in [iroh], created with love by the [n0 team](https://n0.computer/). 
diff --git a/iroh-relay/src/defaults.rs b/iroh-relay/src/defaults.rs index 12043a0f36e..47959c285bf 100644 --- a/iroh-relay/src/defaults.rs +++ b/iroh-relay/src/defaults.rs @@ -1,10 +1,5 @@ //! Default values used in the relay. -/// The default STUN port used by the Relay server. -/// -/// The STUN port as defined by [RFC 8489]() -pub const DEFAULT_STUN_PORT: u16 = 3478; - /// The default QUIC port used by the Relay server to accept QUIC connections /// for QUIC address discovery /// diff --git a/iroh-relay/src/dns.rs b/iroh-relay/src/dns.rs index b093971e9c1..e76dff745e8 100644 --- a/iroh-relay/src/dns.rs +++ b/iroh-relay/src/dns.rs @@ -4,6 +4,7 @@ use std::{ fmt, future::Future, net::{IpAddr, Ipv6Addr, SocketAddr}, + sync::Arc, }; use hickory_resolver::{name_server::TokioConnectionProvider, TokioResolver}; @@ -14,6 +15,7 @@ use n0_future::{ }; use nested_enum_utils::common_fields; use snafu::{Backtrace, GenerateImplicitData, OptionExt, Snafu}; +use tokio::sync::RwLock; use url::Url; use crate::node_info::{LookupError, NodeInfo}; @@ -77,7 +79,10 @@ impl StaggeredError { /// The DNS resolver used throughout `iroh`. #[derive(Debug, Clone)] -pub struct DnsResolver(TokioResolver); +pub struct DnsResolver { + resolver: Arc>, + nameserver: Option, +} impl DnsResolver { /// Create a new DNS resolver with sensible cross-platform defaults. @@ -86,6 +91,14 @@ impl DnsResolver { /// This does not work at least on some Androids, therefore we fallback /// to the default `ResolverConfig` which uses eg. to google's `8.8.8.8` or `8.8.4.4`. 
pub fn new() -> Self { + let resolver = Self::new_inner(); + Self { + resolver: Arc::new(RwLock::new(resolver)), + nameserver: None, + } + } + + fn new_inner() -> TokioResolver { let (system_config, mut options) = hickory_resolver::system_conf::read_system_conf().unwrap_or_default(); @@ -110,11 +123,19 @@ impl DnsResolver { let mut builder = TokioResolver::builder_with_config(config, TokioConnectionProvider::default()); *builder.options_mut() = options; - DnsResolver(builder.build()) + builder.build() } /// Create a new DNS resolver configured with a single UDP DNS nameserver. pub fn with_nameserver(nameserver: SocketAddr) -> Self { + let resolver = Self::with_nameserver_inner(nameserver); + Self { + resolver: Arc::new(RwLock::new(resolver)), + nameserver: Some(nameserver), + } + } + + fn with_nameserver_inner(nameserver: SocketAddr) -> TokioResolver { let mut config = hickory_resolver::config::ResolverConfig::new(); let nameserver_config = hickory_resolver::config::NameServerConfig::new( nameserver, @@ -124,12 +145,24 @@ impl DnsResolver { let builder = TokioResolver::builder_with_config(config, TokioConnectionProvider::default()); - DnsResolver(builder.build()) + builder.build() } /// Removes all entries from the cache. - pub fn clear_cache(&self) { - self.0.clear_cache(); + pub async fn clear_cache(&self) { + self.resolver.read().await.clear_cache(); + } + + /// Recreate the inner resolver + pub async fn reset(&self) { + let mut this = self.resolver.write().await; + let resolver = if let Some(nameserver) = self.nameserver { + Self::with_nameserver_inner(nameserver) + } else { + Self::new_inner() + }; + + *this = resolver; } /// Lookup a TXT record. 
@@ -139,7 +172,8 @@ impl DnsResolver { timeout: Duration, ) -> Result { let host = host.to_string(); - let res = time::timeout(timeout, self.0.txt_lookup(host)).await??; + let this = self.resolver.read().await; + let res = time::timeout(timeout, this.txt_lookup(host)).await??; Ok(TxtLookup(res)) } @@ -150,7 +184,8 @@ impl DnsResolver { timeout: Duration, ) -> Result, DnsError> { let host = host.to_string(); - let addrs = time::timeout(timeout, self.0.ipv4_lookup(host)).await??; + let this = self.resolver.read().await; + let addrs = time::timeout(timeout, this.ipv4_lookup(host)).await??; Ok(addrs.into_iter().map(|ip| IpAddr::V4(ip.0))) } @@ -161,7 +196,8 @@ impl DnsResolver { timeout: Duration, ) -> Result, DnsError> { let host = host.to_string(); - let addrs = time::timeout(timeout, self.0.ipv6_lookup(host)).await??; + let this = self.resolver.read().await; + let addrs = time::timeout(timeout, this.ipv6_lookup(host)).await??; Ok(addrs.into_iter().map(|ip| IpAddr::V6(ip.0))) } @@ -349,7 +385,10 @@ impl Default for DnsResolver { impl From for DnsResolver { fn from(resolver: TokioResolver) -> Self { - DnsResolver(resolver) + DnsResolver { + resolver: Arc::new(RwLock::new(resolver)), + nameserver: None, + } } } diff --git a/iroh-relay/src/lib.rs b/iroh-relay/src/lib.rs index 71a18d7aaf3..e561ddaf4ad 100644 --- a/iroh-relay/src/lib.rs +++ b/iroh-relay/src/lib.rs @@ -20,10 +20,10 @@ doc = "- `server`: A fully-fledged iroh-relay server over HTTP or HTTPS." )] //! -//! Optionally will also expose a stun endpoint and metrics. (requires the feature flag `server`) +//! Optionally will also expose a QAD endpoint and metrics. (requires the feature flag `server`) //! - [`client`]: A client for establishing connections to the relay. //! - *Server Binary*: A CLI for running your own relay server. It can be configured to also offer -//! STUN support and expose metrics. +//! QAD support and expose metrics. 
// Based on tailscale/derp/derp.go #![cfg_attr(iroh_docsrs, feature(doc_auto_cfg))] diff --git a/iroh-relay/src/main.rs b/iroh-relay/src/main.rs index 3aa4da21546..8bcf7f597e6 100644 --- a/iroh-relay/src/main.rs +++ b/iroh-relay/src/main.rs @@ -16,7 +16,6 @@ use iroh_base::NodeId; use iroh_relay::{ defaults::{ DEFAULT_HTTPS_PORT, DEFAULT_HTTP_PORT, DEFAULT_METRICS_PORT, DEFAULT_RELAY_QUIC_PORT, - DEFAULT_STUN_PORT, }, server::{self as relay, ClientRateLimit, QuicConfig}, }; @@ -114,7 +113,7 @@ struct Config { /// /// Defaults to `true`. /// - /// Disabling will leave only the STUN server. The `http_bind_addr` and `tls` + /// Disabling will leave only the quic server. The `http_bind_addr` and `tls` /// configuration options will be ignored. #[serde(default = "cfg_defaults::enable_relay")] enable_relay: bool, @@ -140,15 +139,6 @@ struct Config { /// /// Must exist if `enable_quic_addr_discovery` is `true`. tls: Option, - /// Whether to run a STUN server. It will bind to the same IP as the `addr` field. - /// - /// Defaults to `true`. - #[serde(default = "cfg_defaults::enable_stun")] - enable_stun: bool, - /// The socket address to bind the STUN server on. - /// - /// Defaults to using the `http_bind_addr` with the port set to [`DEFAULT_STUN_PORT`]. - stun_bind_addr: Option, /// Whether to allow QUIC connections for QUIC address discovery /// /// If no `tls` is set, this will error. @@ -174,7 +164,7 @@ struct Config { key_cache_capacity: Option, /// Access control for relaying connections. /// - /// This controls which nodes are allowed to relay connections, other endpoints, like STUN are not controlled by this. + /// This controls which nodes are allowed to relay connections, other endpoints are not controlled by this. 
#[serde(default)] access: AccessConfig, } @@ -314,11 +304,6 @@ impl Config { .unwrap_or((Ipv6Addr::UNSPECIFIED, DEFAULT_HTTP_PORT).into()) } - fn stun_bind_addr(&self) -> SocketAddr { - self.stun_bind_addr - .unwrap_or_else(|| SocketAddr::new(self.http_bind_addr().ip(), DEFAULT_STUN_PORT)) - } - fn metrics_bind_addr(&self) -> SocketAddr { self.metrics_bind_addr .unwrap_or_else(|| SocketAddr::new(self.http_bind_addr().ip(), DEFAULT_METRICS_PORT)) @@ -331,8 +316,6 @@ impl Default for Config { enable_relay: cfg_defaults::enable_relay(), http_bind_addr: None, tls: None, - enable_stun: cfg_defaults::enable_stun(), - stun_bind_addr: None, enable_quic_addr_discovery: cfg_defaults::enable_quic_addr_discovery(), limits: None, enable_metrics: cfg_defaults::enable_metrics(), @@ -352,10 +335,6 @@ mod cfg_defaults { true } - pub(crate) fn enable_stun() -> bool { - true - } - pub(crate) fn enable_quic_addr_discovery() -> bool { false } @@ -722,12 +701,8 @@ async fn build_relay_config(cfg: Config) -> Result, - #[snafu(implicit)] - span_trace: n0_snafu::SpanTrace, -})] -#[allow(missing_docs)] -#[derive(Debug, Snafu)] -#[non_exhaustive] -pub enum StunError { - /// The STUN message could not be parsed or is otherwise invalid. - #[snafu(display("invalid message"))] - InvalidMessage {}, - /// STUN request is not a binding request when it should be. - #[snafu(display("not binding"))] - NotBinding {}, - /// STUN packet is not a response when it should be. - #[snafu(display("not success response"))] - NotSuccessResponse {}, - /// STUN response has malformed attributes. - #[snafu(display("malformed attributes"))] - MalformedAttrs {}, - /// STUN request didn't end in fingerprint. - #[snafu(display("no fingerprint"))] - NoFingerprint {}, - /// STUN request had bogus fingerprint. - #[snafu(display("invalid fingerprint"))] - InvalidFingerprint {}, -} - -/// Generates a binding request STUN packet. 
-pub fn request(tx: TransactionId) -> Vec { - let fp = Fingerprint::default(); - let msg = StunMessageBuilder::new(methods::BINDING, MessageClass::Request) - .with_transaction_id(tx) - .with_attribute(fp) - .build(); - - let encoder = MessageEncoderBuilder::default().build(); - let mut buffer = vec![0u8; 150]; - let size = encoder.encode(&mut buffer, &msg).expect("invalid encoding"); - buffer.truncate(size); - buffer -} - -/// Generates a binding response. -pub fn response(tx: TransactionId, addr: SocketAddr) -> Vec { - let msg = StunMessageBuilder::new(methods::BINDING, MessageClass::SuccessResponse) - .with_transaction_id(tx) - .with_attribute(XorMappedAddress::from(addr)) - .build(); - - let encoder = MessageEncoderBuilder::default().build(); - let mut buffer = vec![0u8; 150]; - let size = encoder.encode(&mut buffer, &msg).expect("invalid encoding"); - buffer.truncate(size); - buffer -} - -// Copied from stun_rs -// const MAGIC_COOKIE: Cookie = Cookie(0x2112_A442); -const COOKIE: [u8; 4] = 0x2112_A442u32.to_be_bytes(); - -/// Reports whether b is a STUN message. -pub fn is(b: &[u8]) -> bool { - b.len() >= stun_rs::MESSAGE_HEADER_SIZE && - b[0]&0b11000000 == 0 && // top two bits must be zero - b[4..8] == COOKIE -} - -/// Parses a STUN binding request. -pub fn parse_binding_request(b: &[u8]) -> Result { - let ctx = DecoderContextBuilder::default() - .with_validation() // ensure fingerprint is validated - .build(); - let decoder = MessageDecoderBuilder::default().with_context(ctx).build(); - let (msg, _) = decoder.decode(b).map_err(|_| InvalidMessageSnafu.build())?; - - let tx = *msg.transaction_id(); - if msg.method() != methods::BINDING { - return Err(NotBindingSnafu.build()); - } - - // TODO: Tailscale sets the software to tailscale, we should check if we want to do this too. 
- - if msg - .attributes() - .last() - .map(|attr| !attr.is_fingerprint()) - .unwrap_or_default() - { - return Err(NoFingerprintSnafu.build()); - } - - Ok(tx) -} - -/// Parses a successful binding response STUN packet. -/// The IP address is extracted from the XOR-MAPPED-ADDRESS attribute. -pub fn parse_response(b: &[u8]) -> Result<(TransactionId, SocketAddr), StunError> { - let decoder = MessageDecoder::default(); - let (msg, _) = decoder.decode(b).map_err(|_| InvalidMessageSnafu.build())?; - - let tx = *msg.transaction_id(); - if msg.class() != MessageClass::SuccessResponse { - return Err(NotSuccessResponseSnafu.build()); - } - - // Read through the attributes. - // The the addr+port reported by XOR-MAPPED-ADDRESS - // as the canonical value. If the attribute is not - // present but the STUN server responds with - // MAPPED-ADDRESS we fall back to it. - - let mut addr = None; - let mut fallback_addr = None; - for attr in msg.attributes() { - match attr { - StunAttribute::XorMappedAddress(a) => { - let mut a = *a.socket_address(); - a.set_ip(a.ip().to_canonical()); - addr = Some(a); - } - StunAttribute::MappedAddress(a) => { - let mut a = *a.socket_address(); - a.set_ip(a.ip().to_canonical()); - fallback_addr = Some(a); - } - _ => {} - } - } - - if let Some(addr) = addr { - return Ok((tx, addr)); - } - - if let Some(addr) = fallback_addr { - return Ok((tx, addr)); - } - - Err(MalformedAttrsSnafu.build()) -} - -#[cfg(test)] -mod tests { - - use std::net::{IpAddr, Ipv4Addr}; - - use super::*; - - struct ResponseTestCase { - name: &'static str, - data: Vec, - want_tid: Vec, - want_addr: IpAddr, - want_port: u16, - } - - #[test] - fn test_parse_response() { - let cases = vec![ - ResponseTestCase { - name: "google-1", - data: vec![ - 0x01, 0x01, 0x00, 0x0c, 0x21, 0x12, 0xa4, 0x42, - 0x23, 0x60, 0xb1, 0x1e, 0x3e, 0xc6, 0x8f, 0xfa, - 0x93, 0xe0, 0x80, 0x07, 0x00, 0x20, 0x00, 0x08, - 0x00, 0x01, 0xc7, 0x86, 0x69, 0x57, 0x85, 0x6f, - ], - want_tid: vec![ - 0x23, 0x60, 
0xb1, 0x1e, 0x3e, 0xc6, 0x8f, 0xfa, - 0x93, 0xe0, 0x80, 0x07, - ], - want_addr: IpAddr::V4(Ipv4Addr::from([72, 69, 33, 45])), - want_port: 59028, - }, - ResponseTestCase { - name: "google-2", - data: vec![ - 0x01, 0x01, 0x00, 0x0c, 0x21, 0x12, 0xa4, 0x42, - 0xf9, 0xf1, 0x21, 0xcb, 0xde, 0x7d, 0x7c, 0x75, - 0x92, 0x3c, 0xe2, 0x71, 0x00, 0x20, 0x00, 0x08, - 0x00, 0x01, 0xc7, 0x87, 0x69, 0x57, 0x85, 0x6f, - ], - want_tid: vec![ - 0xf9, 0xf1, 0x21, 0xcb, 0xde, 0x7d, 0x7c, 0x75, - 0x92, 0x3c, 0xe2, 0x71, - ], - want_addr: IpAddr::V4(Ipv4Addr::from([72, 69, 33, 45])), - want_port: 59029, - }, - ResponseTestCase{ - name: "stun.sipgate.net:10000", - data: vec![ - 0x01, 0x01, 0x00, 0x44, 0x21, 0x12, 0xa4, 0x42, - 0x48, 0x2e, 0xb6, 0x47, 0x15, 0xe8, 0xb2, 0x8e, - 0xae, 0xad, 0x64, 0x44, 0x00, 0x01, 0x00, 0x08, - 0x00, 0x01, 0xe4, 0xab, 0x48, 0x45, 0x21, 0x2d, - 0x00, 0x04, 0x00, 0x08, 0x00, 0x01, 0x27, 0x10, - 0xd9, 0x0a, 0x44, 0x98, 0x00, 0x05, 0x00, 0x08, - 0x00, 0x01, 0x27, 0x11, 0xd9, 0x74, 0x7a, 0x8a, - 0x80, 0x20, 0x00, 0x08, 0x00, 0x01, 0xc5, 0xb9, - 0x69, 0x57, 0x85, 0x6f, 0x80, 0x22, 0x00, 0x10, - 0x56, 0x6f, 0x76, 0x69, 0x64, 0x61, 0x2e, 0x6f, - 0x72, 0x67, 0x20, 0x30, 0x2e, 0x39, 0x36, 0x00, - ], - want_tid: vec![ - 0x48, 0x2e, 0xb6, 0x47, 0x15, 0xe8, 0xb2, 0x8e, - 0xae, 0xad, 0x64, 0x44, - ], - want_addr: IpAddr::V4(Ipv4Addr::from([72, 69, 33, 45])), - want_port: 58539, - }, - ResponseTestCase{ - name: "stun.powervoip.com:3478", - data: vec![ - 0x01, 0x01, 0x00, 0x24, 0x21, 0x12, 0xa4, 0x42, - 0x7e, 0x57, 0x96, 0x68, 0x29, 0xf4, 0x44, 0x60, - 0x9d, 0x1d, 0xea, 0xa6, 0x00, 0x01, 0x00, 0x08, - 0x00, 0x01, 0xe9, 0xd3, 0x48, 0x45, 0x21, 0x2d, - 0x00, 0x04, 0x00, 0x08, 0x00, 0x01, 0x0d, 0x96, - 0x4d, 0x48, 0xa9, 0xd4, 0x00, 0x05, 0x00, 0x08, - 0x00, 0x01, 0x0d, 0x97, 0x4d, 0x48, 0xa9, 0xd5, - ], - want_tid: vec![ - 0x7e, 0x57, 0x96, 0x68, 0x29, 0xf4, 0x44, 0x60, - 0x9d, 0x1d, 0xea, 0xa6, - ], - want_addr: IpAddr::V4(Ipv4Addr::from([72, 69, 33, 45])), - want_port: 
59859, - }, - ResponseTestCase{ - name: "in-process pion server", - data: vec![ - 0x01, 0x01, 0x00, 0x24, 0x21, 0x12, 0xa4, 0x42, - 0xeb, 0xc2, 0xd3, 0x6e, 0xf4, 0x71, 0x21, 0x7c, - 0x4f, 0x3e, 0x30, 0x8e, 0x80, 0x22, 0x00, 0x0a, - 0x65, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, - 0x65, 0x72, 0x00, 0x00, 0x00, 0x20, 0x00, 0x08, - 0x00, 0x01, 0xce, 0x66, 0x5e, 0x12, 0xa4, 0x43, - 0x80, 0x28, 0x00, 0x04, 0xb6, 0x99, 0xbb, 0x02, - 0x01, 0x01, 0x00, 0x24, 0x21, 0x12, 0xa4, 0x42, - ], - want_tid: vec![ - 0xeb, 0xc2, 0xd3, 0x6e, 0xf4, 0x71, 0x21, 0x7c, - 0x4f, 0x3e, 0x30, 0x8e, - ], - want_addr: IpAddr::V4(Ipv4Addr::from([127, 0, 0, 1])), - want_port: 61300, - }, - ResponseTestCase{ - name: "stuntman-server ipv6", - data: vec![ - 0x01, 0x01, 0x00, 0x48, 0x21, 0x12, 0xa4, 0x42, - 0x06, 0xf5, 0x66, 0x85, 0xd2, 0x8a, 0xf3, 0xe6, - 0x9c, 0xe3, 0x41, 0xe2, 0x00, 0x01, 0x00, 0x14, - 0x00, 0x02, 0x90, 0xce, 0x26, 0x02, 0x00, 0xd1, - 0xb4, 0xcf, 0xc1, 0x00, 0x38, 0xb2, 0x31, 0xff, - 0xfe, 0xef, 0x96, 0xf6, 0x80, 0x2b, 0x00, 0x14, - 0x00, 0x02, 0x0d, 0x96, 0x26, 0x04, 0xa8, 0x80, - 0x00, 0x02, 0x00, 0xd1, 0x00, 0x00, 0x00, 0x00, - 0x00, 0xc5, 0x70, 0x01, 0x00, 0x20, 0x00, 0x14, - 0x00, 0x02, 0xb1, 0xdc, 0x07, 0x10, 0xa4, 0x93, - 0xb2, 0x3a, 0xa7, 0x85, 0xea, 0x38, 0xc2, 0x19, - 0x62, 0x0c, 0xd7, 0x14, - ], - want_tid: vec![ - 6, 245, 102, 133, 210, 138, 243, 230, 156, 227, - 65, 226, - ], - want_addr: "2602:d1:b4cf:c100:38b2:31ff:feef:96f6".parse().unwrap(), - want_port: 37070, - }, - // Testing STUN attribute padding rules using STUN software attribute - // with values of 1 & 3 length respectively before the XorMappedAddress attribute - ResponseTestCase { - name: "software-a", - data: vec![ - 0x01, 0x01, 0x00, 0x14, 0x21, 0x12, 0xa4, 0x42, - 0xeb, 0xc2, 0xd3, 0x6e, 0xf4, 0x71, 0x21, 0x7c, - 0x4f, 0x3e, 0x30, 0x8e, 0x80, 0x22, 0x00, 0x01, - 0x61, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x08, - 0x00, 0x01, 0xce, 0x66, 0x5e, 0x12, 0xa4, 0x43, - ], - want_tid: vec![ - 0xeb, 0xc2, 0xd3, 
0x6e, 0xf4, 0x71, 0x21, 0x7c, - 0x4f, 0x3e, 0x30, 0x8e, - ], - want_addr: IpAddr::V4(Ipv4Addr::from([127, 0, 0, 1])), - want_port: 61300, - }, - ResponseTestCase { - name: "software-abc", - data: vec![ - 0x01, 0x01, 0x00, 0x14, 0x21, 0x12, 0xa4, 0x42, - 0xeb, 0xc2, 0xd3, 0x6e, 0xf4, 0x71, 0x21, 0x7c, - 0x4f, 0x3e, 0x30, 0x8e, 0x80, 0x22, 0x00, 0x03, - 0x61, 0x62, 0x63, 0x00, 0x00, 0x20, 0x00, 0x08, - 0x00, 0x01, 0xce, 0x66, 0x5e, 0x12, 0xa4, 0x43, - ], - want_tid: vec![ - 0xeb, 0xc2, 0xd3, 0x6e, 0xf4, 0x71, 0x21, 0x7c, - 0x4f, 0x3e, 0x30, 0x8e, - ], - want_addr: IpAddr::V4(Ipv4Addr::from([127, 0, 0, 1])), - want_port: 61300, - }, - ResponseTestCase { - name: "no-4in6", - data: data_encoding::HEXLOWER.decode(b"010100182112a4424fd5d202dcb37d31fc773306002000140002cd3d2112a4424fd5d202dcb382ce2dc3fcc7").unwrap(), - want_tid: vec![79, 213, 210, 2, 220, 179, 125, 49, 252, 119, 51, 6], - want_addr: IpAddr::V4(Ipv4Addr::from([209, 180, 207, 193])), - want_port: 60463, - }, - ]; - - for (i, test) in cases.into_iter().enumerate() { - println!("Case {i}: {}", test.name); - let (tx, addr_port) = parse_response(&test.data).unwrap(); - assert!(is(&test.data)); - assert_eq!(tx.as_bytes(), &test.want_tid[..]); - assert_eq!(addr_port.ip(), test.want_addr); - assert_eq!(addr_port.port(), test.want_port); - } - } - - #[test] - fn test_parse_binding_request() { - let tx = TransactionId::default(); - let req = request(tx); - assert!(is(&req)); - let got_tx = parse_binding_request(&req).unwrap(); - assert_eq!(got_tx, tx); - } - - #[test] - fn test_stun_cookie() { - assert_eq!(stun_rs::MAGIC_COOKIE, COOKIE); - } - - #[test] - fn test_response() { - let txn = |n| TransactionId::from([n; 12]); - - struct Case { - tx: TransactionId, - addr: IpAddr, - port: u16, - } - let tests = vec![ - Case { - tx: txn(1), - addr: "1.2.3.4".parse().unwrap(), - port: 254, - }, - Case { - tx: txn(2), - addr: "1.2.3.4".parse().unwrap(), - port: 257, - }, - Case { - tx: txn(3), - addr: "1::4".parse().unwrap(), 
- port: 254, - }, - Case { - tx: txn(4), - addr: "1::4".parse().unwrap(), - port: 257, - }, - ]; - - for tt in tests { - let res = response(tt.tx, SocketAddr::new(tt.addr, tt.port)); - assert!(is(&res)); - let (tx2, addr2) = parse_response(&res).unwrap(); - assert_eq!(tt.tx, tx2); - assert_eq!(tt.addr, addr2.ip()); - assert_eq!(tt.port, addr2.port()); - } - } -} diff --git a/iroh-relay/src/quic.rs b/iroh-relay/src/quic.rs index 9f186e45dad..2f6c0d277fc 100644 --- a/iroh-relay/src/quic.rs +++ b/iroh-relay/src/quic.rs @@ -4,10 +4,7 @@ use std::{net::SocketAddr, sync::Arc}; use n0_future::time::Duration; use nested_enum_utils::common_fields; -use quinn::{ - crypto::rustls::{NoInitialCipherSuite, QuicClientConfig}, - VarInt, -}; +use quinn::{crypto::rustls::QuicClientConfig, VarInt}; use snafu::{Backtrace, Snafu}; use tokio::sync::watch; @@ -20,7 +17,10 @@ pub const QUIC_ADDR_DISC_CLOSE_REASON: &[u8] = b"finished"; #[cfg(feature = "server")] pub(crate) mod server { - use quinn::{crypto::rustls::QuicServerConfig, ApplicationClose, ConnectionError}; + use quinn::{ + crypto::rustls::{NoInitialCipherSuite, QuicServerConfig}, + ApplicationClose, ConnectionError, + }; use snafu::ResultExt; use tokio::task::JoinSet; use tokio_util::{sync::CancellationToken, task::AbortOnDropHandle}; @@ -243,12 +243,10 @@ pub enum Error { Connection { source: quinn::ConnectionError }, #[snafu(transparent)] WatchRecv { source: watch::error::RecvError }, - #[snafu(transparent)] - NoIntitialCipherSuite { source: NoInitialCipherSuite }, } /// Handles the client side of QUIC address discovery. -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct QuicClient { /// A QUIC Endpoint. ep: quinn::Endpoint, @@ -259,16 +257,14 @@ pub struct QuicClient { impl QuicClient { /// Create a new QuicClient to handle the client side of QUIC /// address discovery. 
- pub fn new( - ep: quinn::Endpoint, - mut client_config: rustls::ClientConfig, - ) -> Result { + pub fn new(ep: quinn::Endpoint, mut client_config: rustls::ClientConfig) -> Self { // add QAD alpn client_config.alpn_protocols = vec![ALPN_QUIC_ADDR_DISC.into()]; // go from rustls client config to rustls QUIC specific client config to // a quinn client config - let mut client_config = - quinn::ClientConfig::new(Arc::new(QuicClientConfig::try_from(client_config)?)); + let mut client_config = quinn::ClientConfig::new(Arc::new( + QuicClientConfig::try_from(client_config).expect("known ciphersuite"), + )); // enable the receive side of address discovery let mut transport = quinn_proto::TransportConfig::default(); @@ -284,9 +280,15 @@ impl QuicClient { // timeout (set to 30s by default). transport.initial_rtt(Duration::from_millis(111)); transport.receive_observed_address_reports(true); + + // keep it alive + transport.keep_alive_interval(Some(Duration::from_secs(25))); + transport.max_idle_timeout(Some( + Duration::from_secs(35).try_into().expect("known value"), + )); client_config.transport_config(Arc::new(transport)); - Ok(Self { ep, client_config }) + Self { ep, client_config } } /// Client side of QUIC address discovery. @@ -295,7 +297,8 @@ impl QuicClient { /// and estimated latency of the connection. /// /// Consumes and gracefully closes the connection. 
- pub async fn get_addr_and_latency( + #[cfg(test)] + async fn get_addr_and_latency( &self, server_addr: SocketAddr, host: &str, @@ -337,16 +340,30 @@ impl QuicClient { conn.close(QUIC_ADDR_DISC_CLOSE_CODE, QUIC_ADDR_DISC_CLOSE_REASON); Ok((observed_addr, latency)) } + + /// Create a connection usable for qad + pub async fn create_conn( + &self, + server_addr: SocketAddr, + host: &str, + ) -> Result { + let config = self.client_config.clone(); + let connecting = self.ep.connect_with(config, server_addr, host); + let conn = connecting?.await?; + Ok(conn) + } } #[cfg(all(test, feature = "server"))] mod tests { use std::net::Ipv4Addr; - use n0_future::{task::AbortOnDropHandle, time}; + use n0_future::{ + task::AbortOnDropHandle, + time::{self, Instant}, + }; use n0_snafu::{Error, Result, ResultExt}; use quinn::crypto::rustls::QuicServerConfig; - use tokio::time::Instant; use tracing::{debug, info, info_span, Instrument}; use tracing_test::traced_test; use webpki_types::PrivatePkcs8KeyDer; @@ -376,7 +393,7 @@ mod tests { // create the client configuration used for the client endpoint when they // initiate a connection with the server let client_config = crate::client::make_dangerous_client_config(); - let quic_client = QuicClient::new(client_endpoint.clone(), client_config)?; + let quic_client = QuicClient::new(client_endpoint.clone(), client_config); let (addr, _latency) = quic_client .get_addr_and_latency(quic_server.bind_addr(), &host.to_string()) @@ -409,7 +426,7 @@ mod tests { // create the client configuration used for the client endpoint when they // initiate a connection with the server let client_config = crate::client::make_dangerous_client_config(); - let quic_client = QuicClient::new(client_endpoint.clone(), client_config)?; + let quic_client = QuicClient::new(client_endpoint.clone(), client_config); // Start a connection attempt with nirvana - this will fail let task = AbortOnDropHandle::new(tokio::spawn({ @@ -509,7 +526,7 @@ mod tests { // create the 
client configuration used for the client endpoint when they // initiate a connection with the server let client_config = crate::client::make_dangerous_client_config(); - let quic_client = QuicClient::new(client_endpoint.clone(), client_config)?; + let quic_client = QuicClient::new(client_endpoint.clone(), client_config); // Now we should still connect, but it should take more than 1s. info!("making QAD request"); diff --git a/iroh-relay/src/relay_map.rs b/iroh-relay/src/relay_map.rs index b16da39384f..4f8740b7735 100644 --- a/iroh-relay/src/relay_map.rs +++ b/iroh-relay/src/relay_map.rs @@ -5,7 +5,7 @@ use std::{collections::BTreeMap, fmt, sync::Arc}; use iroh_base::RelayUrl; use serde::{Deserialize, Serialize}; -use crate::defaults::{DEFAULT_RELAY_QUIC_PORT, DEFAULT_STUN_PORT}; +use crate::defaults::DEFAULT_RELAY_QUIC_PORT; /// Configuration of all the relay servers that can be used. #[derive(Debug, Clone, PartialEq, Eq)] @@ -68,7 +68,7 @@ impl FromIterator for RelayMap { impl From for RelayMap { /// Creates a [`RelayMap`] from a [`RelayUrl`]. /// - /// The [`RelayNode`]s in the [`RelayMap`] will have the default STUN and QUIC address + /// The [`RelayNode`]s in the [`RelayMap`] will have the default QUIC address /// discovery ports. fn from(value: RelayUrl) -> Self { Self { @@ -88,7 +88,7 @@ impl From for RelayMap { impl FromIterator for RelayMap { /// Creates a [`RelayMap`] from an iterator of [`RelayUrl`]. /// - /// The [`RelayNode`]s in the [`RelayMap`] will have the default STUN and QUIC address + /// The [`RelayNode`]s in the [`RelayMap`] will have the default QUIC address /// discovery ports. fn from_iter>(iter: T) -> Self { Self { @@ -116,15 +116,6 @@ impl fmt::Display for RelayMap { pub struct RelayNode { /// The [`RelayUrl`] where this relay server can be dialed. pub url: RelayUrl, - /// Whether this relay server should only be used for STUN requests. 
- /// - /// This essentially allows you to use a normal STUN server as a relay node, no relay - /// functionality is used. - pub stun_only: bool, - /// The stun port of the relay server. - /// - /// Setting this to `0` means the default STUN port is used. - pub stun_port: u16, /// Configuration to speak to the QUIC endpoint on the relay server. /// /// When `None`, we will not attempt to do QUIC address discovery @@ -137,8 +128,6 @@ impl From for RelayNode { fn from(value: RelayUrl) -> Self { Self { url: value, - stun_only: false, - stun_port: DEFAULT_STUN_PORT, quic: quic_config(), } } diff --git a/iroh-relay/src/server.rs b/iroh-relay/src/server.rs index 73bd29d712e..ed94a2c19ca 100644 --- a/iroh-relay/src/server.rs +++ b/iroh-relay/src/server.rs @@ -14,7 +14,6 @@ //! - HTTPS `/relay`: The main URL endpoint to which clients connect and sends traffic over. //! - HTTPS `/ping`: Used for net_report probes. //! - HTTPS `/generate_204`: Used for net_report probes. -//! - STUN: UDP port for STUN requests/responses. use std::{fmt, future::Future, net::SocketAddr, num::NonZeroU32, pin::Pin, sync::Arc}; @@ -31,16 +30,15 @@ use n0_future::{future::Boxed, StreamExt}; use nested_enum_utils::common_fields; use snafu::{Backtrace, ResultExt, Snafu}; use tokio::{ - net::{TcpListener, UdpSocket}, + net::TcpListener, task::{JoinError, JoinSet}, }; use tokio_util::task::AbortOnDropHandle; -use tracing::{debug, error, info, info_span, instrument, trace, warn, Instrument}; +use tracing::{debug, error, info, info_span, instrument, Instrument}; use crate::{ defaults::DEFAULT_KEY_CACHE_CAPACITY, http::RELAY_PROBE_PATH, - protos, quic::server::{QuicServer, QuicSpawnError, ServerHandle as QuicServerHandle}, }; @@ -54,7 +52,7 @@ pub(crate) mod streams; pub mod testing; pub use self::{ - metrics::{Metrics, RelayMetrics, StunMetrics}, + metrics::{Metrics, RelayMetrics}, resolver::{ReloadingResolver, DEFAULT_CERT_RELOAD_INTERVAL}, }; @@ -69,8 +67,14 @@ const INDEX: &[u8] = br#"

"#; const TLS_HEADERS: [(&str, &str); 2] = [ - ("Strict-Transport-Security", "max-age=63072000; includeSubDomains"), - ("Content-Security-Policy", "default-src 'none'; frame-ancestors 'none'; form-action 'none'; base-uri 'self'; block-all-mixed-content; plugin-types 'none'") + ( + "Strict-Transport-Security", + "max-age=63072000; includeSubDomains", + ), + ( + "Content-Security-Policy", + "default-src 'none'; frame-ancestors 'none'; form-action 'none'; base-uri 'self'; block-all-mixed-content; plugin-types 'none'", + ), ]; type BytesBody = http_body_util::Full; @@ -82,7 +86,7 @@ fn body_empty() -> BytesBody { http_body_util::Full::new(hyper::body::Bytes::new()) } -/// Configuration for the full Relay & STUN server. +/// Configuration for the full Relay. /// /// Be aware the generic parameters are for when using the Let's Encrypt TLS configuration. /// If not used dummy ones need to be provided, e.g. `ServerConfig::<(), ()>::default()`. @@ -90,8 +94,6 @@ fn body_empty() -> BytesBody { pub struct ServerConfig { /// Configuration for the Relay server, disabled if `None`. pub relay: Option>, - /// Configuration for the STUN server, disabled if `None`. - pub stun: Option, /// Configuration for the QUIC server, disabled if `None`. pub quic: Option, /// Socket to serve metrics on. @@ -158,15 +160,6 @@ pub enum Access { Deny, } -/// Configuration for the STUN server. -#[derive(Debug)] -pub struct StunConfig { - /// The socket address on which the STUN server should bind. - /// - /// Normally you'd chose port `3478`, see [`crate::defaults::DEFAULT_STUN_PORT`]. - pub bind_addr: SocketAddr, -} - /// Configuration for the QUIC server. #[derive(Debug)] pub struct QuicConfig { @@ -241,17 +234,15 @@ pub enum CertConfig { Reloading, } -/// A running Relay + STUN server. +/// A running Relay + QAD server. /// -/// This is a full Relay server, including STUN, Relay and various associated HTTP services. 
+/// This is a full Relay server, including QAD, Relay and various associated HTTP services. /// /// Dropping this will stop the server. #[derive(Debug)] pub struct Server { /// The address of the HTTP server, if configured. http_addr: Option, - /// The address of the STUN server, if configured. - stun_addr: Option, /// The address of the HTTPS server, if the relay server is using TLS. /// /// If the Relay server is not using TLS then it is served from the @@ -285,9 +276,7 @@ pub struct Server { pub enum SpawnError { #[snafu(display("Unable to get local address"))] LocalAddr { source: std::io::Error }, - #[snafu(display("Failed to bind STUN listener"))] - UdpSocketBind { source: std::io::Error }, - #[snafu(display("Failed to bind STUN listener"))] + #[snafu(display("Failed to bind QAD listener"))] QuicSpawn { source: QuicSpawnError }, #[snafu(display("Failed to parse TLS header"))] TlsHeaderParse { source: InvalidHeaderValue }, @@ -347,30 +336,6 @@ impl Server { ); } - // Start the STUN server. - let stun_addr = match config.stun { - Some(stun) => { - debug!("Starting STUN server"); - match UdpSocket::bind(stun.bind_addr).await { - Ok(sock) => { - let addr = sock.local_addr().context(LocalAddrSnafu)?; - info!("STUN server listening on {addr}"); - let stun_metrics = metrics.stun.clone(); - tasks.spawn( - async move { - server_stun_listener(sock, stun_metrics).await; - Ok(()) - } - .instrument(info_span!("stun-server", %addr)), - ); - Some(addr) - } - Err(err) => return Err(err).context(UdpSocketBindSnafu), - } - } - None => None, - }; - // Start the Relay server, but first clone the certs out. let certificates = config.relay.as_ref().and_then(|relay| { relay.tls.as_ref().and_then(|tls| match tls.cert { @@ -491,7 +456,6 @@ impl Server { Ok(Self { http_addr: http_addr.or(relay_addr), - stun_addr, https_addr: http_addr.and(relay_addr), quic_addr, relay_handle, @@ -540,11 +504,6 @@ impl Server { self.quic_addr } - /// The socket address the STUN server is listening on. 
- pub fn stun_addr(&self) -> Option { - self.stun_addr - } - /// The certificates chain if configured with manual TLS certificates. pub fn certificates(&self) -> Option>> { self.certificates.clone() @@ -643,91 +602,6 @@ async fn relay_supervisor( ret } -/// Runs a STUN server. -/// -/// When the future is dropped, the server stops. -async fn server_stun_listener(sock: UdpSocket, metrics: Arc) { - info!(addr = ?sock.local_addr().ok(), "running STUN server"); - let sock = Arc::new(sock); - let mut buffer = vec![0u8; 64 << 10]; - let mut tasks = JoinSet::new(); - loop { - tokio::select! { - biased; - - Some(res) = tasks.join_next() => { - if let Err(err) = res { - if err.is_panic() { - panic!("task panicked: {:#?}", err); - } - } - } - res = sock.recv_from(&mut buffer) => { - match res { - Ok((n, src_addr)) => { - metrics.requests.inc(); - let pkt = &buffer[..n]; - if !protos::stun::is(pkt) { - debug!(%src_addr, "STUN: ignoring non stun packet"); - metrics.bad_requests.inc(); - continue; - } - let pkt = pkt.to_vec(); - tasks.spawn(handle_stun_request(src_addr, pkt, sock.clone(), metrics.clone())); - } - Err(err) => { - metrics.failures.inc(); - warn!("failed to recv: {err:#}"); - } - } - } - } - } -} - -/// Handles a single STUN request, doing all logging required. 
-async fn handle_stun_request( - src_addr: SocketAddr, - pkt: Vec, - sock: Arc, - metrics: Arc, -) { - let (txid, response) = match protos::stun::parse_binding_request(&pkt) { - Ok(txid) => { - debug!(%src_addr, %txid, "STUN: received binding request"); - (txid, protos::stun::response(txid, src_addr)) - } - Err(err) => { - metrics.bad_requests.inc(); - warn!(%src_addr, "STUN: invalid binding request: {:?}", err); - return; - } - }; - - match sock.send_to(&response, src_addr).await { - Ok(len) => { - if len != response.len() { - warn!( - %src_addr, - %txid, - "failed to write response, {len}/{} bytes sent", - response.len() - ); - } else { - match src_addr { - SocketAddr::V4(_) => metrics.ipv4_success.inc(), - SocketAddr::V6(_) => metrics.ipv6_success.inc(), - }; - } - trace!(%src_addr, %txid, "sent {len} bytes"); - } - Err(err) => { - metrics.failures.inc(); - warn!(%src_addr, %txid, "failed to write response: {err:#}"); - } - } -} - fn root_handler( _r: Request, response: ResponseBuilder, @@ -881,19 +755,17 @@ mod tests { use iroh_base::{NodeId, RelayUrl, SecretKey}; use n0_future::{FutureExt, SinkExt, StreamExt}; use n0_snafu::{Result, ResultExt}; - use tokio::net::UdpSocket; use tracing::{info, instrument}; use tracing_test::traced_test; use super::{ - Access, AccessConfig, RelayConfig, Server, ServerConfig, SpawnError, StunConfig, + Access, AccessConfig, RelayConfig, Server, ServerConfig, SpawnError, NO_CONTENT_CHALLENGE_HEADER, NO_CONTENT_RESPONSE_HEADER, }; use crate::{ client::{conn::ReceivedMessage, ClientBuilder, SendMessage}, dns::DnsResolver, http::{Protocol, HTTP_UPGRADE_PROTOCOL}, - protos, }; async fn spawn_local_relay() -> std::result::Result { @@ -906,7 +778,6 @@ mod tests { access: AccessConfig::Everyone, }), quic: None, - stun: None, metrics_addr: None, }) .await @@ -962,7 +833,6 @@ mod tests { key_cache_capacity: Some(1024), access: AccessConfig::Everyone, }), - stun: None, quic: None, metrics_addr: Some((Ipv4Addr::LOCALHOST, 1234).into()), }) @@ 
-1196,38 +1066,6 @@ mod tests { Ok(()) } - #[tokio::test] - #[traced_test] - async fn test_stun() { - let server = Server::spawn(ServerConfig::<(), ()> { - relay: None, - stun: Some(StunConfig { - bind_addr: (Ipv4Addr::LOCALHOST, 0).into(), - }), - quic: None, - metrics_addr: None, - }) - .await - .unwrap(); - - let txid = protos::stun::TransactionId::default(); - let req = protos::stun::request(txid); - let socket = UdpSocket::bind("127.0.0.1:0").await.unwrap(); - socket - .send_to(&req, server.stun_addr().unwrap()) - .await - .unwrap(); - - // get response - let mut buf = vec![0u8; 64000]; - let (len, addr) = socket.recv_from(&mut buf).await.unwrap(); - assert_eq!(addr, server.stun_addr().unwrap()); - buf.truncate(len); - let (txid_back, response_addr) = protos::stun::parse_response(&buf).unwrap(); - assert_eq!(txid, txid_back); - assert_eq!(response_addr, socket.local_addr().unwrap()); - } - #[tokio::test] #[traced_test] async fn test_relay_access_control() -> Result<()> { @@ -1257,7 +1095,6 @@ mod tests { })), }), quic: None, - stun: None, metrics_addr: None, }) .await?; diff --git a/iroh-relay/src/server/metrics.rs b/iroh-relay/src/server/metrics.rs index bf9e52df125..298f6e8141b 100644 --- a/iroh-relay/src/server/metrics.rs +++ b/iroh-relay/src/server/metrics.rs @@ -84,28 +84,10 @@ pub struct Metrics { // pub average_queue_duration: } -/// Metrics tracked for the STUN server. -#[derive(Debug, Default, MetricsGroup)] -#[metrics(name = "stun")] -pub struct StunMetrics { - /// Number of STUN requests made to the server. - pub requests: Counter, - /// Number of successful ipv4 STUN requests served. - pub ipv4_success: Counter, - /// Number of successful ipv6 STUN requests served. - pub ipv6_success: Counter, - /// Number of bad requests made to the STUN endpoint. - pub bad_requests: Counter, - /// Number of STUN requests that end in failure. - pub failures: Counter, -} - /// All metrics tracked in the relay server. 
#[derive(Debug, Default, Clone, MetricsGroupSet)] #[metrics(name = "relay")] pub struct RelayMetrics { - /// Metrics tracked for the STUN server. - pub stun: Arc, /// Metrics tracked for the relay server. pub server: Arc, } diff --git a/iroh-relay/src/server/testing.rs b/iroh-relay/src/server/testing.rs index fd2989cd81d..efaaf030ed0 100644 --- a/iroh-relay/src/server/testing.rs +++ b/iroh-relay/src/server/testing.rs @@ -1,18 +1,7 @@ //! Exposes functions to quickly configure a server suitable for testing. use std::net::Ipv4Addr; -use super::{ - AccessConfig, CertConfig, QuicConfig, RelayConfig, ServerConfig, StunConfig, TlsConfig, -}; - -/// Creates a [`StunConfig`] suitable for testing. -/// -/// To ensure port availability for testing, the port is configured to be assigned by the OS. -pub fn stun_config() -> StunConfig { - StunConfig { - bind_addr: (Ipv4Addr::LOCALHOST, 0).into(), - } -} +use super::{AccessConfig, CertConfig, QuicConfig, RelayConfig, ServerConfig, TlsConfig}; /// Creates a [`rustls::ServerConfig`] and certificates suitable for testing. /// @@ -88,13 +77,11 @@ pub fn quic_config() -> QuicConfig { /// Creates a [`ServerConfig`] suitable for testing. /// /// - Relaying is enabled using [`relay_config`] -/// - Stun is enabled using [`stun_config`] /// - QUIC addr discovery is disabled. /// - Metrics are not enabled. 
pub fn server_config() -> ServerConfig<()> { ServerConfig { relay: Some(relay_config()), - stun: Some(stun_config()), quic: Some(quic_config()), #[cfg(feature = "metrics")] metrics_addr: None, diff --git a/iroh/Cargo.toml b/iroh/Cargo.toml index 650e6d40229..987e070f1a6 100644 --- a/iroh/Cargo.toml +++ b/iroh/Cargo.toml @@ -41,7 +41,7 @@ http = "1" iroh-base = { version = "0.35.0", default-features = false, features = ["key", "relay"], path = "../iroh-base" } iroh-relay = { version = "0.35", path = "../iroh-relay", default-features = false } n0-future = "0.1.2" -n0-snafu = "0.2.0" +n0-snafu = "0.2.1" n0-watcher = "0.2" nested_enum_utils = "0.2.1" netwatch = { version = "0.6" } @@ -109,7 +109,7 @@ parse-size = { version = "=1.0.0", optional = true, features = ['std'] } # pinne hickory-resolver = "0.25.1" igd-next = { version = "0.16", features = ["aio_tokio"] } netdev = { version = "0.31.0" } -portmapper = { version = "0.5.0", default-features = false } +portmapper = { version = "0.6.0", default-features = false } quinn = { package = "iroh-quinn", version = "0.14.0", default-features = false, features = ["runtime-tokio", "rustls-ring"] } tokio = { version = "1", features = [ "io-util", @@ -134,6 +134,7 @@ getrandom = { version = "0.3.2", features = ["wasm_js"] } # target-common test/dev dependencies [dev-dependencies] +console_error_panic_hook = "0.1" postcard = { version = "1.1.1", features = ["use-std"] } tracing-subscriber = { version = "0.3", features = ["env-filter"] } diff --git a/iroh/README.md b/iroh/README.md index 025516e5d6f..751e705d542 100644 --- a/iroh/README.md +++ b/iroh/README.md @@ -4,9 +4,9 @@ Iroh is a library to establish direct connectivity between peers. It's built on peer-to-peer [QUIC](https://en.wikipedia.org/wiki/QUIC) using both relays and holepunching. The main structure for connection is the `Endpoint` entrypoint. -Peer to peer connectivity is established with the help of a _relay server_. 
The relay server provides Session Traversal Utilities for NAT [(STUN)](https://en.wikipedia.org/wiki/STUN) for the peers. If no direct connection can be established, the connection is relayed via the server. +Peer to peer connectivity is established with the help of a _relay server_. The relay server provides [QUIC Address Discovery](https://www.ietf.org/archive/id/draft-ietf-quic-address-discovery-00.html) (QAD) and hole-punching assistance for the peers. If no direct connection can be established, the connection is relayed via the server. -Peers must know and do verify the PeerID of each other before they can connect. When using a relay server to aid the connection establishment they will register with a home relay server using their PublicKey. Other peers which can not establish a direct connection can then establish connection via this relay server. This will try to assist establishing a direct connection using STUN and holepunching but continue relaying if not possible. +Peers must know and do verify the PeerID of each other before they can connect. When using a relay server to aid the connection establishment they will register with a home relay server using their PublicKey. Other peers which can not establish a direct connection can then establish connection via this relay server. This will try to assist establishing a direct connection using QAD and holepunching but continue relaying if not possible. Peers can also connect directly without using a relay server. For this, however the listening peer must be directly reachable by the connecting peer via one of it's addresses. 
diff --git a/iroh/bench/src/bin/bulk.rs b/iroh/bench/src/bin/bulk.rs index 292ea89b697..37ff37e06b0 100644 --- a/iroh/bench/src/bin/bulk.rs +++ b/iroh/bench/src/bin/bulk.rs @@ -95,7 +95,6 @@ pub fn run_iroh(opt: Opt) -> Result<()> { #[cfg(feature = "local-relay")] if let Some(relay_server) = relay_server.as_ref() { collect_and_print("RelayServerMetrics", &*relay_server.metrics().server); - collect_and_print("RelayStunMetrics", &*relay_server.metrics().stun); } } diff --git a/iroh/docs/local_relay_node.md b/iroh/docs/local_relay_node.md index cb6736478fa..a90bfee3783 100644 --- a/iroh/docs/local_relay_node.md +++ b/iroh/docs/local_relay_node.md @@ -16,8 +16,6 @@ To connect to this iroh-relay when doing your normal iroh commands, adjust the i # iroh.config.toml: [[relay_nodes]] url = "http://localhost:3340" -stun_only = false -stun_port = 3478 ``` If you want to give a specific port for the iroh-relay to bind to, you can create a iroh-relay config file and pass that file in using the `--config_path` flag. You need to retain a `secret_key`, so it is recommended to run `iroh-relay --config-path [PATH]` once to generate a secret key and save it to the config file before doing further edits to the file. @@ -29,13 +27,10 @@ To change the port you want to listen on, change the port in the `addr` field: secret_key = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" addr = "[::]:12345" -stun_port = 3478 hostname = "my.relay.network" -enable_stun = true enable_relay = true ``` Check [the iroh-relay file's](../src/bin/iroh-relay.rs) `Config` struct for documentation on each configuration field. If you change the local iroh-relay server's configuration, however, be sure to adjust the associated fields in your iroh config as well. 
- diff --git a/iroh/docs/relay_nodes.md b/iroh/docs/relay_nodes.md index 12dbee03b00..c8c8d643043 100644 --- a/iroh/docs/relay_nodes.md +++ b/iroh/docs/relay_nodes.md @@ -14,8 +14,6 @@ RelayNode { url: format!("https://derp.iroh.network") .parse() .unwrap(), - stun_only: false, - stun_port: 3478, } ``` @@ -26,7 +24,5 @@ RelayNode { url: format!("https://eu1.derp.iroh.network") .parse() .unwrap(), - stun_only: false, - stun_port: 3478, } ``` diff --git a/iroh/examples/transfer.rs b/iroh/examples/transfer.rs index 371f717ce61..76b8c56321f 100644 --- a/iroh/examples/transfer.rs +++ b/iroh/examples/transfer.rs @@ -189,6 +189,9 @@ impl EndpointArgs { } }; builder = builder.secret_key(secret_key); + if Env::Dev == self.env { + builder = builder.insecure_skip_relay_cert_verify(true); + } let relay_mode = if self.no_relay { RelayMode::Disabled @@ -245,17 +248,31 @@ impl EndpointArgs { let node_id = endpoint.node_id(); println!("Our node id:\n\t{node_id}"); + + let eps = endpoint.direct_addresses().initialized().await?; println!("Our direct addresses:"); - for local_endpoint in endpoint.direct_addresses().initialized().await? { + for local_endpoint in eps { println!("\t{} (type: {:?})", local_endpoint.addr, local_endpoint.typ) } - if !self.no_relay { - let relay_url = endpoint - .home_relay() - .get()? 
- .pop() - .context("Failed to resolve our home relay")?; - println!("Our home relay server:\n\t{relay_url}"); + + if self.relay_only { + let relay_url = endpoint.home_relay().initialized().await?; + println!("Our home relay server:\t{relay_url}"); + } else if !self.no_relay { + let relay_url = tokio::time::timeout(Duration::from_secs(2), async { + endpoint + .home_relay() + .initialized() + .await + .expect("disconnected") + }) + .await + .ok(); + if let Some(url) = relay_url { + println!("Our home relay server:\t{url}"); + } else { + println!("No home relay server found"); + } } println!(); diff --git a/iroh/src/defaults.rs b/iroh/src/defaults.rs index 6b0aa6eb6d0..c7d96a7db36 100644 --- a/iroh/src/defaults.rs +++ b/iroh/src/defaults.rs @@ -5,11 +5,6 @@ /// /// The port is "QUIC" typed on a phone keypad. pub use iroh_relay::defaults::DEFAULT_RELAY_QUIC_PORT; -/// The default STUN port used by the Relay server. -/// -/// The STUN port as defined by [RFC -/// 8489]() -pub use iroh_relay::defaults::DEFAULT_STUN_PORT; use url::Url; /// The default HTTP port used by the Relay server. 
@@ -51,8 +46,6 @@ pub mod prod { .expect("default url"); RelayNode { url: url.into(), - stun_only: false, - stun_port: DEFAULT_STUN_PORT, quic: Some(RelayQuicConfig::default()), } } @@ -65,8 +58,6 @@ pub mod prod { .expect("default_url"); RelayNode { url: url.into(), - stun_only: false, - stun_port: DEFAULT_STUN_PORT, quic: Some(RelayQuicConfig::default()), } } @@ -79,8 +70,6 @@ pub mod prod { .expect("default_url"); RelayNode { url: url.into(), - stun_only: false, - stun_port: DEFAULT_STUN_PORT, quic: Some(RelayQuicConfig::default()), } } @@ -114,8 +103,6 @@ pub mod staging { .expect("default url"); RelayNode { url: url.into(), - stun_only: false, - stun_port: DEFAULT_STUN_PORT, quic: Some(RelayQuicConfig::default()), } } @@ -128,8 +115,6 @@ pub mod staging { .expect("default_url"); RelayNode { url: url.into(), - stun_only: false, - stun_port: DEFAULT_STUN_PORT, quic: Some(RelayQuicConfig::default()), } } diff --git a/iroh/src/discovery/dns.rs b/iroh/src/discovery/dns.rs index 950160af229..aa4d7d7b0f3 100644 --- a/iroh/src/discovery/dns.rs +++ b/iroh/src/discovery/dns.rs @@ -11,7 +11,7 @@ use crate::{ Endpoint, }; -const DNS_STAGGERING_MS: &[u64] = &[200, 300]; +pub(crate) const DNS_STAGGERING_MS: &[u64] = &[200, 300]; /// DNS node discovery /// diff --git a/iroh/src/endpoint.rs b/iroh/src/endpoint.rs index 146407b8ba3..976cab46c22 100644 --- a/iroh/src/endpoint.rs +++ b/iroh/src/endpoint.rs @@ -1017,7 +1017,7 @@ impl Endpoint { /// iroh nodes to establish direct connectivity, depending on the network /// situation. The yielded lists of direct addresses contain both the locally-bound /// addresses and the [`Endpoint`]'s publicly reachable addresses discovered through - /// mechanisms such as [STUN] and port mapping. Hence usually only a subset of these + /// mechanisms such as [QAD] and port mapping. Hence usually only a subset of these /// will be applicable to a certain remote iroh node. 
/// /// The [`Endpoint`] continuously monitors the direct addresses for changes as its own @@ -1045,7 +1045,7 @@ impl Endpoint { /// # }); /// ``` /// - /// [STUN]: https://en.wikipedia.org/wiki/STUN + /// [QAD]: https://www.ietf.org/archive/id/draft-ietf-quic-address-discovery-00.html pub fn direct_addresses(&self) -> n0_watcher::Direct>> { self.msock.direct_addresses() } @@ -1081,7 +1081,7 @@ impl Endpoint { /// # }); /// ``` #[doc(hidden)] - pub fn net_report(&self) -> n0_watcher::Direct>> { + pub fn net_report(&self) -> impl Watcher> { self.msock.net_report() } @@ -2816,8 +2816,8 @@ mod tests { #[tokio::test] #[traced_test] - async fn test_direct_addresses_no_stun_relay() -> Result { - let (relay_map, _, _guard) = run_relay_server_with(None, false).await?; + async fn test_direct_addresses_no_qad_relay() -> Result { + let (relay_map, _, _guard) = run_relay_server_with(false).await.unwrap(); let ep = Endpoint::builder() .alpns(vec![TEST_ALPN.to_vec()]) @@ -3226,7 +3226,7 @@ mod tests { .await?; // can get a first report - endpoint.net_report().initialized().await?; + endpoint.net_report().updated().await?; Ok(()) } diff --git a/iroh/src/lib.rs b/iroh/src/lib.rs index 982c412d5f6..1d0200afb9f 100644 --- a/iroh/src/lib.rs +++ b/iroh/src/lib.rs @@ -105,7 +105,7 @@ //! //! Additionally to providing reliable connectivity between iroh nodes, Relay servers //! provide some functions to assist in [hole punching]. They have various services to help -//! nodes understand their own network situation. This includes offering a [STUN] server, +//! nodes understand their own network situation. This includes offering a [QAD] server, //! but also a few HTTP extra endpoints as well as responding to ICMP echo requests. //! //! By default the [number 0] relay servers are used, see [`RelayMode::Default`]. @@ -232,7 +232,7 @@ //! [bi-directional streams]: crate::endpoint::Connection::open_bi //! [hole punching]: https://en.wikipedia.org/wiki/Hole_punching_(networking) //! 
[socket addresses]: https://doc.rust-lang.org/stable/std/net/enum.SocketAddr.html -//! [STUN]: https://en.wikipedia.org/wiki/STUN +//! [QAD]: https://www.ietf.org/archive/id/draft-ietf-quic-address-discovery-00.html //! [ALPN]: https://en.wikipedia.org/wiki/Application-Layer_Protocol_Negotiation //! [HTTP3]: https://en.wikipedia.org/wiki/HTTP/3 //! [`SecretKey`]: crate::SecretKey diff --git a/iroh/src/magicsock.rs b/iroh/src/magicsock.rs index c73cd457bca..c8a753d8e74 100644 --- a/iroh/src/magicsock.rs +++ b/iroh/src/magicsock.rs @@ -31,10 +31,10 @@ use std::{ use bytes::Bytes; use data_encoding::HEXLOWER; use iroh_base::{NodeAddr, NodeId, PublicKey, RelayUrl, SecretKey}; -use iroh_relay::{protos::stun, RelayMap}; +use iroh_relay::RelayMap; use n0_future::{ boxed::BoxStream, - task::{self, JoinSet}, + task::{self, AbortOnDropHandle}, time::{self, Duration, Instant}, StreamExt, }; @@ -44,13 +44,13 @@ use netwatch::netmon; #[cfg(not(wasm_browser))] use netwatch::{ip::LocalAddresses, UdpSocket}; use quinn::{AsyncUdpSocket, ServerConfig}; -use rand::{seq::SliceRandom, Rng, SeedableRng}; +use rand::Rng; use smallvec::SmallVec; -use snafu::{IntoError, ResultExt, Snafu}; -use tokio::sync::{self, mpsc, Mutex}; +use snafu::{ResultExt, Snafu}; +use tokio::sync::{mpsc, Mutex}; +use tokio_util::sync::CancellationToken; use tracing::{ - debug, error, error_span, event, info, info_span, instrument, trace, trace_span, warn, - Instrument, Level, Span, + debug, error, event, info, info_span, instrument, trace, trace_span, warn, Instrument, Level, }; use transports::LocalAddrsWatch; use url::Url; @@ -74,7 +74,7 @@ use crate::{ discovery::{Discovery, DiscoveryItem, DiscoverySubscribers, NodeData, UserData}, key::{public_ed_box, secret_ed_box, DecryptionError, SharedSecret}, metrics::EndpointMetrics, - net_report::{self, IpMappedAddresses, Report, ReportError}, + net_report::{self, IfStateDetails, IpMappedAddresses, Report}, }; mod metrics; @@ -89,7 +89,7 @@ pub use self::{ 
node_map::{ConnectionType, ControlMsg, DirectAddrInfo, RemoteInfo}, }; -/// How long we consider a STUN-derived endpoint valid for. UDP NAT mappings typically +/// How long we consider a QAD-derived endpoint valid for. UDP NAT mappings typically /// expire at 30 seconds, so this is a few seconds shy of that. const ENDPOINTS_FRESH_ENOUGH_DURATION: Duration = Duration::from_secs(27); @@ -162,8 +162,10 @@ type RelayContents = SmallVec<[Bytes; 1]>; pub(crate) struct Handle { #[deref(forward)] msock: Arc, - // Empty when closed - actor_tasks: Arc>>, + // empty when shutdown + actor_task: Arc>>>, + /// Token to cancel the actor task. + actor_token: CancellationToken, // quinn endpoint endpoint: quinn::Endpoint, } @@ -178,69 +180,52 @@ pub(crate) struct Handle { /// It is usually only necessary to use a single [`MagicSock`] instance in an application, it /// means any QUIC endpoints on top will be sharing as much information about nodes as /// possible. -#[derive(derive_more::Debug)] +#[derive(Debug)] pub(crate) struct MagicSock { + /// Channel to send to the internal actor. actor_sender: mpsc::Sender, - /// String representation of the node_id of this node. - me: String, - - /// The DNS resolver to be used in this magicsock. - #[cfg(not(wasm_browser))] - dns_resolver: DnsResolver, - - /// Key for this node. - secret_key: SecretKey, - /// Encryption key for this node. - secret_encryption_key: crypto_box::SecretKey, + /// NodeId of this node. + public_key: PublicKey, + // - State Management /// Close is in progress (or done) closing: AtomicBool, /// Close was called. closed: AtomicBool, + + // - Networking Info + /// Our discovered direct addresses. + direct_addrs: DiscoveredDirectAddrs, + /// Our latest net-report + net_report: Watchable<(Option, UpdateReason)>, /// If the last net_report report, reports IPv6 to be available. ipv6_reported: Arc, - - /// Zero nodes means relay is disabled. 
- relay_map: RelayMap, /// Tracks the networkmap node entity for each node discovery key. node_map: NodeMap, /// Tracks the mapped IP addresses ip_mapped_addrs: IpMappedAddresses, - /// NetReport client - net_reporter: net_report::Addr, - /// The state for an active DiscoKey. - disco_secrets: DiscoSecrets, + /// Local addresses + local_addrs_watch: LocalAddrsWatch, + /// Currently bound IP addresses of all sockets + #[cfg(not(wasm_browser))] + ip_bind_addrs: Vec, + /// The DNS resolver to be used in this magicsock. + #[cfg(not(wasm_browser))] + dns_resolver: DnsResolver, - /// Disco (ping) queue - disco_sender: mpsc::Sender<(SendAddr, PublicKey, disco::Message)>, + /// Disco + disco: DiscoState, + // - Discovery /// Optional discovery service discovery: Option>, - /// Optional user-defined discover data. discovery_user_data: RwLock>, - - /// Our discovered direct addresses. - direct_addrs: DiscoveredDirectAddrs, - - /// Our latest net-report - net_report: Watchable>>, - - /// List of CallMeMaybe disco messages that should be sent out after the next endpoint update - /// completes - pending_call_me_maybes: std::sync::Mutex>, - - /// Indicates the direct addr update state. - direct_addr_update_state: DirectAddrUpdateState, - /// Broadcast channel for listening to discovery updates. discovery_subscribers: DiscoverySubscribers, + /// Metrics pub(crate) metrics: EndpointMetrics, - - local_addrs_watch: LocalAddrsWatch, - #[cfg(not(wasm_browser))] - ip_bind_addrs: Vec, } #[allow(missing_docs)] @@ -288,10 +273,6 @@ impl MagicSock { self.closed.load(Ordering::SeqCst) } - fn public_key(&self) -> PublicKey { - self.secret_key.public() - } - /// Get the cached version of addresses. 
pub(crate) fn local_addr(&self) -> Vec { self.local_addrs_watch.get().expect("disconnected") @@ -358,8 +339,11 @@ impl MagicSock { /// /// [`Watcher`]: n0_watcher::Watcher /// [`Watcher::initialized`]: n0_watcher::Watcher::initialized - pub(crate) fn net_report(&self) -> n0_watcher::Direct>> { - self.net_report.watch() + pub(crate) fn net_report(&self) -> impl Watcher> { + self.net_report + .watch() + .map(|(r, _)| r) + .expect("disconnected") } /// Watch for changes to the home relay. @@ -402,7 +386,7 @@ impl MagicSock { } /// Add addresses for a node to the magic socket's addresbook. - #[instrument(skip_all, fields(me = %self.me))] + #[instrument(skip_all)] pub fn add_node_addr( &self, mut addr: NodeAddr, @@ -640,7 +624,7 @@ impl MagicSock { self.metrics.magicsock.recv_gro_datagrams.inc(); } - // Chunk through the datagrams in this GRO payload to find disco and stun + // Chunk through the datagrams in this GRO payload to find disco // packets and forward them to the actor for datagram in buf[..quinn_meta.len].chunks_mut(quinn_meta.stride) { if datagram.len() < quinn_meta.stride { @@ -651,19 +635,11 @@ impl MagicSock { ); } - // Detect DISCO and STUN datagrams and process them. Overwrite the first + // Detect DISCO datagrams and process them. Overwrite the first // byte of those packets with zero to make Quinn ignore the packet. This // relies on quinn::EndpointConfig::grease_quic_bit being set to `false`, // which we do in Endpoint::bind. 
- if source_addr.is_ip() && stun::is(datagram) { - trace!(src = ?source_addr, len = %quinn_meta.stride, "UDP recv: stun packet"); - let packet2 = Bytes::copy_from_slice(datagram); - self.net_reporter.receive_stun_packet( - packet2, - source_addr.clone().into_socket_addr().expect("checked"), - ); - datagram[0] = 0u8; - } else if let Some((sender, sealed_box)) = disco::source_and_box(datagram) { + if let Some((sender, sealed_box)) = disco::source_and_box(datagram) { trace!(src = ?source_addr, len = %quinn_meta.stride, "UDP recv: disco packet"); self.handle_disco_message(sender, sealed_box, source_addr); datagram[0] = 0u8; @@ -752,7 +728,7 @@ impl MagicSock { } } } else { - // If all datagrams in this buf are DISCO or STUN, set len to zero to make + // If all datagrams in this buf are DISCO, set len to zero to make // Quinn skip the buf completely. quinn_meta.len = 0; } @@ -780,17 +756,17 @@ impl MagicSock { if let transports::Addr::Relay(_, node_id) = src { if node_id != &sender { // TODO: return here? - warn!("Received relay disco message from connection for {:?}, but with message from {}", node_id.fmt_short(), sender.fmt_short()); + warn!( + "Received relay disco message from connection for {}, but with message from {}", + node_id.fmt_short(), + sender.fmt_short() + ); } } // We're now reasonably sure we're expecting communication from // this node, do the heavy crypto lifting to see what they want. - let dm = match self.disco_secrets.unseal_and_decode( - &self.secret_encryption_key, - sender, - sealed_box.to_vec(), - ) { + let dm = match self.disco.unseal_and_decode(sender, sealed_box) { Ok(dm) => dm, Err(DiscoBoxError::Open { source, .. 
}) => { warn!(?source, "failed to open disco box"); @@ -898,11 +874,7 @@ impl MagicSock { txn = ?dm.tx_id, ); - if self - .disco_sender - .try_send((addr.clone(), sender, pong)) - .is_err() - { + if !self.disco.try_send(addr.clone(), sender, pong) { warn!(%addr, "failed to queue pong"); } @@ -916,15 +888,6 @@ impl MagicSock { } } - fn encode_disco_message(&self, dst_key: PublicKey, msg: &disco::Message) -> Bytes { - self.disco_secrets.encode_and_seal( - &self.secret_encryption_key, - self.secret_key.public(), - dst_key, - msg, - ) - } - fn send_ping_queued(&self, ping: SendPing) { let SendPing { id, @@ -935,13 +898,9 @@ impl MagicSock { } = ping; let msg = disco::Message::Ping(disco::Ping { tx_id, - node_key: self.public_key(), + node_key: self.public_key, }); - let sent = self - .disco_sender - .try_send((dst.clone(), dst_node, msg)) - .is_ok(); - + let sent = self.disco.try_send(dst.clone(), dst_node, msg); if sent { let msg_sender = self.actor_sender.clone(); trace!(%dst, tx = %HEXLOWER.encode(&tx_id), ?purpose, "ping sent (queued)"); @@ -952,7 +911,7 @@ impl MagicSock { } } - /// Tries to send the ping actions. + /// Send the given ping actions out. async fn send_ping_actions(&self, sender: &UdpSender, msgs: Vec) -> io::Result<()> { for msg in msgs { // Abort sending as soon as we know we are shutting down. @@ -961,20 +920,68 @@ impl MagicSock { } match msg { PingAction::SendCallMeMaybe { - ref relay_url, + relay_url, dst_node, } => { - self.send_or_queue_call_me_maybe(relay_url, dst_node); + // Sends the call-me-maybe DISCO message, queuing if addresses are too stale. + // + // To send the call-me-maybe message, we need to know our current direct addresses. If + // this information is too stale, the call-me-maybe is queued while a net_report run is + // scheduled. Once this run finishes, the call-me-maybe will be sent. 
+ match self.direct_addrs.fresh_enough() { + Ok(()) => { + let msg = disco::Message::CallMeMaybe( + self.direct_addrs.to_call_me_maybe_message(), + ); + if !self.disco.try_send( + SendAddr::Relay(relay_url.clone()), + dst_node, + msg.clone(), + ) { + warn!(dstkey = %dst_node.fmt_short(), %relay_url, "relay channel full, dropping call-me-maybe"); + } else { + debug!(dstkey = %dst_node.fmt_short(), %relay_url, "call-me-maybe sent"); + } + } + Err(last_refresh_ago) => { + debug!( + ?last_refresh_ago, + "want call-me-maybe but direct addrs stale; queuing after restun", + ); + self.actor_sender + .try_send(ActorMessage::ScheduleDirectAddrUpdate( + UpdateReason::RefreshForPeering, + Some((dst_node, relay_url)), + )) + .ok(); + } + } } - PingAction::SendPing(ping) => { - self.send_ping(sender, ping).await?; + PingAction::SendPing(SendPing { + id, + dst, + dst_node, + tx_id, + purpose, + }) => { + let msg = disco::Message::Ping(disco::Ping { + tx_id, + node_key: self.public_key, + }); + + self.send_disco_message(sender, dst.clone(), dst_node, msg) + .await?; + debug!(%dst, tx = %HEXLOWER.encode(&tx_id), ?purpose, "ping sent"); + let msg_sender = self.actor_sender.clone(); + self.node_map + .notify_ping_sent(id, dst, tx_id, purpose, msg_sender); } } } Ok(()) } - /// Send a disco message. UDP messages will be polled to send directly on the UDP socket. + /// Sends out a disco message. 
async fn send_disco_message( &self, sender: &UdpSender, @@ -994,7 +1001,8 @@ impl MagicSock { "connection closed", )); } - let pkt = self.encode_disco_message(dst_key, &msg); + + let pkt = self.disco.encode_and_seal(self.public_key, dst_key, &msg); let transmit = transports::Transmit { contents: &pkt, @@ -1017,90 +1025,6 @@ impl MagicSock { } } - async fn send_ping(&self, sender: &UdpSender, ping: SendPing) -> io::Result<()> { - let SendPing { - id, - dst, - dst_node, - tx_id, - purpose, - } = ping; - let msg = disco::Message::Ping(disco::Ping { - tx_id, - node_key: self.public_key(), - }); - - self.send_disco_message(sender, dst.clone(), dst_node, msg) - .await?; - debug!(%dst, tx = %HEXLOWER.encode(&tx_id), ?purpose, "ping sent"); - let msg_sender = self.actor_sender.clone(); - self.node_map - .notify_ping_sent(id, dst.clone(), tx_id, purpose, msg_sender); - Ok(()) - } - - fn send_queued_call_me_maybes(&self) { - let msg = self.direct_addrs.to_call_me_maybe_message(); - let msg = disco::Message::CallMeMaybe(msg); - for (public_key, url) in self - .pending_call_me_maybes - .lock() - .expect("poisoned") - .drain() - { - if self - .disco_sender - .try_send((SendAddr::Relay(url), public_key, msg.clone())) - .is_err() - { - warn!(node = %public_key.fmt_short(), "relay channel full, dropping call-me-maybe"); - } - } - } - - /// Sends the call-me-maybe DISCO message, queuing if addresses are too stale. - /// - /// To send the call-me-maybe message, we need to know our current direct addresses. If - /// this information is too stale, the call-me-maybe is queued while a net_report run is - /// scheduled. Once this run finishes, the call-me-maybe will be sent. 
- fn send_or_queue_call_me_maybe(&self, url: &RelayUrl, dst_node: NodeId) { - match self.direct_addrs.fresh_enough() { - Ok(()) => { - let msg = self.direct_addrs.to_call_me_maybe_message(); - let msg = disco::Message::CallMeMaybe(msg); - if self - .disco_sender - .try_send((SendAddr::Relay(url.clone()), dst_node, msg.clone())) - .is_err() - { - warn!(dstkey = %dst_node.fmt_short(), relayurl = %url, - "relay channel full, dropping call-me-maybe"); - } else { - debug!(dstkey = %dst_node.fmt_short(), relayurl = %url, "call-me-maybe sent"); - } - } - Err(last_refresh_ago) => { - self.pending_call_me_maybes - .lock() - .expect("poisoned") - .insert(dst_node, url.clone()); - debug!( - ?last_refresh_ago, - "want call-me-maybe but direct addrs stale; queuing after restun", - ); - self.re_stun("refresh-for-peering"); - } - } - } - - /// Triggers an address discovery. The provided why string is for debug logging only. - #[instrument(skip_all)] - fn re_stun(&self, why: &'static str) { - debug!("re_stun: {}", why); - self.metrics.magicsock.re_stun_calls.inc(); - self.direct_addr_update_state.schedule_run(why); - } - /// Publishes our address to a discovery service, if configured. /// /// Called whenever our addresses or home relay node changes. @@ -1159,49 +1083,125 @@ impl From for MappedAddr { /// and start a new one when the current one has finished #[derive(Debug)] struct DirectAddrUpdateState { - /// If running, set to the reason for the currently the update. - running: sync::watch::Sender>, /// If set, start a new update as soon as the current one is finished. - want_update: std::sync::Mutex>, + want_update: Option, + msock: Arc, + #[cfg(not(wasm_browser))] + port_mapper: portmapper::Client, + /// The prober that discovers local network conditions, including the closest relay relay and NAT mappings. 
+ net_reporter: Arc>, + relay_map: RelayMap, + run_done: mpsc::Sender<()>, +} + +#[derive(Default, Debug, PartialEq, Eq, Clone, Copy)] +enum UpdateReason { + /// Initial state + #[default] + None, + RefreshForPeering, + Periodic, + PortmapUpdated, + LinkChangeMajor, + LinkChangeMinor, +} + +impl UpdateReason { + fn is_major(self) -> bool { + matches!(self, Self::LinkChangeMajor) + } } impl DirectAddrUpdateState { - fn new() -> Self { - let (running, _) = sync::watch::channel(None); + fn new( + msock: Arc, + #[cfg(not(wasm_browser))] port_mapper: portmapper::Client, + net_reporter: Arc>, + relay_map: RelayMap, + run_done: mpsc::Sender<()>, + ) -> Self { DirectAddrUpdateState { - running, want_update: Default::default(), + #[cfg(not(wasm_browser))] + port_mapper, + net_reporter, + msock, + relay_map, + run_done, } } /// Schedules a new run, either starting it immediately if none is running or /// scheduling it for later. - fn schedule_run(&self, why: &'static str) { - if self.is_running() { - let _ = self.want_update.lock().expect("poisoned").insert(why); - } else { - self.run(why); + fn schedule_run(&mut self, why: UpdateReason, if_state: IfStateDetails) { + match self.net_reporter.clone().try_lock_owned() { + Ok(net_reporter) => { + self.run(why, if_state, net_reporter); + } + Err(_) => { + let _ = self.want_update.insert(why); + } } } - /// Returns `true` if an update is currently in progress. - fn is_running(&self) -> bool { - self.running.borrow().is_some() + /// If another run is needed, triggers this run, otherwise does nothing. + fn try_run(&mut self, if_state: IfStateDetails) { + match self.net_reporter.clone().try_lock_owned() { + Ok(net_reporter) => { + if let Some(why) = self.want_update.take() { + self.run(why, if_state, net_reporter); + } + } + Err(_) => { + // do nothing + } + } } /// Trigger a new run. 
- fn run(&self, why: &'static str) { - self.running.send(Some(why)).ok(); - } + fn run( + &mut self, + why: UpdateReason, + if_state: IfStateDetails, + mut net_reporter: tokio::sync::OwnedMutexGuard, + ) { + debug!("starting direct addr update ({:?})", why); + #[cfg(not(wasm_browser))] + self.port_mapper.procure_mapping(); + // Don't start a net report probe if we know + // we are shutting down + if self.msock.is_closing() || self.msock.is_closed() { + debug!("skipping net_report, socket is shutting down"); + return; + } + if self.relay_map.is_empty() { + debug!("skipping net_report, empty RelayMap"); + self.msock.net_report.set((None, why)).ok(); + return; + } - /// Clears the current running state. - fn finish_run(&self) { - self.running.send(None).ok(); - } + debug!("requesting net_report report"); + let msock = self.msock.clone(); - /// Returns the next update, if one is set. - fn next_update(&self) -> Option<&'static str> { - self.want_update.lock().expect("poisoned").take() + let run_done = self.run_done.clone(); + task::spawn(async move { + let fut = time::timeout( + NET_REPORT_TIMEOUT, + net_reporter.get_report(if_state, why.is_major()), + ); + match fut.await { + Ok(report) => { + msock.net_report.set((Some(report), why)).ok(); + } + Err(time::Elapsed { .. }) => { + warn!("net_report report timed out"); + } + } + + // mark run as finished + debug!("direct addr update done ({:?})", why); + run_done.send(()).await.ok(); + }); } } @@ -1229,14 +1229,6 @@ pub enum CreateHandleError { impl Handle { /// Creates a magic [`MagicSock`] listening on [`Options::addr_v4`] and [`Options::addr_v6`]. 
async fn new(opts: Options) -> Result { - let me = opts.secret_key.public().fmt_short(); - - Self::with_name(me, opts) - .instrument(error_span!("magicsock")) - .await - } - - async fn with_name(me: String, opts: Options) -> Result { let Options { addr_v4, addr_v6, @@ -1263,35 +1255,9 @@ impl Handle { let (ip_transports, port_mapper) = bind_ip(addr_v4, addr_v6, &metrics).context(BindSocketsSnafu)?; - #[cfg(not(wasm_browser))] - let v4_socket = ip_transports - .iter() - .find(|t| t.bind_addr().is_ipv4()) - .expect("must bind a ipv4 socket") - .socket(); - #[cfg(not(wasm_browser))] - let v6_socket = ip_transports.iter().find_map(|t| { - if t.bind_addr().is_ipv6() { - Some(t.socket()) - } else { - None - } - }); - let ip_mapped_addrs = IpMappedAddresses::default(); - let net_reporter = net_report::Client::new( - #[cfg(not(wasm_browser))] - Some(port_mapper.clone()), - #[cfg(not(wasm_browser))] - dns_resolver.clone(), - #[cfg(not(wasm_browser))] - Some(ip_mapped_addrs.clone()), - metrics.net_report.clone(), - ); - let (actor_sender, actor_receiver) = mpsc::channel(256); - let (disco_sender, mut disco_receiver) = mpsc::channel(256); // load the node data let node_map = node_map.unwrap_or_default(); @@ -1326,30 +1292,25 @@ impl Handle { #[cfg(wasm_browser)] let transports = Transports::new(relay_transports); + let (disco, disco_receiver) = DiscoState::new(secret_encryption_key); + let msock = Arc::new(MagicSock { - me, - secret_key, - secret_encryption_key, + public_key: secret_key.public(), closing: AtomicBool::new(false), closed: AtomicBool::new(false), + disco, actor_sender: actor_sender.clone(), ipv6_reported, - relay_map, - net_reporter: net_reporter.addr(), - disco_secrets: DiscoSecrets::default(), node_map, - ip_mapped_addrs, - disco_sender, + ip_mapped_addrs: ip_mapped_addrs.clone(), discovery, discovery_user_data: RwLock::new(discovery_user_data), direct_addrs: Default::default(), - net_report: Default::default(), - pending_call_me_maybes: Default::default(), - 
direct_addr_update_state: DirectAddrUpdateState::new(), + net_report: Watchable::new((None, UpdateReason::None)), #[cfg(not(wasm_browser))] - dns_resolver, + dns_resolver: dns_resolver.clone(), discovery_subscribers: DiscoverySubscribers::new(), - metrics, + metrics: metrics.clone(), local_addrs_watch: transports.local_addrs_watch(), #[cfg(not(wasm_browser))] ip_bind_addrs: transports.ip_bind_addrs(), @@ -1363,8 +1324,7 @@ impl Handle { // the packet if grease_quic_bit is set to false. endpoint_config.grease_quic_bit(false); - let sender1 = transports.create_sender(msock.clone()); - let sender2 = transports.create_sender(msock.clone()); + let sender = transports.create_sender(msock.clone()); let local_addrs_watch = transports.local_addrs_watch(); let network_change_sender = transports.create_network_change_sender(); @@ -1382,23 +1342,10 @@ impl Handle { ) .context(CreateQuinnEndpointSnafu)?; - let mut actor_tasks = JoinSet::default(); - - #[cfg(not(wasm_browser))] - let _ = actor_tasks.spawn({ - let msock = msock.clone(); - async move { - while let Some((dst, dst_key, msg)) = disco_receiver.recv().await { - if let Err(err) = msock.send_disco_message(&sender1, dst.clone(), dst_key, msg).await { - warn!(%dst, node = %dst_key.fmt_short(), ?err, "failed to send disco message (UDP)"); - } - } - } - }); - let network_monitor = netmon::Monitor::new() .await .context(CreateNetmonMonitorSnafu)?; + let qad_endpoint = endpoint.clone(); #[cfg(any(test, feature = "test-utils"))] @@ -1412,46 +1359,66 @@ impl Handle { let net_report_config = net_report::Options::default(); #[cfg(not(wasm_browser))] - let net_report_config = net_report_config - .stun_v4(Some(v4_socket)) - .stun_v6(v6_socket) - .quic_config(Some(QuicConfig { - ep: qad_endpoint, - client_config, - ipv4: true, - ipv6, - })); + let net_report_config = net_report_config.quic_config(Some(QuicConfig { + ep: qad_endpoint, + client_config, + ipv4: true, + ipv6, + })); #[cfg(any(test, feature = "test-utils"))] let 
net_report_config = net_report_config.insecure_skip_relay_cert_verify(insecure_skip_relay_cert_verify); + let net_reporter = net_report::Client::new( + #[cfg(not(wasm_browser))] + dns_resolver, + #[cfg(not(wasm_browser))] + Some(ip_mapped_addrs), + relay_map.clone(), + net_report_config, + metrics.net_report.clone(), + ); + + let (direct_addr_done_tx, direct_addr_done_rx) = mpsc::channel(8); + let direct_addr_update_state = DirectAddrUpdateState::new( + msock.clone(), + #[cfg(not(wasm_browser))] + port_mapper, + Arc::new(Mutex::new(net_reporter)), + relay_map, + direct_addr_done_tx, + ); + + let netmon_watcher = network_monitor.interface_state(); let actor = Actor { msg_receiver: actor_receiver, - msg_sender: actor_sender, msock: msock.clone(), periodic_re_stun_timer: new_re_stun_timer(false), - net_info_last: None, - #[cfg(not(wasm_browser))] - port_mapper, - no_v4_send: false, - net_reporter, network_monitor, - net_report_config, + netmon_watcher, + direct_addr_update_state, network_change_sender, + direct_addr_done_rx, + pending_call_me_maybes: Default::default(), + disco_receiver, }; - actor_tasks.spawn( + + let actor_token = CancellationToken::new(); + let token = actor_token.clone(); + let actor_task = task::spawn( actor - .run(local_addrs_watch, sender2) + .run(token, local_addrs_watch, sender) .instrument(info_span!("actor")), ); - let actor_tasks = Arc::new(Mutex::new(actor_tasks)); + let actor_task = Arc::new(Mutex::new(Some(AbortOnDropHandle::new(actor_task)))); Ok(Handle { msock, - actor_tasks, + actor_task, endpoint, + actor_token, }) } @@ -1465,9 +1432,9 @@ impl Handle { /// Only the first close does anything. Any later closes return nil. /// Polling the socket ([`AsyncUdpSocket::poll_recv`]) will return [`Poll::Pending`] /// indefinitely after this call. 
- #[instrument(skip_all, fields(me = %self.msock.me))] + #[instrument(skip_all)] pub(crate) async fn close(&self) { - trace!("magicsock closing..."); + trace!(me = ?self.public_key, "magicsock closing..."); // Initiate closing all connections, and refuse future connections. self.endpoint.close(0u16.into(), b""); @@ -1492,38 +1459,27 @@ impl Handle { return; } self.msock.closing.store(true, Ordering::Relaxed); - // If this fails, then there's no receiver listening for shutdown messages, - // so nothing to shut down anyways. - self.msock - .actor_sender - .send(ActorMessage::Shutdown) - .await - .ok(); - self.msock.closed.store(true, Ordering::SeqCst); + self.actor_token.cancel(); - let mut tasks = self.actor_tasks.lock().await; - - // give the tasks a moment to shutdown cleanly - let tasks_ref = &mut tasks; - let shutdown_done = time::timeout(Duration::from_millis(100), async move { - while let Some(task) = tasks_ref.join_next().await { - if let Err(err) = task { + if let Some(task) = self.actor_task.lock().await.take() { + // give the tasks a moment to shutdown cleanly + let shutdown_done = time::timeout(Duration::from_millis(100), async move { + if let Err(err) = task.await { warn!("unexpected error in task shutdown: {:?}", err); } - } - }) - .await; - match shutdown_done { - Ok(_) => trace!("tasks finished in time, shutdown complete"), - Err(_elapsed) => { - // shutdown all tasks - warn!( - "tasks didn't finish in time, aborting remaining {}/3 tasks", - tasks.len() - ); - tasks.shutdown().await; + }) + .await; + match shutdown_done { + Ok(_) => trace!("tasks finished in time, shutdown complete"), + Err(time::Elapsed { .. 
}) => { + // Dropping the task will abort itt + warn!("tasks didn't finish in time, aborting"); + } } } + + self.msock.closed.store(true, Ordering::SeqCst); + trace!("magicsock closed"); } } @@ -1541,45 +1497,69 @@ fn default_quic_client_config() -> rustls::ClientConfig { .with_no_client_auth() } -#[derive(Debug, Default)] -struct DiscoSecrets(std::sync::Mutex>); +#[derive(Debug)] +struct DiscoState { + /// Encryption key for this node. + secret_encryption_key: crypto_box::SecretKey, + /// The state for an active DiscoKey. + secrets: std::sync::Mutex>, + /// Disco (ping) queue + sender: mpsc::Sender<(SendAddr, PublicKey, disco::Message)>, +} -impl DiscoSecrets { - fn get(&self, secret: &crypto_box::SecretKey, node_id: PublicKey, cb: F) -> T - where - F: FnOnce(&mut SharedSecret) -> T, - { - let mut inner = self.0.lock().expect("poisoned"); - let x = inner.entry(node_id).or_insert_with(|| { - let public_key = public_ed_box(&node_id.public()); - SharedSecret::new(secret, &public_key) - }); - cb(x) +impl DiscoState { + fn new( + secret_encryption_key: crypto_box::SecretKey, + ) -> (Self, mpsc::Receiver<(SendAddr, PublicKey, disco::Message)>) { + let (disco_sender, disco_receiver) = mpsc::channel(256); + + ( + Self { + secret_encryption_key, + secrets: Default::default(), + sender: disco_sender, + }, + disco_receiver, + ) + } + + fn try_send(&self, dst: SendAddr, node_id: PublicKey, msg: disco::Message) -> bool { + self.sender.try_send((dst, node_id, msg)).is_ok() } fn encode_and_seal( &self, - this_secret_key: &crypto_box::SecretKey, this_node_id: NodeId, other_node_id: NodeId, msg: &disco::Message, ) -> Bytes { let mut seal = msg.as_bytes(); - self.get(this_secret_key, other_node_id, |secret| { - secret.seal(&mut seal) - }); + self.get_secret(other_node_id, |secret| secret.seal(&mut seal)); disco::encode_message(&this_node_id, seal).into() } + fn unseal_and_decode( &self, - secret: &crypto_box::SecretKey, node_id: PublicKey, - mut sealed_box: Vec, + sealed_box: 
&[u8], ) -> Result { - self.get(secret, node_id, |secret| secret.open(&mut sealed_box)) + let mut sealed_box = sealed_box.to_vec(); + self.get_secret(node_id, |secret| secret.open(&mut sealed_box)) .context(OpenSnafu)?; disco::Message::from_bytes(&sealed_box).context(ParseSnafu) } + + fn get_secret(&self, node_id: PublicKey, cb: F) -> T + where + F: FnOnce(&mut SharedSecret) -> T, + { + let mut inner = self.secrets.lock().expect("poisoned"); + let x = inner.entry(node_id).or_insert_with(|| { + let public_key = public_ed_box(&node_id.public()); + SharedSecret::new(&self.secret_encryption_key, &public_key) + }); + cb(x) + } } #[allow(missing_docs)] @@ -1665,14 +1645,10 @@ impl AsyncUdpSocket for MagicUdpSocket { #[derive(Debug)] enum ActorMessage { - Shutdown, PingActions(Vec), EndpointPingExpired(usize, stun_rs::TransactionId), - NetReport( - Result>, NetReportError>, - &'static str, - ), NetworkChange, + ScheduleDirectAddrUpdate(UpdateReason, Option<(NodeId, RelayUrl)>), #[cfg(test)] ForceNetworkChange(bool), } @@ -1680,28 +1656,20 @@ enum ActorMessage { struct Actor { msock: Arc, msg_receiver: mpsc::Receiver, - msg_sender: mpsc::Sender, /// When set, is an AfterFunc timer that will call MagicSock::do_periodic_stun. periodic_re_stun_timer: time::Interval, - /// The `NetInfo` provided in the last call to `net_info_func`. It's used to deduplicate calls to netInfoFunc. - net_info_last: Option, - - #[cfg(not(wasm_browser))] - port_mapper: portmapper::Client, - - /// Configuration for net report - net_report_config: net_report::Options, - - /// Whether IPv4 UDP is known to be unable to transmit - /// at all. This could happen if the socket is in an invalid state - /// (as can happen on darwin after a network link status change). - no_v4_send: bool, - - /// The prober that discovers local network conditions, including the closest relay relay and NAT mappings. 
- net_reporter: net_report::Client, network_monitor: netmon::Monitor, + netmon_watcher: n0_watcher::Direct, network_change_sender: transports::NetworkChangeSender, + /// Indicates the direct addr update state. + direct_addr_update_state: DirectAddrUpdateState, + direct_addr_done_rx: mpsc::Receiver<()>, + + /// List of CallMeMaybe disco messages that should be sent out after + /// the next endpoint update completes + pending_call_me_maybes: HashMap, + disco_receiver: mpsc::Receiver<(SendAddr, PublicKey, disco::Message)>, } #[cfg(not(wasm_browser))] @@ -1754,33 +1722,28 @@ fn bind_ip( Ok((ip, port_mapper)) } -#[derive(Debug, Snafu)] -#[non_exhaustive] -enum NetReportError { - #[snafu(display("Net report not received"))] - NotReceived, - #[snafu(display("Net report timed out"))] - Timeout, - #[snafu(display("Net report encountered an error"))] - NetReport { source: ReportError }, -} - impl Actor { async fn run( mut self, + shutdown_token: CancellationToken, mut watcher: impl Watcher> + Send + Sync, sender: UdpSender, ) { + // Initialize addresses + #[cfg(not(wasm_browser))] + self.update_direct_addresses(None); + // Setup network monitoring - let mut netmon_watcher = self.network_monitor.interface_state(); - let mut current_netmon_state = netmon_watcher.get().expect("missing network state"); + let mut current_netmon_state = self.netmon_watcher.get().expect("missing network state"); #[cfg(not(wasm_browser))] let mut direct_addr_heartbeat_timer = time::interval(HEARTBEAT_INTERVAL); - let mut direct_addr_update_receiver = - self.msock.direct_addr_update_state.running.subscribe(); + #[cfg(not(wasm_browser))] - let mut portmap_watcher = self.port_mapper.watch_external_address(); + let mut portmap_watcher = self + .direct_addr_update_state + .port_mapper + .watch_external_address(); let mut discovery_events: BoxStream = Box::pin(n0_future::stream::empty()); if let Some(d) = self.msock.discovery() { @@ -1793,6 +1756,8 @@ impl Actor { #[cfg_attr(wasm_browser, 
allow(unused_mut))] let mut portmap_watcher_closed = false; + let mut net_report_watcher = self.msock.net_report.watch(); + loop { self.msock.metrics.magicsock.actor_tick_main.inc(); #[cfg(not(wasm_browser))] @@ -1806,6 +1771,10 @@ impl Actor { let direct_addr_heartbeat_timer_tick = n0_future::future::pending(); tokio::select! { + _ = shutdown_token.cancelled() => { + debug!("shutting down"); + return; + } msg = self.msg_receiver.recv(), if !receiver_closed => { let Some(msg) = msg else { trace!("tick: magicsock receiver closed"); @@ -1817,14 +1786,12 @@ impl Actor { trace!(?msg, "tick: msg"); self.msock.metrics.magicsock.actor_tick_msg.inc(); - if self.handle_actor_message(msg, &sender).await { - return; - } + self.handle_actor_message(msg, &sender).await; } tick = self.periodic_re_stun_timer.tick() => { trace!("tick: re_stun {:?}", tick); self.msock.metrics.magicsock.actor_tick_re_stun.inc(); - self.msock.re_stun("periodic"); + self.re_stun(UpdateReason::Periodic); } new_addr = watcher.updated() => { match new_addr { @@ -1839,6 +1806,32 @@ impl Actor { } } } + report = net_report_watcher.updated() => { + match report { + Ok((report, _)) => { + self.handle_net_report_report(report); + #[cfg(not(wasm_browser))] + { + self.periodic_re_stun_timer = new_re_stun_timer(true); + } + } + Err(_) => { + warn!("net report watcher stopped"); + } + } + } + reason = self.direct_addr_done_rx.recv() => { + match reason { + Some(()) => { + // check if a new run needs to be scheduled + let state = self.netmon_watcher.get().expect("disconnected"); + self.direct_addr_update_state.try_run(state.into()); + } + None => { + warn!("direct addr watcher died"); + } + } + } change = portmap_watcher_changed, if !portmap_watcher_closed => { #[cfg(not(wasm_browser))] { @@ -1854,7 +1847,7 @@ impl Actor { self.msock.metrics.magicsock.actor_tick_portmap_changed.inc(); let new_external_address = *portmap_watcher.borrow(); debug!("external address updated: {new_external_address:?}"); - 
self.msock.re_stun("portmap_updated"); + self.re_stun(UpdateReason::PortmapUpdated); } #[cfg(wasm_browser)] let _unused_in_browsers = change; @@ -1874,15 +1867,7 @@ impl Actor { self.handle_ping_actions(&sender, msgs).await; } } - _ = direct_addr_update_receiver.changed() => { - let reason = *direct_addr_update_receiver.borrow(); - trace!("tick: direct addr update receiver {:?}", reason); - self.msock.metrics.magicsock.actor_tick_direct_addr_update_receiver.inc(); - if let Some(reason) = reason { - self.refresh_direct_addrs(reason).await; - } - } - state = netmon_watcher.updated() => { + state = self.netmon_watcher.updated() => { let Ok(state) = state else { trace!("tick: link change receiver closed"); self.msock.metrics.magicsock.actor_tick_other.inc(); @@ -1892,7 +1877,7 @@ impl Actor { current_netmon_state = state; trace!("tick: link change {}", is_major); self.msock.metrics.magicsock.actor_link_change.inc(); - self.handle_network_change(is_major); + self.handle_network_change(is_major).await; } // Even if `discovery_events` yields `None`, it could begin to yield // `Some` again in the future, so we don't want to disable this branch @@ -1912,11 +1897,16 @@ impl Actor { // Send the discovery item to the subscribers of the discovery broadcast stream. self.msock.discovery_subscribers.send(discovery_item); } + Some((dst, dst_key, msg)) = self.disco_receiver.recv() => { + if let Err(err) = self.msock.send_disco_message(&sender, dst.clone(), dst_key, msg).await { + warn!(%dst, node = %dst_key.fmt_short(), ?err, "failed to send disco message (UDP)"); + } + } } } } - fn handle_network_change(&mut self, is_major: bool) { + async fn handle_network_change(&mut self, is_major: bool) { debug!("link change detected: major? 
{}", is_major); if is_major { @@ -1925,14 +1915,20 @@ impl Actor { } #[cfg(not(wasm_browser))] - self.msock.dns_resolver.clear_cache(); - self.msock.re_stun("link-change-major"); + self.msock.dns_resolver.reset().await; + self.re_stun(UpdateReason::LinkChangeMajor); self.reset_endpoint_states(); } else { - self.msock.re_stun("link-change-minor"); + self.re_stun(UpdateReason::LinkChangeMinor); } } + fn re_stun(&mut self, why: UpdateReason) { + let state = self.netmon_watcher.get().expect("disconnected"); + self.direct_addr_update_state + .schedule_run(why, state.into()); + } + #[instrument(skip_all)] async fn handle_ping_actions(&mut self, sender: &UdpSender, msgs: Vec) { if let Err(err) = self.msock.send_ping_actions(sender, msgs).await { @@ -1943,65 +1939,30 @@ impl Actor { /// Processes an incoming actor message. /// /// Returns `true` if it was a shutdown. - async fn handle_actor_message(&mut self, msg: ActorMessage, sender: &UdpSender) -> bool { + async fn handle_actor_message(&mut self, msg: ActorMessage, sender: &UdpSender) { match msg { - ActorMessage::Shutdown => { - debug!("shutting down"); - - self.msock.node_map.notify_shutdown(); - #[cfg(not(wasm_browser))] - self.port_mapper.deactivate(); - - debug!("shutdown complete"); - return true; - } ActorMessage::EndpointPingExpired(id, txid) => { self.msock.node_map.notify_ping_timeout(id, txid); } - ActorMessage::NetReport(report, why) => { - match report { - Ok(report) => { - self.handle_net_report_report(report).await; - } - Err(err) => { - warn!( - "failed to generate net_report report for: {}: {:?}", - why, err - ); - } - } - self.finalize_direct_addrs_update(why); - } ActorMessage::NetworkChange => { self.network_monitor.network_change().await.ok(); } + ActorMessage::ScheduleDirectAddrUpdate(why, data) => { + if let Some((node, url)) = data { + self.pending_call_me_maybes.insert(node, url); + } + let state = self.netmon_watcher.get().expect("disconnected"); + self.direct_addr_update_state + 
.schedule_run(why, state.into()); + } #[cfg(test)] ActorMessage::ForceNetworkChange(is_major) => { - self.handle_network_change(is_major); + self.handle_network_change(is_major).await; } ActorMessage::PingActions(ping_actions) => { self.handle_ping_actions(sender, ping_actions).await; } } - - false - } - - /// Refreshes knowledge about our direct addresses. - /// - /// In other words, this triggers a net_report run. - /// - /// Note that invoking this is managed by the [`DirectAddrUpdateState`] and this should - /// never be invoked directly. Some day this will be refactored to not allow this easy - /// mistake to be made. - #[instrument(level = "debug", skip_all)] - async fn refresh_direct_addrs(&mut self, why: &'static str) { - self.msock.metrics.magicsock.update_direct_addrs.inc(); - - debug!("starting direct addr update ({})", why); - #[cfg(not(wasm_browser))] - self.port_mapper.procure_mapping(); - self.update_net_info(why).await; } /// Updates the direct addresses of this magic socket. @@ -2013,8 +1974,11 @@ impl Actor { /// - A net_report report. /// - The local interfaces IP addresses. #[cfg(not(wasm_browser))] - fn update_direct_addresses(&mut self, net_report_report: Option>) { - let portmap_watcher = self.port_mapper.watch_external_address(); + fn update_direct_addresses(&mut self, net_report_report: Option<&net_report::Report>) { + let portmap_watcher = self + .direct_addr_update_state + .port_mapper + .watch_external_address(); // We only want to have one DirectAddr for each SocketAddr we have. So we store // this as a map of SocketAddr -> DirectAddrType. At the end we will construct a @@ -2027,15 +1991,12 @@ impl Actor { addrs .entry(portmap_ext) .or_insert(DirectAddrType::Portmapped); - self.set_net_info_have_port_map(); } // Next add STUN addresses from the net_report report. 
if let Some(net_report_report) = net_report_report { if let Some(global_v4) = net_report_report.global_v4 { - addrs - .entry(global_v4.into()) - .or_insert(DirectAddrType::Stun); + addrs.entry(global_v4.into()).or_insert(DirectAddrType::Qad); // If they're behind a hard NAT and are using a fixed // port locally, assume they might've added a static @@ -2051,21 +2012,19 @@ impl Actor { if let Some(port) = port { if net_report_report - .mapping_varies_by_dest_ip + .mapping_varies_by_dest() .unwrap_or_default() { let mut addr = global_v4; addr.set_port(port); addrs .entry(addr.into()) - .or_insert(DirectAddrType::Stun4LocalPort); + .or_insert(DirectAddrType::Qad4LocalPort); } } } if let Some(global_v6) = net_report_report.global_v6 { - addrs - .entry(global_v6.into()) - .or_insert(DirectAddrType::Stun); + addrs.entry(global_v6.into()).or_insert(DirectAddrType::Qad); } } @@ -2077,7 +2036,6 @@ impl Actor { .zip(self.msock.ip_local_addrs()) .collect(); - let msock = self.msock.clone(); let has_ipv4_unspecified = local_addrs.iter().find_map(|(_, a)| { if a.is_ipv4() && a.ip().is_unspecified() { Some(a.port()) @@ -2093,248 +2051,95 @@ impl Actor { } }); - // The following code can be slow, we do not want to block the caller since it would - // block the actor loop. - task::spawn( - async move { - // If a socket is bound to the unspecified address, create SocketAddrs for - // each local IP address by pairing it with the port the socket is bound on. - if local_addrs - .iter() - .any(|(_, local)| local.ip().is_unspecified()) - { - // Depending on the OS and network interfaces attached and their state - // enumerating the local interfaces can take a long time. Especially - // Windows is very slow. 
- let LocalAddresses { - regular: mut ips, - loopback, - } = tokio::task::spawn_blocking(LocalAddresses::new) - .await - .expect("spawn panicked"); - if ips.is_empty() && addrs.is_empty() { - // Include loopback addresses only if there are no other interfaces - // or public addresses, this allows testing offline. - ips = loopback; - } - - for ip in ips { - let port_if_unspecified = match ip { - IpAddr::V4(_) => has_ipv4_unspecified, - IpAddr::V6(_) => has_ipv6_unspecified, - }; - if let Some(port) = port_if_unspecified { - let addr = SocketAddr::new(ip, port); - addrs.entry(addr).or_insert(DirectAddrType::Local); - } - } - } - - // If a socket is bound to a specific address, add it. - for (bound, local) in local_addrs { - if !bound.ip().is_unspecified() { - addrs.entry(local).or_insert(DirectAddrType::Local); - } + // If a socket is bound to the unspecified address, create SocketAddrs for + // each local IP address by pairing it with the port the socket is bound on. + if local_addrs + .iter() + .any(|(_, local)| local.ip().is_unspecified()) + { + let LocalAddresses { + regular: mut ips, + loopback, + } = self + .netmon_watcher + .get() + .expect("netmon disconnected") + .local_addresses; + if ips.is_empty() && addrs.is_empty() { + // Include loopback addresses only if there are no other interfaces + // or public addresses, this allows testing offline. + ips = loopback; + } + + for ip in ips { + let port_if_unspecified = match ip { + IpAddr::V4(_) => has_ipv4_unspecified, + IpAddr::V6(_) => has_ipv6_unspecified, + }; + if let Some(port) = port_if_unspecified { + let addr = SocketAddr::new(ip, port); + addrs.entry(addr).or_insert(DirectAddrType::Local); } - - // Finally create and store store all these direct addresses and send any - // queued call-me-maybe messages. 
- msock.store_direct_addresses( - addrs - .iter() - .map(|(addr, typ)| DirectAddr { - addr: *addr, - typ: *typ, - }) - .collect(), - ); - msock.send_queued_call_me_maybes(); - } - .instrument(Span::current()), - ); - } - - /// Called when a direct addr update is done, no matter if it was successful or not. - fn finalize_direct_addrs_update(&mut self, why: &'static str) { - let new_why = self.msock.direct_addr_update_state.next_update(); - if !self.msock.is_closed() { - if let Some(new_why) = new_why { - self.msock.direct_addr_update_state.run(new_why); - return; - } - #[cfg(not(wasm_browser))] - { - self.periodic_re_stun_timer = new_re_stun_timer(true); - } - } - - self.msock.direct_addr_update_state.finish_run(); - debug!("direct addr update done ({})", why); - } - - /// Updates `NetInfo.HavePortMap` to true. - #[instrument(level = "debug", skip_all)] - fn set_net_info_have_port_map(&mut self) { - if let Some(ref mut net_info_last) = self.net_info_last { - if net_info_last.have_port_map { - // No change. - return; } - net_info_last.have_port_map = true; - self.net_info_last = Some(net_info_last.clone()); } - } - #[instrument(level = "debug", skip_all)] - async fn call_net_info_callback(&mut self, ni: NetInfo) { - if let Some(ref net_info_last) = self.net_info_last { - if ni.basically_equal(net_info_last) { - return; + // If a socket is bound to a specific address, add it. + for (bound, local) in local_addrs { + if !bound.ip().is_unspecified() { + addrs.entry(local).or_insert(DirectAddrType::Local); } } - self.net_info_last = Some(ni); + // Finally create and store store all these direct addresses and send any + // queued call-me-maybe messages. + self.msock.store_direct_addresses( + addrs + .iter() + .map(|(addr, typ)| DirectAddr { + addr: *addr, + typ: *typ, + }) + .collect(), + ); + self.send_queued_call_me_maybes(); } - /// Calls net_report. 
- /// - /// Note that invoking this is managed by [`DirectAddrUpdateState`] via - /// [`Actor::refresh_direct_addrs`] and this should never be invoked directly. Some day - /// this will be refactored to not allow this easy mistake to be made. - #[instrument(level = "debug", skip_all)] - async fn update_net_info(&mut self, why: &'static str) { - // Don't start a net report probe if we know - // we are shutting down - if self.msock.is_closing() || self.msock.is_closed() { - debug!("skipping net_report, socket is shutting down"); - return; - } - if self.msock.relay_map.is_empty() { - debug!("skipping net_report, empty RelayMap"); - self.msg_sender - .send(ActorMessage::NetReport(Ok(None), why)) - .await - .ok(); - return; - } - - let relay_map = self.msock.relay_map.clone(); - let opts = self.net_report_config.clone(); + fn send_queued_call_me_maybes(&mut self) { + let msg = self.msock.direct_addrs.to_call_me_maybe_message(); + let msg = disco::Message::CallMeMaybe(msg); + // allocate, to minimize locking duration - debug!("requesting net_report report"); - match self.net_reporter.get_report_channel(relay_map, opts).await { - Ok(rx) => { - let msg_sender = self.msg_sender.clone(); - task::spawn(async move { - let report = time::timeout(NET_REPORT_TIMEOUT, rx).await; - let report = match report { - Ok(Ok(Ok(report))) => Ok(Some(report)), - Ok(Ok(Err(err))) => Err(NetReportSnafu.into_error(err)), - Ok(Err(_)) => Err(NotReceivedSnafu.build()), - Err(_) => Err(TimeoutSnafu.build()), - }; - msg_sender - .send(ActorMessage::NetReport(report, why)) - .await - .ok(); - // The receiver of the NetReport message will call - // .finalize_direct_addrs_update(). 
- }); - } - Err(err) => { - warn!("unable to start net_report generation: {:?}", err); - self.finalize_direct_addrs_update(why); + for (public_key, url) in self.pending_call_me_maybes.drain() { + if !self + .msock + .disco + .try_send(SendAddr::Relay(url), public_key, msg.clone()) + { + warn!(node = %public_key.fmt_short(), "relay channel full, dropping call-me-maybe"); } } } - async fn handle_net_report_report(&mut self, report: Option>) { - if let Some(ref report) = report { - // only returns Err if the report hasn't changed. - self.msock.net_report.set(Some(report.clone())).ok(); - self.msock - .ipv6_reported - .store(report.ipv6, Ordering::Relaxed); - let r = &report; - trace!( - "setting no_v4_send {} -> {}", - self.no_v4_send, - !r.ipv4_can_send - ); - self.no_v4_send = !r.ipv4_can_send; - - #[cfg(not(wasm_browser))] - let have_port_map = self.port_mapper.watch_external_address().borrow().is_some(); - #[cfg(wasm_browser)] - let have_port_map = false; - - let mut ni = NetInfo { - relay_latency: Default::default(), - mapping_varies_by_dest_ip: r.mapping_varies_by_dest_ip, - hair_pinning: r.hair_pinning, - #[cfg(not(wasm_browser))] - portmap_probe: r.portmap_probe.clone(), - have_port_map, - working_ipv6: Some(r.ipv6), - os_has_ipv6: Some(r.os_has_ipv6), - working_udp: Some(r.udp), - working_icmp_v4: r.icmpv4, - working_icmp_v6: r.icmpv6, - preferred_relay: r.preferred_relay.clone(), - }; - for (rid, d) in r.relay_v4_latency.iter() { - ni.relay_latency - .insert(format!("{rid}-v4"), d.as_secs_f64()); - } - for (rid, d) in r.relay_v6_latency.iter() { - ni.relay_latency - .insert(format!("{rid}-v6"), d.as_secs_f64()); - } - - if ni.preferred_relay.is_none() { - // Perhaps UDP is blocked. Pick a deterministic but arbitrary one. 
- ni.preferred_relay = self.pick_relay_fallback(); + fn handle_net_report_report(&mut self, mut report: Option) { + if let Some(ref mut r) = report { + self.msock.ipv6_reported.store(r.udp_v6, Ordering::Relaxed); + if r.preferred_relay.is_none() { + if let Some(my_relay) = self.msock.my_relay() { + r.preferred_relay.replace(my_relay); + } } // Notify all transports - self.network_change_sender.on_network_change(&ni); - - // TODO: set link type - self.call_net_info_callback(ni).await; - } - #[cfg(not(wasm_browser))] - self.update_direct_addresses(report); - } - - /// Returns a deterministic relay node to connect to. This is only used if net_report - /// couldn't find the nearest one, for instance, if UDP is blocked and thus STUN - /// latency checks aren't working. - /// - /// If no the [`RelayMap`] is empty, returns `0`. - fn pick_relay_fallback(&self) -> Option { - // TODO: figure out which relay node most of our nodes are using, - // and use that region as our fallback. - // - // If we already had selected something in the past and it has any - // nodes, we want to stay on it. If there are no nodes at all, - // stay on whatever relay we previously picked. If we need to pick - // one and have no node info, pick a node randomly. - // - // We used to do the above for legacy clients, but never updated it for disco. - - let my_relay = self.msock.my_relay(); - if my_relay.is_some() { - return my_relay; + self.network_change_sender.on_network_change(r); } - let ids = self.msock.relay_map.urls().collect::>(); - let mut rng = rand::rngs::StdRng::seed_from_u64(0); - ids.choose(&mut rng).map(|c| (*c).clone()) + #[cfg(not(wasm_browser))] + self.update_direct_addresses(report.as_ref()); } /// Resets the preferred address for all nodes. /// This is called when connectivity changes enough that we no longer trust the old routes. 
- #[instrument(skip_all, fields(me = %self.msock.me))] + #[instrument(skip_all)] fn reset_endpoint_states(&mut self) { self.msock.node_map.reset_node_states() } @@ -2386,7 +2191,7 @@ fn bind_with_fallback(mut addr: SocketAddr) -> io::Result { /// /// These are all the [`DirectAddr`]s that this [`MagicSock`] is aware of for itself. /// They include all locally bound ones as well as those discovered by other mechanisms like -/// STUN. +/// QAD. #[derive(derive_more::Debug, Default, Clone)] struct DiscoveredDirectAddrs { /// The last set of discovered direct addresses. @@ -2576,23 +2381,23 @@ pub enum DirectAddrType { Unknown, /// A locally bound socket address. Local, - /// Public internet address discovered via STUN. + /// Public internet address discovered via QAD. /// - /// When possible an iroh node will perform STUN to discover which is the address + /// When possible an iroh node will perform QAD to discover which is the address /// from which it sends data on the public internet. This can be different from locally /// bound addresses when the node is on a local network which performs NAT or similar. - Stun, + Qad, /// An address assigned by the router using port mapping. /// /// When possible an iroh node will request a port mapping from the local router to /// get a publicly routable direct address. Portmapped, - /// Hard NAT: STUN'ed IPv4 address + local fixed port. + /// Hard NAT: QAD'ed IPv4 address + local fixed port. /// /// It is possible to configure iroh to bound to a specific port and independently /// configure the router to forward this port to the iroh node. This indicates a - /// situation like this, which still uses STUN to discover the public address. - Stun4LocalPort, + /// situation like this, which still uses QAD to discover the public address. 
+ Qad4LocalPort, } impl Display for DirectAddrType { @@ -2600,98 +2405,19 @@ impl Display for DirectAddrType { match self { DirectAddrType::Unknown => write!(f, "?"), DirectAddrType::Local => write!(f, "local"), - DirectAddrType::Stun => write!(f, "stun"), + DirectAddrType::Qad => write!(f, "qad"), DirectAddrType::Portmapped => write!(f, "portmap"), - DirectAddrType::Stun4LocalPort => write!(f, "stun4localport"), + DirectAddrType::Qad4LocalPort => write!(f, "qad4localport"), } } } -/// Contains information about the host's network state. -#[derive(Debug, Clone, PartialEq)] -pub(crate) struct NetInfo { - /// Says whether the host's NAT mappings vary based on the destination IP. - mapping_varies_by_dest_ip: Option, - - /// If their router does hairpinning. It reports true even if there's no NAT involved. - hair_pinning: Option, - - /// Whether the host has IPv6 internet connectivity. - working_ipv6: Option, - - /// Whether the OS supports IPv6 at all, regardless of whether IPv6 internet connectivity is available. - os_has_ipv6: Option, - - /// Whether the host has UDP internet connectivity. - working_udp: Option, - - /// Whether ICMPv4 works, `None` means not checked. - working_icmp_v4: Option, - - /// Whether ICMPv6 works, `None` means not checked. - working_icmp_v6: Option, - - /// Whether we have an existing portmap open (UPnP, PMP, or PCP). - have_port_map: bool, - - /// Probe indicating the presence of port mapping protocols on the LAN. - #[cfg(not(wasm_browser))] - portmap_probe: Option, - - /// This node's preferred relay server for incoming traffic. - /// - /// The node might be be temporarily connected to multiple relay servers (to send to - /// other nodes) but this is the relay on which you can always contact this node. Also - /// known as home relay. - preferred_relay: Option, - - /// The fastest recent time to reach various relay STUN servers, in seconds. 
- /// - /// This should only be updated rarely, or when there's a - /// material change, as any change here also gets uploaded to the control plane. - relay_latency: BTreeMap, -} - -impl NetInfo { - /// Checks if this is probably still the same network as *other*. - /// - /// This tries to compare the network situation, without taking into account things - /// expected to change a little like e.g. latency to the relay server. - fn basically_equal(&self, other: &Self) -> bool { - let eq_icmp_v4 = match (self.working_icmp_v4, other.working_icmp_v4) { - (Some(slf), Some(other)) => slf == other, - _ => true, // ignore for comparison if only one report had this info - }; - let eq_icmp_v6 = match (self.working_icmp_v6, other.working_icmp_v6) { - (Some(slf), Some(other)) => slf == other, - _ => true, // ignore for comparison if only one report had this info - }; - - #[cfg(not(wasm_browser))] - let probe_eq = self.portmap_probe == other.portmap_probe; - #[cfg(wasm_browser)] - let probe_eq = true; - - self.mapping_varies_by_dest_ip == other.mapping_varies_by_dest_ip - && self.hair_pinning == other.hair_pinning - && self.working_ipv6 == other.working_ipv6 - && self.os_has_ipv6 == other.os_has_ipv6 - && self.working_udp == other.working_udp - && eq_icmp_v4 - && eq_icmp_v6 - && self.have_port_map == other.have_port_map - && probe_eq - && self.preferred_relay == other.preferred_relay - } -} - #[cfg(test)] mod tests { use std::{collections::BTreeSet, sync::Arc, time::Duration}; use data_encoding::HEXLOWER; - use iroh_base::{NodeAddr, NodeId, PublicKey, SecretKey}; - use iroh_relay::RelayMap; + use iroh_base::{NodeAddr, NodeId, PublicKey}; use n0_future::{time, StreamExt}; use n0_snafu::{Result, ResultExt}; use n0_watcher::Watcher; @@ -2707,7 +2433,7 @@ mod tests { dns::DnsResolver, endpoint::{DirectAddr, PathSelection, Source}, magicsock::{node_map, Handle, MagicSock}, - tls, Endpoint, RelayMode, + tls, Endpoint, RelayMap, RelayMode, SecretKey, }; const ALPN: &[u8] = 
b"n0/test/1"; diff --git a/iroh/src/magicsock/metrics.rs b/iroh/src/magicsock/metrics.rs index b6d7fe5d44c..803a829bd48 100644 --- a/iroh/src/magicsock/metrics.rs +++ b/iroh/src/magicsock/metrics.rs @@ -8,7 +8,6 @@ use serde::{Deserialize, Serialize}; #[non_exhaustive] #[metrics(name = "magicsock")] pub struct Metrics { - pub re_stun_calls: Counter, pub update_direct_addrs: Counter, // Sends (data or disco) @@ -66,7 +65,6 @@ pub struct Metrics { pub actor_tick_re_stun: Counter, pub actor_tick_portmap_changed: Counter, pub actor_tick_direct_addr_heartbeat: Counter, - pub actor_tick_direct_addr_update_receiver: Counter, pub actor_link_change: Counter, pub actor_tick_other: Counter, diff --git a/iroh/src/magicsock/node_map.rs b/iroh/src/magicsock/node_map.rs index c15aa69c008..252f5d0eb97 100644 --- a/iroh/src/magicsock/node_map.rs +++ b/iroh/src/magicsock/node_map.rs @@ -265,13 +265,6 @@ impl NodeMap { Some((public_key, udp_addr, relay_url, ping_actions)) } - pub(super) fn notify_shutdown(&self) { - let mut inner = self.inner.lock().expect("poisoned"); - for (_, ep) in inner.node_states_mut() { - ep.reset(); - } - } - pub(super) fn reset_node_states(&self) { let mut inner = self.inner.lock().expect("poisoned"); for (_, ep) in inner.node_states_mut() { diff --git a/iroh/src/magicsock/node_map/best_addr.rs b/iroh/src/magicsock/node_map/best_addr.rs index 7670bb3e9ae..48866e27813 100644 --- a/iroh/src/magicsock/node_map/best_addr.rs +++ b/iroh/src/magicsock/node_map/best_addr.rs @@ -57,7 +57,6 @@ pub(super) enum State<'a> { #[derive(Debug, Clone, Copy)] pub enum ClearReason { - Reset, Inactive, PongTimeout, MatchesOurLocalAddr, diff --git a/iroh/src/magicsock/node_map/node_state.rs b/iroh/src/magicsock/node_map/node_state.rs index f48e5488c3a..be1e0a58dbe 100644 --- a/iroh/src/magicsock/node_map/node_state.rs +++ b/iroh/src/magicsock/node_map/node_state.rs @@ -6,7 +6,6 @@ use std::{ use data_encoding::HEXLOWER; use iroh_base::{NodeAddr, NodeId, PublicKey, RelayUrl}; 
-use iroh_relay::protos::stun; use n0_future::{ task::{self, AbortOnDropHandle}, time::{self, Duration, Instant}, @@ -44,7 +43,7 @@ const PING_TIMEOUT_DURATION: Duration = Duration::from_secs(5); const GOOD_ENOUGH_LATENCY: Duration = Duration::from_millis(5); /// How long since the last activity we try to keep an established endpoint peering alive. -/// It's also the idle time at which we stop doing STUN queries to keep NAT mappings alive. +/// It's also the idle time at which we stop doing QAD queries to keep NAT mappings alive. pub(super) const SESSION_ACTIVE_TIMEOUT: Duration = Duration::from_secs(45); /// How often we try to upgrade to a better patheven if we have some non-relay route that works. @@ -67,7 +66,7 @@ pub(in crate::magicsock) struct SendPing { pub id: usize, pub dst: SendAddr, pub dst_node: NodeId, - pub tx_id: stun::TransactionId, + pub tx_id: stun_rs::TransactionId, pub purpose: DiscoPingPurpose, } @@ -114,7 +113,7 @@ pub(super) struct NodeState { /// The fallback/bootstrap path, if non-zero (non-zero for well-behaved clients). relay_url: Option<(RelayUrl, PathState)>, udp_paths: NodeUdpPaths, - sent_pings: HashMap, + sent_pings: HashMap, /// Last time this node was used. /// /// A node is marked as in use when sending datagrams to them, or when having received @@ -285,7 +284,9 @@ impl NodeState { ) -> (Option, Option) { #[cfg(any(test, feature = "test-utils"))] if self.path_selection == PathSelection::RelayOnly { - debug!("in `RelayOnly` mode, giving the relay address as the only viable address for this endpoint"); + debug!( + "in `RelayOnly` mode, giving the relay address as the only viable address for this endpoint" + ); return (None, self.relay_url()); } let (best_addr, relay_url) = match self.udp_paths.send_addr(*now, have_ipv6) { @@ -429,7 +430,7 @@ impl NodeState { /// Cleanup the expired ping for the passed in txid. 
#[instrument("disco", skip_all, fields(node = %self.node_id.fmt_short()))] - pub(super) fn ping_timeout(&mut self, txid: stun::TransactionId) { + pub(super) fn ping_timeout(&mut self, txid: stun_rs::TransactionId) { if let Some(sp) = self.sent_pings.remove(&txid) { debug!(tx = %HEXLOWER.encode(&txid), addr = %sp.to, "pong not received in timeout"); match sp.to { @@ -487,7 +488,7 @@ impl NodeState { return None; // Similar to `RelayOnly` mode, we don't send UDP pings for hole-punching. } - let tx_id = stun::TransactionId::default(); + let tx_id = stun_rs::TransactionId::default(); trace!(tx = %HEXLOWER.encode(&tx_id), %dst, ?purpose, dst = %self.node_id.fmt_short(), "start ping"); event!( @@ -511,7 +512,7 @@ impl NodeState { pub(super) fn ping_sent( &mut self, to: SendAddr, - tx_id: stun::TransactionId, + tx_id: stun_rs::TransactionId, purpose: DiscoPingPurpose, sender: mpsc::Sender, ) { @@ -708,19 +709,6 @@ impl NodeState { debug!(new = ?new_addrs , %paths, "added new direct paths for endpoint"); } - /// Clears all the endpoint's p2p state, reverting it to a relay-only endpoint. - #[instrument(skip_all, fields(node = %self.node_id.fmt_short()))] - pub(super) fn reset(&mut self) { - self.last_full_ping = None; - self.udp_paths - .best_addr - .clear(ClearReason::Reset, self.relay_url.is_some()); - - for es in self.udp_paths.paths.values_mut() { - es.last_ping = None; - } - } - /// Handle a received Disco Ping. /// /// - Ensures the paths the ping was received on is a known path for this endpoint. 
@@ -733,7 +721,7 @@ impl NodeState { pub(super) fn handle_ping( &mut self, path: SendAddr, - tx_id: stun::TransactionId, + tx_id: stun_rs::TransactionId, ) -> PingHandled { let now = Instant::now(); diff --git a/iroh/src/magicsock/node_map/path_state.rs b/iroh/src/magicsock/node_map/path_state.rs index 7241121722a..2d6855cab30 100644 --- a/iroh/src/magicsock/node_map/path_state.rs +++ b/iroh/src/magicsock/node_map/path_state.rs @@ -6,7 +6,6 @@ use std::{ }; use iroh_base::NodeId; -use iroh_relay::protos::stun; use n0_future::time::{Duration, Instant}; use tracing::{debug, event, Level}; @@ -39,7 +38,7 @@ pub(super) struct PathState { /// If non-zero, means that this was an endpoint that we learned about at runtime (from an /// incoming ping). If so, we keep the time updated and use it to discard old candidates. // NOTE: tx_id Originally added in tailscale due to . - last_got_ping: Option<(Instant, stun::TransactionId)>, + last_got_ping: Option<(Instant, stun_rs::TransactionId)>, /// The time this endpoint was last advertised via a call-me-maybe DISCO message. 
pub(super) call_me_maybe_time: Option, @@ -107,7 +106,7 @@ impl PathState { pub(super) fn with_ping( node_id: NodeId, path: SendAddr, - tx_id: stun::TransactionId, + tx_id: stun_rs::TransactionId, source: Source, now: Instant, ) -> Self { @@ -239,7 +238,7 @@ impl PathState { } } - pub(super) fn handle_ping(&mut self, tx_id: stun::TransactionId, now: Instant) -> PingRole { + pub(super) fn handle_ping(&mut self, tx_id: stun_rs::TransactionId, now: Instant) -> PingRole { if Some(&tx_id) == self.last_got_ping.as_ref().map(|(_t, tx_id)| tx_id) { PingRole::Duplicate } else { diff --git a/iroh/src/magicsock/transports.rs b/iroh/src/magicsock/transports.rs index b37f9ee3b94..20936fbde39 100644 --- a/iroh/src/magicsock/transports.rs +++ b/iroh/src/magicsock/transports.rs @@ -21,7 +21,8 @@ pub(crate) use self::ip::IpTransport; #[cfg(not(wasm_browser))] use self::ip::{IpNetworkChangeSender, IpSender}; pub(crate) use self::relay::{RelayActorConfig, RelayTransport}; -use super::{MagicSock, NetInfo}; +use super::MagicSock; +use crate::net_report::Report; /// Manages the different underlying data transports that the magicsock /// can support. @@ -262,14 +263,14 @@ pub(crate) struct NetworkChangeSender { } impl NetworkChangeSender { - pub(crate) fn on_network_change(&self, info: &NetInfo) { + pub(crate) fn on_network_change(&self, report: &Report) { #[cfg(not(wasm_browser))] for ip in &self.ip { - ip.on_network_change(info); + ip.on_network_change(report); } for relay in &self.relay { - relay.on_network_change(info); + relay.on_network_change(report); } } @@ -337,10 +338,6 @@ impl Addr { matches!(self, Self::Relay(..)) } - pub(crate) fn is_ip(&self) -> bool { - matches!(self, Self::Ip(..)) - } - /// Returns `None` if not an `Ip`. 
pub(crate) fn into_socket_addr(self) -> Option { match self { diff --git a/iroh/src/magicsock/transports/ip.rs b/iroh/src/magicsock/transports/ip.rs index cc4b945d541..68033695305 100644 --- a/iroh/src/magicsock/transports/ip.rs +++ b/iroh/src/magicsock/transports/ip.rs @@ -87,10 +87,6 @@ impl IpTransport { } } - pub(crate) fn socket(&self) -> Arc { - self.socket.clone() - } - pub(super) fn create_sender(&self) -> IpSender { let sender = self.socket.clone().create_sender(); IpSender { @@ -109,14 +105,16 @@ pub(super) struct IpNetworkChangeSender { impl IpNetworkChangeSender { pub(super) fn rebind(&self) -> io::Result<()> { + let old_addr = self.local_addr.get(); self.socket.rebind()?; let addr = self.socket.local_addr()?; self.local_addr.set(addr).ok(); + trace!("rebound from {} to {}", old_addr, addr); Ok(()) } - pub(super) fn on_network_change(&self, _info: &crate::magicsock::NetInfo) { + pub(super) fn on_network_change(&self, _info: &crate::magicsock::Report) { // Nothing to do for now } } diff --git a/iroh/src/magicsock/transports/relay.rs b/iroh/src/magicsock/transports/relay.rs index 0a9d9ef89c0..9345bed6af2 100644 --- a/iroh/src/magicsock/transports/relay.rs +++ b/iroh/src/magicsock/transports/relay.rs @@ -142,8 +142,10 @@ pub(super) struct RelayNetworkChangeSender { } impl RelayNetworkChangeSender { - pub(super) fn on_network_change(&self, info: &crate::magicsock::NetInfo) { - self.send_relay_actor(RelayActorMessage::NetworkChange { info: info.clone() }); + pub(super) fn on_network_change(&self, report: &crate::magicsock::Report) { + self.send_relay_actor(RelayActorMessage::NetworkChange { + report: report.clone(), + }); } pub(super) fn rebind(&self) -> io::Result<()> { diff --git a/iroh/src/magicsock/transports/relay/actor.rs b/iroh/src/magicsock/transports/relay/actor.rs index 37e84d00a99..cf5c684cfde 100644 --- a/iroh/src/magicsock/transports/relay/actor.rs +++ b/iroh/src/magicsock/transports/relay/actor.rs @@ -62,7 +62,8 @@ use url::Url; 
#[cfg(not(wasm_browser))] use crate::dns::DnsResolver; use crate::{ - magicsock::{Metrics as MagicsockMetrics, NetInfo, RelayContents}, + magicsock::{Metrics as MagicsockMetrics, RelayContents}, + net_report::Report, util::MaybeFuture, }; @@ -844,7 +845,7 @@ impl ConnectedRelayState { pub(super) enum RelayActorMessage { MaybeCloseRelaysOnRebind, - NetworkChange { info: NetInfo }, + NetworkChange { report: Report }, } #[derive(Debug, Clone)] @@ -970,8 +971,8 @@ impl RelayActor { async fn handle_msg(&mut self, msg: RelayActorMessage) { match msg { - RelayActorMessage::NetworkChange { info } => { - self.on_network_change(info).await; + RelayActorMessage::NetworkChange { report } => { + self.on_network_change(report).await; } RelayActorMessage::MaybeCloseRelaysOnRebind => { self.maybe_close_relays_on_rebind().await; @@ -1007,19 +1008,19 @@ impl RelayActor { } } - async fn on_network_change(&mut self, info: NetInfo) { + async fn on_network_change(&mut self, report: Report) { let my_relay = self.config.my_relay.get(); - if info.preferred_relay == my_relay { + if report.preferred_relay == my_relay { // No change. 
return; } let old_relay = self .config .my_relay - .set(info.preferred_relay.clone()) + .set(report.preferred_relay.clone()) .unwrap_or_else(|e| e); - if let Some(relay_url) = info.preferred_relay { + if let Some(relay_url) = report.preferred_relay { self.config.metrics.relay_home_change.inc(); // On change, notify all currently connected relay servers and diff --git a/iroh/src/net_report.rs b/iroh/src/net_report.rs index 4991db9b0ff..577e06ba504 100644 --- a/iroh/src/net_report.rs +++ b/iroh/src/net_report.rs @@ -11,43 +11,49 @@ #![cfg_attr(wasm_browser, allow(unused))] use std::{ - collections::{BTreeMap, HashMap}, - fmt::{self, Debug}, - net::{SocketAddr, SocketAddrV4, SocketAddrV6}, + collections::{BTreeMap, BTreeSet}, + fmt::Debug, + net::SocketAddr, sync::Arc, }; -use bytes::Bytes; +use defaults::timeouts::PROBES_TIMEOUT; use iroh_base::RelayUrl; #[cfg(not(wasm_browser))] use iroh_relay::dns::DnsResolver; -use iroh_relay::{protos::stun, RelayMap}; +#[cfg(not(wasm_browser))] +use iroh_relay::quic::QuicClient; +#[cfg(not(wasm_browser))] +use iroh_relay::RelayNode; +use iroh_relay::{ + quic::{QUIC_ADDR_DISC_CLOSE_CODE, QUIC_ADDR_DISC_CLOSE_REASON}, + RelayMap, +}; +#[cfg(not(wasm_browser))] +use n0_future::task; use n0_future::{ - task::{self, AbortOnDropHandle}, - time::{Duration, Instant}, + task::AbortOnDropHandle, + time::{self, Duration, Instant}, + StreamExt, }; -use nested_enum_utils::common_fields; +use n0_watcher::{Watchable, Watcher}; +use tokio::task::JoinSet; +use tokio_util::sync::CancellationToken; +use tracing::{debug, trace, warn}; + #[cfg(not(wasm_browser))] -use netwatch::UdpSocket; -use reportgen::ActorRunError; -use snafu::Snafu; -use tokio::sync::{self, mpsc, oneshot}; -use tracing::{debug, error, info_span, trace, warn, Instrument}; +use self::reportgen::QadProbeReport; +use self::reportgen::{ProbeFinished, ProbeReport}; mod defaults; -#[cfg(not(wasm_browser))] -mod dns; mod ip_mapped_addrs; mod metrics; -#[cfg(not(wasm_browser))] -mod 
ping; +mod probes; +mod report; mod reportgen; mod options; -#[cfg(not(wasm_browser))] -pub use stun_utils::bind_local_stun_socket; - /// We "vendor" what we need of the library in browsers for simplicity. /// /// We could consider making `portmapper` compile to wasm in the future, @@ -68,153 +74,118 @@ pub(crate) mod portmapper { } pub(crate) use ip_mapped_addrs::{IpMappedAddr, IpMappedAddresses}; -pub use metrics::Metrics; -pub use options::Options; -pub use reportgen::QuicConfig; + +pub(crate) use self::reportgen::IfStateDetails; #[cfg(not(wasm_browser))] -use reportgen::SocketState; +use self::reportgen::SocketState; +pub use self::{ + metrics::Metrics, + options::Options, + probes::Probe, + report::{RelayLatencies, Report}, + reportgen::QuicConfig, +}; +use crate::util::MaybeFuture; const FULL_REPORT_INTERVAL: Duration = Duration::from_secs(5 * 60); /// The maximum latency of all nodes, if none are found yet. /// /// Normally the max latency of all nodes is computed, but if we don't yet know any nodes -/// latencies we return this as default. This is the value of the initial STUN probe +/// latencies we return this as default. This is the value of the initial QAD probe /// delays. It is only used as time to wait for further latencies to arrive, which *should* /// never happen unless there already is at least one latency. Yet here we are, defining a /// default which will never be used. const DEFAULT_MAX_LATENCY: Duration = Duration::from_millis(100); -/// A net_report report. -/// -/// Can be obtained by calling [`Client::get_report`]. -#[derive(Default, Debug, PartialEq, Eq, Clone)] -pub struct Report { - /// A UDP STUN round trip completed. - pub udp: bool, - /// An IPv6 STUN round trip completed. - pub ipv6: bool, - /// An IPv4 STUN round trip completed. 
- pub ipv4: bool, - /// An IPv6 packet was able to be sent - pub ipv6_can_send: bool, - /// an IPv4 packet was able to be sent - pub ipv4_can_send: bool, - /// could bind a socket to ::1 - pub os_has_ipv6: bool, - /// An ICMPv4 round trip completed, `None` if not checked. - pub icmpv4: Option, - /// An ICMPv6 round trip completed, `None` if not checked. - pub icmpv6: Option, - /// Whether STUN results depend on which STUN server you're talking to (on IPv4). - pub mapping_varies_by_dest_ip: Option, - /// Whether STUN results depend on which STUN server you're talking to (on IPv6). +const ENOUGH_NODES: usize = 3; + +/// Client to run net_reports. +#[derive(Debug)] +pub(crate) struct Client { + #[cfg(not(wasm_browser))] + socket_state: SocketState, + metrics: Arc, + probes: BTreeSet, + relay_map: RelayMap, + #[cfg(not(wasm_browser))] + qad_conns: QadConns, + #[cfg(any(test, feature = "test-utils"))] + insecure_skip_relay_cert_verify: bool, + + /// A collection of previously generated reports. /// - /// Note that we don't really expect this to happen and are merely logging this if - /// detecting rather than using it. For now. - pub mapping_varies_by_dest_ipv6: Option, - /// Whether the router supports communicating between two local devices through the NATted - /// public IP address (on IPv4). - pub hair_pinning: Option, - /// Probe indicating the presence of port mapping protocols on the LAN. - pub portmap_probe: Option, - /// `None` for unknown - pub preferred_relay: Option, - /// keyed by relay Url - pub relay_latency: RelayLatencies, - /// keyed by relay Url - pub relay_v4_latency: RelayLatencies, - /// keyed by relay Url - pub relay_v6_latency: RelayLatencies, - /// ip:port of global IPv4 - pub global_v4: Option, - /// `[ip]:port` of global IPv6 - pub global_v6: Option, - /// CaptivePortal is set when we think there's a captive portal that is - /// intercepting HTTP traffic. 
- pub captive_portal: Option, + /// Sometimes it is useful to look at past reports to decide what to do. + reports: Reports, } -impl fmt::Display for Report { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Debug::fmt(&self, f) - } +#[cfg(not(wasm_browser))] +#[derive(Debug, Default)] +struct QadConns { + v4: Option<(RelayUrl, QadConn)>, + v6: Option<(RelayUrl, QadConn)>, } -/// Latencies per relay node. -#[derive(Debug, Default, PartialEq, Eq, Clone)] -pub struct RelayLatencies(BTreeMap); - -impl RelayLatencies { - fn new() -> Self { - Default::default() - } - - /// Updates a relay's latency, if it is faster than before. - fn update_relay(&mut self, url: RelayUrl, latency: Duration) { - let val = self.0.entry(url).or_insert(latency); - if latency < *val { - *val = latency; +#[cfg(not(wasm_browser))] +impl QadConns { + fn clear(&mut self) { + if let Some((_, conn)) = self.v4.take() { + conn.conn + .close(QUIC_ADDR_DISC_CLOSE_CODE, QUIC_ADDR_DISC_CLOSE_REASON); + } + if let Some((_, conn)) = self.v6.take() { + conn.conn + .close(QUIC_ADDR_DISC_CLOSE_CODE, QUIC_ADDR_DISC_CLOSE_REASON); } } - /// Merges another [`RelayLatencies`] into this one. - /// - /// For each relay the latency is updated using [`RelayLatencies::update_relay`]. - fn merge(&mut self, other: &RelayLatencies) { - for (url, latency) in other.iter() { - self.update_relay(url.clone(), latency); + fn current(&self) -> Vec { + let mut reports = Vec::new(); + if let Some((_, ref conn)) = self.v4 { + if let Some(mut r) = conn.observer.get() { + // grab latest rtt + r.latency = conn.conn.rtt(); + reports.push(ProbeReport::QadIpv4(r)); + } } - } - /// Returns the maximum latency for all relays. - /// - /// If there are not yet any latencies this will return [`DEFAULT_MAX_LATENCY`]. 
- fn max_latency(&self) -> Duration { - self.0 - .values() - .max() - .copied() - .unwrap_or(DEFAULT_MAX_LATENCY) - } + if let Some((_, ref conn)) = self.v6 { + if let Some(mut r) = conn.observer.get() { + // grab latest rtt + r.latency = conn.conn.rtt(); + reports.push(ProbeReport::QadIpv6(r)); + } + } - /// Returns an iterator over all the relays and their latencies. - pub fn iter(&self) -> impl Iterator + '_ { - self.0.iter().map(|(k, v)| (k, *v)) + reports } - fn len(&self) -> usize { - self.0.len() - } + fn watch_v4(&self) -> impl n0_future::Stream> + Unpin { + let watcher = self.v4.as_ref().map(|(_url, conn)| conn.observer.watch()); - fn is_empty(&self) -> bool { - self.0.is_empty() + if let Some(watcher) = watcher { + watcher.stream_updates_only().boxed() + } else { + n0_future::stream::empty().boxed() + } } - fn get(&self, url: &RelayUrl) -> Option { - self.0.get(url).copied() + fn watch_v6(&self) -> impl n0_future::Stream> + Unpin { + let watcher = self.v6.as_ref().map(|(_url, conn)| conn.observer.watch()); + if let Some(watcher) = watcher { + watcher.stream_updates_only().boxed() + } else { + n0_future::stream::empty().boxed() + } } } -/// Client to run net_reports. -/// -/// Creating this creates a net_report actor which runs in the background. Most of the time -/// it is idle unless [`Client::get_report`] is called, which is the main interface. -/// -/// The [`Client`] struct can be cloned and results multiple handles to the running actor. -/// If all [`Client`]s are dropped the actor stops running. -/// -/// While running the net_report actor expects to be passed all received stun packets using -/// `Addr::receive_stun_packet`. +#[cfg(not(wasm_browser))] #[derive(Debug)] -pub struct Client { - /// Channel to send message to the [`Actor`]. - /// - /// If all senders are dropped, in other words all clones of this struct are dropped, - /// the actor will terminate. - addr: Addr, - /// Ensures the actor is terminated when the client is dropped. 
- _drop_guard: Arc>, +struct QadConn { + conn: quinn::Connection, + observer: Watchable>, + _handle: AbortOnDropHandle<()>, } #[derive(Debug)] @@ -222,9 +193,9 @@ struct Reports { /// Do a full relay scan, even if last is `Some`. next_full: bool, /// Some previous reports. - prev: HashMap>, + prev: BTreeMap, /// Most recent report. - last: Option>, + last: Option, /// Time of last full (non-incremental) report. last_full: Instant, } @@ -232,7 +203,7 @@ struct Reports { impl Default for Reports { fn default() -> Self { Self { - next_full: Default::default(), + next_full: true, prev: Default::default(), last: Default::default(), last_full: Instant::now(), @@ -242,363 +213,63 @@ impl Default for Reports { impl Client { /// Creates a new net_report client. - /// - /// This starts a connected actor in the background. Once the client is dropped it will - /// stop running. - pub fn new( - #[cfg(not(wasm_browser))] port_mapper: Option, + pub(crate) fn new( #[cfg(not(wasm_browser))] dns_resolver: DnsResolver, #[cfg(not(wasm_browser))] ip_mapped_addrs: Option, + relay_map: RelayMap, + opts: Options, metrics: Arc, ) -> Self { - let mut actor = Actor::new( - #[cfg(not(wasm_browser))] - port_mapper, - #[cfg(not(wasm_browser))] - dns_resolver, - #[cfg(not(wasm_browser))] - ip_mapped_addrs, - metrics, - ); - let addr = actor.addr(); - let task = task::spawn( - async move { actor.run().await }.instrument(info_span!("net_report.actor")), - ); - let drop_guard = AbortOnDropHandle::new(task); - Client { - addr, - _drop_guard: Arc::new(drop_guard), - } - } - - /// Returns a new address to send messages to this actor. - /// - /// Unlike the client itself the returned [`Addr`] does not own the actor task, it only - /// allows sending messages to the actor. 
- pub fn addr(&self) -> Addr { - self.addr.clone() - } + let probes = opts.as_protocols(); + #[cfg(any(test, feature = "test-utils"))] + let insecure_skip_relay_cert_verify = opts.insecure_skip_relay_cert_verify; - /// Runs a net_report, returning the report. - /// - /// It may not be called concurrently with itself, `&mut self` takes care of that. - /// - /// The *stun_conn4* and *stun_conn6* endpoints are bound UDP sockets to use to send out - /// STUN packets. This function **will not read from the sockets**, as they may be - /// receiving other traffic as well, normally they are the sockets carrying the real - /// traffic. Thus all stun packets received on those sockets should be passed to - /// `Addr::receive_stun_packet` in order for this function to receive the stun - /// responses and function correctly. - /// - /// If these are not passed in this will bind sockets for STUN itself, though results - /// may not be as reliable. - /// - /// The *quic_config* takes a [`QuicConfig`], a combination of a QUIC endpoint and - /// a client configuration that can be use for verifying the relay server connection. - /// When available, the report will attempt to get an observed public address - /// using QUIC address discovery. - /// - /// When `None`, it will disable the QUIC address discovery probes. - /// - /// This will attempt to use *all* probe protocols. 
- #[cfg(test)] - pub async fn get_report_all( - &mut self, - relay_map: RelayMap, - #[cfg(not(wasm_browser))] stun_sock_v4: Option>, - #[cfg(not(wasm_browser))] stun_sock_v6: Option>, - #[cfg(not(wasm_browser))] quic_config: Option, - ) -> Result, ReportError> { #[cfg(not(wasm_browser))] - let opts = Options::default() - .stun_v4(stun_sock_v4) - .stun_v6(stun_sock_v6) - .quic_config(quic_config); - #[cfg(wasm_browser)] - let opts = Options::default(); - - let rx = self.get_report_channel(relay_map, opts).await?; - match rx.await { - Ok(res) => res, - Err(_) => Err(ActorGoneSnafu.build()), - } - } - - /// Runs a net_report, returning the report. - /// - /// It may not be called concurrently with itself, `&mut self` takes care of that. - /// - /// Look at [`Options`] for the different configuration options. - pub async fn get_report( - &mut self, - relay_map: RelayMap, - opts: Options, - ) -> Result, ReportError> { - let rx = self.get_report_channel(relay_map, opts).await?; - match rx.await { - Ok(res) => res, - Err(_) => Err(ActorGoneSnafu.build()), - } - } + let quic_client = opts + .quic_config + .map(|c| iroh_relay::quic::QuicClient::new(c.ep, c.client_config)); - /// Get report with channel - /// - /// Look at [`Options`] for the different configuration options. - pub(crate) async fn get_report_channel( - &mut self, - relay_map: RelayMap, - opts: Options, - ) -> Result, ReportError>>, ReportError> { - let (tx, rx) = oneshot::channel(); - self.addr - .send(Message::RunCheck { - relay_map, - opts, - response_tx: tx, - }) - .await - .map_err(|_| ActorGoneSnafu.build())?; - Ok(rx) - } -} - -#[derive(Debug)] -pub(crate) struct Inflight { - /// The STUN transaction ID. - txn: stun::TransactionId, - /// The time the STUN probe was sent. - start: Instant, - /// Response to send STUN results: latency of STUN response and the discovered address. - s: sync::oneshot::Sender<(Duration, SocketAddr)>, -} - -/// Messages to send to the [`Actor`]. 
-#[derive(Debug)] -#[allow(clippy::large_enum_variant)] -pub(crate) enum Message { - /// Run a net_report. - /// - /// Only one net_report can be run at a time, trying to run multiple concurrently will - /// fail. - RunCheck { - /// The map of relays we want to probe - relay_map: RelayMap, - /// Options for the report - opts: Options, - /// Channel to receive the response. - response_tx: oneshot::Sender, ReportError>>, - }, - /// A report produced by the [`reportgen`] actor. - ReportReady { report: Box }, - /// The [`reportgen`] actor failed to produce a report. - ReportAborted { reason: ActorRunError }, - /// An incoming STUN packet to parse. - StunPacket { - /// The raw UDP payload. - payload: Bytes, - /// The address this was claimed to be received from. - from_addr: SocketAddr, - }, - /// A probe wants to register an in-flight STUN request. - /// - /// The sender is signalled once the STUN packet is registered with the actor and will - /// correctly accept the STUN response. - InFlightStun(Inflight, oneshot::Sender<()>), -} - -/// Sender to the main service. -/// -/// Unlike [`Client`] this is the raw channel to send messages over. Keeping this alive -/// will not keep the actor alive, which makes this handy to pass to internal tasks. -#[derive(Debug, Clone)] -pub struct Addr { - sender: mpsc::Sender, - metrics: Arc, -} - -impl Addr { - /// Pass a received STUN packet to the net_reporter. - /// - /// Normally the UDP sockets to send STUN messages from are passed in so that STUN - /// packets are sent from the sockets that carry the real traffic. However because - /// these sockets carry real traffic they will also receive non-STUN traffic, thus the - /// net_report actor does not read from the sockets directly. If you receive a STUN - /// packet on the socket you should pass it to this method. - /// - /// It is safe to call this even when the net_report actor does not currently have any - /// in-flight STUN probes. 
The actor will simply ignore any stray STUN packets. - /// - /// There is an implicit queue here which may drop packets if the actor does not keep up - /// consuming them. - pub fn receive_stun_packet(&self, payload: Bytes, src: SocketAddr) { - if let Err(mpsc::error::TrySendError::Full(_)) = self.sender.try_send(Message::StunPacket { - payload, - from_addr: src, - }) { - self.metrics.stun_packets_dropped.inc(); - warn!("dropping stun packet from {}", src); - } - } - - async fn send(&self, msg: Message) -> Result<(), mpsc::error::SendError> { - self.sender.send(msg).await.inspect_err(|_| { - error!("net_report actor lost"); - }) - } -} - -/// The net_report actor. -/// -/// This actor runs for the entire duration there's a [`Client`] connected. -#[derive(Debug)] -struct Actor { - // Actor plumbing. - /// Actor messages channel. - /// - /// If there are no more senders the actor stops. - receiver: mpsc::Receiver, - /// The sender side of the messages channel. - /// - /// This allows creating new [`Addr`]s from the actor. - sender: mpsc::Sender, - /// A collection of previously generated reports. - /// - /// Sometimes it is useful to look at past reports to decide what to do. - reports: Reports, - - // Actor configuration. - /// The port mapper client, if those are requested. - /// - /// The port mapper is responsible for talking to routers via UPnP and the like to try - /// and open ports. - #[cfg(not(wasm_browser))] - port_mapper: Option, - - // Actor state. - /// Information about the currently in-flight STUN requests. - /// - /// This is used to complete the STUN probe when receiving STUN packets. - in_flight_stun_requests: HashMap, - /// The [`reportgen`] actor currently generating a report. 
- current_report_run: Option, - - /// The DNS resolver to use for probes that need to perform DNS lookups - #[cfg(not(wasm_browser))] - dns_resolver: DnsResolver, - - /// The [`IpMappedAddresses`] that allows you to do QAD in iroh - #[cfg(not(wasm_browser))] - ip_mapped_addrs: Option, - metrics: Arc, -} - -impl Actor { - /// Creates a new actor. - /// - /// This does not start the actor, see [`Actor::run`] for this. You should not - /// normally create this directly but rather create a [`Client`]. - fn new( - #[cfg(not(wasm_browser))] port_mapper: Option, - #[cfg(not(wasm_browser))] dns_resolver: DnsResolver, - #[cfg(not(wasm_browser))] ip_mapped_addrs: Option, - metrics: Arc, - ) -> Self { - // TODO: consider an instrumented flume channel so we have metrics. - let (sender, receiver) = mpsc::channel(32); - Self { - receiver, - sender, - reports: Default::default(), - #[cfg(not(wasm_browser))] - port_mapper, - in_flight_stun_requests: Default::default(), - current_report_run: None, - #[cfg(not(wasm_browser))] + #[cfg(not(wasm_browser))] + let socket_state = SocketState { + quic_client, dns_resolver, - #[cfg(not(wasm_browser))] ip_mapped_addrs, - metrics, - } - } - - /// Returns the channel to send messages to the actor. - fn addr(&self) -> Addr { - Addr { - sender: self.sender.clone(), - metrics: self.metrics.clone(), - } - } + }; - /// Run the actor. - /// - /// It will now run and handle messages. Once the connected [`Client`] (including all - /// its clones) is dropped this will terminate. 
- async fn run(&mut self) { - debug!("net_report actor starting"); - while let Some(msg) = self.receiver.recv().await { - trace!(?msg, "handling message"); - match msg { - Message::RunCheck { - relay_map, - opts, - response_tx, - } => { - self.handle_run_check(relay_map, opts, response_tx); - } - Message::ReportReady { report } => { - self.handle_report_ready(*report); - } - Message::ReportAborted { reason: err } => { - self.handle_report_aborted(err); - } - Message::StunPacket { payload, from_addr } => { - self.handle_stun_packet(&payload, from_addr); - } - Message::InFlightStun(inflight, response_tx) => { - self.handle_in_flight_stun(inflight, response_tx); - } - } + Client { + #[cfg(not(wasm_browser))] + socket_state, + metrics, + reports: Reports::default(), + probes, + relay_map, + #[cfg(not(wasm_browser))] + qad_conns: QadConns::default(), + #[cfg(any(test, feature = "test-utils"))] + insecure_skip_relay_cert_verify, } } - /// Starts a check run as requested by the [`Message::RunCheck`] message. + /// Generates a [`Report`]. /// - /// If *stun_sock_v4* or *stun_sock_v6* are not provided this will bind the sockets - /// itself. This is not ideal since really you want to send STUN probes from the - /// sockets you will be using. - fn handle_run_check( - &mut self, - relay_map: RelayMap, - opts: Options, - response_tx: oneshot::Sender, ReportError>>, - ) { - let protocols = opts.to_protocols(); - #[cfg(not(wasm_browser))] - let socket_state = SocketState { - port_mapper: self.port_mapper.clone(), - stun_sock4: opts.stun_sock_v4, - stun_sock6: opts.stun_sock_v6, - quic_config: opts.quic_config, - dns_resolver: self.dns_resolver.clone(), - ip_mapped_addrs: self.ip_mapped_addrs.clone(), - }; - trace!("Attempting probes for protocols {protocols:#?}"); - if self.current_report_run.is_some() { - response_tx.send(Err(AlreadyRunningSnafu.build())).ok(); - return; - } - + /// Look at [`Options`] for the different configuration options. 
+ pub(crate) async fn get_report(&mut self, if_state: IfStateDetails, is_major: bool) -> Report { let now = Instant::now(); - let mut do_full = self.reports.next_full + let mut do_full = is_major + || self.reports.next_full || now.duration_since(self.reports.last_full) > FULL_REPORT_INTERVAL; + debug!(%do_full, "net_report starting"); + // If the last report had a captive portal and reported no UDP access, // it's possible that we didn't get a useful net_report due to the // captive portal blocking us. If so, make this report a full (non-incremental) one. if !do_full { if let Some(ref last) = self.reports.last { - do_full = !last.udp && last.captive_portal.unwrap_or_default(); + if !last.has_udp() && last.captive_portal == Some(true) { + do_full = true; + } } } if do_full { @@ -609,130 +280,345 @@ impl Actor { } self.metrics.reports.inc(); - let actor = reportgen::Client::new( - self.addr(), + let enough_relays = std::cmp::min(self.relay_map.len(), ENOUGH_NODES); + #[cfg(wasm_browser)] + let if_state = IfStateDetails::default(); + #[cfg(not(wasm_browser))] + let if_state = IfStateDetails { + have_v4: if_state.have_v4, + have_v6: if_state.have_v6, + }; + + let mut report = Report::default(); + + // Start the reportgen client to start any needed probes + let (actor, mut probe_rx) = reportgen::Client::new( self.reports.last.clone(), - relay_map, - protocols, - self.metrics.clone(), + self.relay_map.clone(), + self.probes.clone(), + if_state.clone(), #[cfg(not(wasm_browser))] - socket_state, + self.socket_state.clone(), #[cfg(any(test, feature = "test-utils"))] - opts.insecure_skip_relay_cert_verify, + self.insecure_skip_relay_cert_verify, ); - self.current_report_run = Some(ReportRun { - _reportgen: actor, - report_tx: response_tx, - }); - } + #[cfg(not(wasm_browser))] + let reports = self + .spawn_qad_probes(&if_state, enough_relays, do_full) + .await; - fn handle_report_ready(&mut self, report: Report) { - let report = self.finish_and_store_report(report); - 
self.in_flight_stun_requests.clear(); - if let Some(ReportRun { report_tx, .. }) = self.current_report_run.take() { - report_tx.send(Ok(report)).ok(); + #[cfg(not(wasm_browser))] + for r in reports { + report.update(&r); } - } - fn handle_report_aborted(&mut self, reason: ActorRunError) { - self.in_flight_stun_requests.clear(); - if let Some(ReportRun { report_tx, .. }) = self.current_report_run.take() { - report_tx.send(Err(AbortSnafu { reason }.build())).ok(); + let mut timeout_fut = std::pin::pin!(MaybeFuture::default()); + + #[cfg(not(wasm_browser))] + let mut qad_v4_stream = self.qad_conns.watch_v4(); + #[cfg(wasm_browser)] + let mut qad_v4_stream = n0_future::stream::empty::>(); + #[cfg(not(wasm_browser))] + let mut qad_v6_stream = self.qad_conns.watch_v6(); + #[cfg(wasm_browser)] + let mut qad_v6_stream = n0_future::stream::empty::>(); + + loop { + tokio::select! { + biased; + + _ = &mut timeout_fut, if timeout_fut.is_some() => { + trace!("timeout done, shutting down"); + drop(actor); // shuts down the probes + break; + } + + Some(Some(r)) = qad_v4_stream.next() => { + #[cfg(not(wasm_browser))] + { + trace!(?r, "new report from QAD V4"); + report.update(&ProbeReport::QadIpv4(r)); + } + } + + Some(Some(r)) = qad_v6_stream.next() => { + #[cfg(not(wasm_browser))] + { + trace!(?r, "new report from QAD V6"); + report.update(&ProbeReport::QadIpv6(r)); + } + } + + maybe_probe = probe_rx.recv() => { + let Some(probe_res) = maybe_probe else { + break; + }; + trace!(?probe_res, "handling probe"); + match probe_res { + ProbeFinished::Regular(probe) => match probe { + Ok(probe) => { + report.update(&probe); + if timeout_fut.is_none() { + if let Some(timeout) = self.have_enough_reports(enough_relays, &report) { + timeout_fut.as_mut().set_future(time::sleep(timeout)); + } + } + } + Err(err) => { + trace!("probe errored: {:?}", err); + } + }, + #[cfg(not(wasm_browser))] + ProbeFinished::CaptivePortal(portal) => { + report.captive_portal = portal; + } + } + } + } } + + 
self.add_report_history_and_set_preferred_relay(&mut report); + debug!( + ?report, + "generated report in {:02}ms", + now.elapsed().as_millis() + ); + + report } - /// Handles [`Message::StunPacket`]. - /// - /// If there are currently no in-flight stun requests registered this is dropped, - /// otherwise forwarded to the probe. - fn handle_stun_packet(&mut self, pkt: &[u8], src: SocketAddr) { - trace!(%src, "received STUN packet"); - if self.in_flight_stun_requests.is_empty() { - return; + #[cfg(not(wasm_browser))] + async fn spawn_qad_probes( + &mut self, + if_state: &IfStateDetails, + enough_relays: usize, + do_full: bool, + ) -> Vec { + use tracing::{info_span, Instrument}; + + debug!("spawning QAD probes"); + + let Some(ref quic_client) = self.socket_state.quic_client else { + return Vec::new(); + }; + + if do_full { + // clear out existing connections if we are doing a full reset + self.qad_conns.clear(); } - #[cfg(feature = "metrics")] - match &src { - SocketAddr::V4(_) => { - self.metrics.stun_packets_recv_ipv4.inc(); + if let Some((url, conn)) = &self.qad_conns.v4 { + // verify conn is still around + if let Some(reason) = conn.conn.close_reason() { + trace!(?url, "QAD v4 conn closed: {}", reason); + self.qad_conns.v4.take(); } - SocketAddr::V6(_) => { - self.metrics.stun_packets_recv_ipv6.inc(); + } + if let Some((url, conn)) = &self.qad_conns.v6 { + // verify conn is still around + if let Some(reason) = conn.conn.close_reason() { + trace!(?url, "QAD v6 conn closed: {}", reason); + self.qad_conns.v6.take(); } } + if self.qad_conns.v4.is_some() && self.qad_conns.v6.is_some() == if_state.have_v6 { + trace!("not spawning QAD, already have probes"); + return self.qad_conns.current(); + } - match stun::parse_response(pkt) { - Ok((txn, addr_port)) => match self.in_flight_stun_requests.remove(&txn) { - Some(inf) => { - debug!(%src, %txn, "received known STUN packet"); - let elapsed = inf.start.elapsed(); - inf.s.send((elapsed, addr_port)).ok(); - } - None => { - 
debug!(%src, %txn, "received unexpected STUN message response"); - } - }, - Err(err) => { - match stun::parse_binding_request(pkt) { - Ok(txn) => { - // Is this our hairpin request? - match self.in_flight_stun_requests.remove(&txn) { - Some(inf) => { - debug!(%src, %txn, "received our hairpin STUN request"); - let elapsed = inf.start.elapsed(); - inf.s.send((elapsed, src)).ok(); + // TODO: randomize choice? + const MAX_RELAYS: usize = 5; + + let mut v4_buf = JoinSet::new(); + let cancel_v4 = CancellationToken::new(); + let mut v6_buf = JoinSet::new(); + let cancel_v6 = CancellationToken::new(); + + for relay_node in self.relay_map.nodes().take(MAX_RELAYS) { + if if_state.have_v4 { + debug!(?relay_node.url, "v4 QAD probe"); + let ip_mapped_addrs = self.socket_state.ip_mapped_addrs.clone(); + let relay_node = relay_node.clone(); + let dns_resolver = self.socket_state.dns_resolver.clone(); + let quic_client = quic_client.clone(); + let relay_url = relay_node.url.clone(); + v4_buf.spawn( + cancel_v4 + .child_token() + .run_until_cancelled_owned(time::timeout( + PROBES_TIMEOUT, + run_probe_v4(ip_mapped_addrs, relay_node, quic_client, dns_resolver), + )) + .instrument(info_span!("QAD IPv6", %relay_url)), + ); + } + + if if_state.have_v6 { + debug!(?relay_node.url, "v6 QAD probe"); + let ip_mapped_addrs = self.socket_state.ip_mapped_addrs.clone(); + let relay_node = relay_node.clone(); + let dns_resolver = self.socket_state.dns_resolver.clone(); + let quic_client = quic_client.clone(); + let relay_url = relay_node.url.clone(); + v6_buf.spawn( + cancel_v6 + .child_token() + .run_until_cancelled_owned(time::timeout( + PROBES_TIMEOUT, + run_probe_v6(ip_mapped_addrs, relay_node, quic_client, dns_resolver), + )) + .instrument(info_span!("QAD IPv6", %relay_url)), + ); + } + } + + let mut reports = Vec::new(); + + loop { + if reports.len() >= enough_relays { + debug!("enough probes: {}", reports.len()); + cancel_v4.cancel(); + cancel_v6.cancel(); + break; + } + + tokio::select! 
{ + biased; + + val = v4_buf.join_next(), if !v4_buf.is_empty() => { + match val { + Some(Ok(Some(Ok(res)))) => { + match res { + Ok((r, conn)) => { + debug!(?r, "got v4 QAD conn"); + let url = r.node.clone(); + reports.push(ProbeReport::QadIpv4(r)); + if self.qad_conns.v4.is_none() { + self.qad_conns.v4.replace((url, conn)); + } else { + conn.conn.close(QUIC_ADDR_DISC_CLOSE_CODE, QUIC_ADDR_DISC_CLOSE_REASON); + } + } + Err(err) => { + debug!("probe v4 failed: {:?}", err); + } } - None => { - debug!(%src, %txn, "unknown STUN request"); + } + Some(Err(err)) => { + if err.is_panic() { + panic!("probe v4 panicked: {:?}", err); } + warn!("probe v4 failed: {:?}", err); } + Some(Ok(None)) => { + debug!("probe v4 canceled"); + } + Some(Ok(Some(Err(time::Elapsed { .. })))) => { + debug!("probe v4 timed out"); + } + None => {} } - Err(_) => { - debug!(%src, "received invalid STUN response: {err:#}"); + } + val = v6_buf.join_next(), if !v6_buf.is_empty() => { + match val { + Some(Ok(Some(Ok(res)))) => { + match res { + Ok((r, conn)) => { + debug!(?r, "got v6 QAD conn"); + let url = r.node.clone(); + reports.push(ProbeReport::QadIpv6(r)); + if self.qad_conns.v6.is_none() { + self.qad_conns.v6.replace((url, conn)); + } else { + conn.conn.close(QUIC_ADDR_DISC_CLOSE_CODE, QUIC_ADDR_DISC_CLOSE_REASON); + } + } + Err(err) => { + debug!("probe v6 failed: {:?}", err); + } + } + } + Some(Err(err)) => { + if err.is_panic() { + panic!("probe v6 panicked: {:?}", err); + } + warn!("probe v6 failed: {:?}", err); + } + Some(Ok(None)) => { + debug!("probe v6 canceled"); + } + Some(Ok(Some(Err(time::Elapsed { .. })))) => { + debug!("probe v6 timed out"); + } + None => {} } } + else => { + break; + } } } - } - /// Handles [`Message::InFlightStun`]. - /// - /// The in-flight request is added to [`Actor::in_flight_stun_requests`] so that - /// [`Actor::handle_stun_packet`] can forward packets correctly. - /// - /// *response_tx* is to signal the actor message has been handled. 
- fn handle_in_flight_stun(&mut self, inflight: Inflight, response_tx: oneshot::Sender<()>) { - self.in_flight_stun_requests.insert(inflight.txn, inflight); - response_tx.send(()).ok(); + reports } - fn finish_and_store_report(&mut self, report: Report) -> Arc { - let report = self.add_report_history_and_set_preferred_relay(report); - debug!("{report:?}"); - report + fn have_enough_reports(&self, enough_relays: usize, report: &Report) -> Option { + // Once we've heard from enough relay servers (3), start a timer to give up on the other + // probes. The timer's duration is a function of whether this is our initial full + // probe or an incremental one. For incremental ones, wait for the duration of the + // slowest relay. For initial ones, double that. + let latencies: Vec = report.relay_latency.iter().map(|(_, l)| l).collect(); + let have_enough_latencies = latencies.len() >= enough_relays; + + if have_enough_latencies { + let timeout = match self.reports.last.is_some() { + true => Duration::from_secs(0), + false => latencies + .iter() + .max() + .copied() + .unwrap_or(DEFAULT_MAX_LATENCY), + }; + debug!( + reports=latencies.len(), + delay=?timeout, + "Have enough probe reports, aborting further probes soon", + ); + + Some(timeout) + } else { + None + } } /// Adds `r` to the set of recent Reports and mutates `r.preferred_relay` to contain the best recent one. - /// `r` is stored ref counted and a reference is returned. 
- fn add_report_history_and_set_preferred_relay(&mut self, mut r: Report) -> Arc { + fn add_report_history_and_set_preferred_relay(&mut self, r: &mut Report) { let mut prev_relay = None; if let Some(ref last) = self.reports.last { prev_relay.clone_from(&last.preferred_relay); + + // If we don't have new information, copy this from the last report + if r.mapping_varies_by_dest_ipv4.is_none() { + r.mapping_varies_by_dest_ipv4 = last.mapping_varies_by_dest_ipv4; + } + if r.mapping_varies_by_dest_ipv6.is_none() { + r.mapping_varies_by_dest_ipv6 = last.mapping_varies_by_dest_ipv6; + } } + let now = Instant::now(); const MAX_AGE: Duration = Duration::from_secs(5 * 60); // relay ID => its best recent latency in last MAX_AGE - let mut best_recent = RelayLatencies::new(); + let mut best_recent = RelayLatencies::default(); // chain the current report as we are still mutating it let prevs_iter = self .reports .prev .iter() - .map(|(a, b)| -> (&Instant, &Report) { (a, b) }) - .chain(std::iter::once((&now, &r))); + .map(|(a, b)| -> (&Instant, &Report) { (a, b) }); let mut to_remove = Vec::new(); for (t, pr) in prevs_iter { @@ -742,6 +628,8 @@ impl Actor { } best_recent.merge(&pr.relay_latency); } + // merge in current run + best_recent.merge(&r.relay_latency); for t in to_remove { self.reports.prev.remove(&t); @@ -776,137 +664,148 @@ impl Actor { } } - let r = Arc::new(r); self.reports.prev.insert(now, r.clone()); self.reports.last = Some(r.clone()); - - r } } -/// State the net_report actor needs for an in-progress report generation. -#[derive(Debug)] -struct ReportRun { - /// The handle of the [`reportgen`] actor, cancels the actor on drop. - _reportgen: reportgen::Client, - /// Where to send the completed report. 
- report_tx: oneshot::Sender, ReportError>>, -} - -#[allow(missing_docs)] -#[common_fields({ - backtrace: Option, - #[snafu(implicit)] - span_trace: n0_snafu::SpanTrace, -})] -#[derive(Debug, Snafu)] -#[non_exhaustive] -pub enum ReportError { - #[snafu(display("Report aborted early"))] - Abort { reason: ActorRunError }, - #[snafu(display("Report generation is already running"))] - AlreadyRunning {}, - #[snafu(display("Internal actor is gone"))] - ActorGone {}, -} - -/// Test if IPv6 works at all, or if it's been hard disabled at the OS level. #[cfg(not(wasm_browser))] -fn os_has_ipv6() -> bool { - UdpSocket::bind_local_v6(0).is_ok() -} - -/// Always returns false in browsers -#[cfg(wasm_browser)] -fn os_has_ipv6() -> bool { - false +async fn run_probe_v4( + ip_mapped_addrs: Option, + relay_node: Arc, + quic_client: QuicClient, + dns_resolver: DnsResolver, +) -> n0_snafu::Result<(QadProbeReport, QadConn)> { + use n0_snafu::ResultExt; + + let relay_addr_orig = reportgen::get_relay_addr_ipv4(&dns_resolver, &relay_node).await?; + let relay_addr = + reportgen::maybe_to_mapped_addr(ip_mapped_addrs.as_ref(), relay_addr_orig.into()); + + debug!(?relay_addr_orig, ?relay_addr, "relay addr v4"); + let host = relay_node.url.host_str().context("missing host url")?; + let conn = quic_client.create_conn(relay_addr, host).await?; + let mut receiver = conn.observed_external_addr(); + + // wait for an addr + let addr = receiver + .wait_for(|addr| addr.is_some()) + .await + .context("receiver dropped")? 
+ .expect("known"); + let report = QadProbeReport { + node: relay_node.url.clone(), + addr: SocketAddr::new(addr.ip().to_canonical(), addr.port()), + latency: conn.rtt(), + }; + + let observer = Watchable::new(None); + let ob = observer.clone(); + let node = relay_node.url.clone(); + let conn2 = conn.clone(); + let handle = task::spawn(async move { + loop { + let val = *receiver.borrow(); + // if we've sent to an ipv4 address, but received an observed address + // that is ivp6 then the address is an [IPv4-Mapped IPv6 Addresses](https://doc.rust-lang.org/beta/std/net/struct.Ipv6Addr.html#ipv4-mapped-ipv6-addresses) + let val = val.map(|val| SocketAddr::new(val.ip().to_canonical(), val.port())); + let latency = conn2.rtt(); + trace!(?val, ?relay_addr, ?latency, "got addr V4"); + if ob + .set(val.map(|addr| QadProbeReport { + node: node.clone(), + addr, + latency, + })) + .is_err() + { + // cancel if the observer is gone + break; + } + if receiver.changed().await.is_err() { + break; + } + } + }); + let handle = AbortOnDropHandle::new(handle); + + Ok(( + report, + QadConn { + conn, + observer, + _handle: handle, + }, + )) } #[cfg(not(wasm_browser))] -pub(crate) mod stun_utils { - use netwatch::IpFamily; - use tokio_util::sync::CancellationToken; - - use super::*; - - /// Attempts to bind a local socket to send STUN packets from. - /// - /// If successful this returns the bound socket and will forward STUN responses to the - /// provided *actor_addr*. The *cancel_token* serves to stop the packet forwarding when the - /// socket is no longer needed. 
- pub fn bind_local_stun_socket( - network: IpFamily, - actor_addr: Addr, - cancel_token: CancellationToken, - ) -> Option> { - let sock = match UdpSocket::bind(network, 0) { - Ok(sock) => Arc::new(sock), - Err(err) => { - debug!("failed to bind STUN socket: {}", err); - return None; +async fn run_probe_v6( + ip_mapped_addrs: Option, + relay_node: Arc, + quic_client: QuicClient, + dns_resolver: DnsResolver, +) -> n0_snafu::Result<(QadProbeReport, QadConn)> { + use n0_snafu::ResultExt; + let relay_addr_orig = reportgen::get_relay_addr_ipv6(&dns_resolver, &relay_node).await?; + let relay_addr = + reportgen::maybe_to_mapped_addr(ip_mapped_addrs.as_ref(), relay_addr_orig.into()); + + debug!(?relay_addr_orig, ?relay_addr, "relay addr v6"); + let host = relay_node.url.host_str().context("missing host url")?; + let conn = quic_client.create_conn(relay_addr, host).await?; + let mut receiver = conn.observed_external_addr(); + + // wait for an addr + let addr = receiver + .wait_for(|addr| addr.is_some()) + .await + .context("receiver dropped")? 
+ .expect("known"); + let report = QadProbeReport { + node: relay_node.url.clone(), + addr: SocketAddr::new(addr.ip().to_canonical(), addr.port()), + latency: conn.rtt(), + }; + + let observer = Watchable::new(None); + let ob = observer.clone(); + let node = relay_node.url.clone(); + let conn2 = conn.clone(); + let handle = task::spawn(async move { + loop { + let val = *receiver.borrow(); + // if we've sent to an ipv4 address, but received an observed address + // that is ivp6 then the address is an IPv4-Mapped IPv6 Addresses + let val = val.map(|val| SocketAddr::new(val.ip().to_canonical(), val.port())); + let latency = conn2.rtt(); + trace!(?val, ?relay_addr, ?latency, "got addr V6"); + if ob + .set(val.map(|addr| QadProbeReport { + node: node.clone(), + addr, + latency, + })) + .is_err() + { + // cancel if the observer is gone + break; + } + if receiver.changed().await.is_err() { + break; } - }; - let span = info_span!( - "stun_udp_listener", - local_addr = sock - .local_addr() - .map(|a| a.to_string()) - .unwrap_or(String::from("-")), - ); - { - let sock = sock.clone(); - task::spawn( - async move { - debug!("udp stun socket listener started"); - // TODO: Can we do better for buffers here? Probably doesn't matter much. - let mut buf = vec![0u8; 64 << 10]; - loop { - tokio::select! { - biased; - _ = cancel_token.cancelled() => break, - res = recv_stun_once(&sock, &mut buf, &actor_addr) => { - if let Err(err) = res { - warn!(%err, "stun recv failed"); - break; - } - } - } - } - debug!("udp stun socket listener stopped"); - } - .instrument(span), - ); } - Some(sock) - } - - #[derive(Debug, Snafu)] - enum RecvStunError { - #[snafu(transparent)] - Recv { source: std::io::Error }, - #[snafu(display("Internal actor is gone"))] - ActorGone, - } - - /// Receive STUN response from a UDP socket, pass it to the actor. 
- async fn recv_stun_once( - sock: &UdpSocket, - buf: &mut [u8], - actor_addr: &Addr, - ) -> Result<(), RecvStunError> { - let (count, mut from_addr) = sock.recv_from(buf).await?; - - let payload = &buf[..count]; - from_addr.set_ip(from_addr.ip().to_canonical()); - let msg = Message::StunPacket { - payload: Bytes::from(payload.to_vec()), - from_addr, - }; - actor_addr - .send(msg) - .await - .map_err(|_| ActorGoneSnafu.build()) - } + }); + let handle = AbortOnDropHandle::new(handle); + + Ok(( + report, + QadConn { + conn, + observer, + _handle: handle, + }, + )) } #[cfg(test)] @@ -924,8 +823,6 @@ mod test_utils { }); let node_desc = RelayNode { url: server.https_url().expect("should work as relay"), - stun_only: false, // the checks above and below guarantee both stun and relay - stun_port: server.stun_addr().expect("server should serve stun").port(), quic, }; @@ -950,173 +847,55 @@ mod test_utils { #[cfg(test)] mod tests { - use bytes::BytesMut; + use std::net::{Ipv4Addr, SocketAddr}; + + use iroh_base::RelayUrl; + use iroh_relay::dns::DnsResolver; use n0_snafu::{Result, ResultExt}; - use netwatch::IpFamily; use tokio_util::sync::CancellationToken; - use tracing::info; use tracing_test::traced_test; use super::*; - use crate::net_report::{dns, stun_utils::bind_local_stun_socket}; - - mod stun_utils { - //! Utils for testing that expose a simple stun server. - - use std::{net::IpAddr, sync::Arc}; - - use iroh_base::RelayUrl; - use iroh_relay::RelayNode; - use tokio::{ - net, - sync::{oneshot, Mutex}, - }; - use tracing::{debug, trace}; - - use super::*; - - /// A drop guard to clean up test infrastructure. - /// - /// After dropping the test infrastructure will asynchronously shutdown and release its - /// resources. - // Nightly sees the sender as dead code currently, but we only rely on Drop of the - // sender. 
- #[derive(Debug)] - pub struct CleanupDropGuard { - _guard: oneshot::Sender<()>, - } - - // (read_ipv4, read_ipv6) - #[derive(Debug, Default, Clone)] - pub struct StunStats(Arc>); - - impl StunStats { - pub async fn total(&self) -> usize { - let s = self.0.lock().await; - s.0 + s.1 - } - } - - pub fn relay_map_of(stun: impl Iterator) -> RelayMap { - relay_map_of_opts(stun.map(|addr| (addr, true))) - } - - pub fn relay_map_of_opts(stun: impl Iterator) -> RelayMap { - let nodes = stun.map(|(addr, stun_only)| { - let host = addr.ip(); - let port = addr.port(); - - let url: RelayUrl = format!("http://{host}:{port}").parse().unwrap(); - RelayNode { - url, - stun_port: port, - stun_only, - quic: None, - } - }); - RelayMap::from_iter(nodes) - } + use crate::net_report::probes::Probe; - /// Sets up a simple STUN server binding to `0.0.0.0:0`. - /// - /// See [`serve`] for more details. - pub(crate) async fn serve_v4() -> std::io::Result<(SocketAddr, StunStats, CleanupDropGuard)> - { - serve(std::net::Ipv4Addr::UNSPECIFIED.into()).await - } - - /// Sets up a simple STUN server. - pub(crate) async fn serve( - ip: IpAddr, - ) -> std::io::Result<(SocketAddr, StunStats, CleanupDropGuard)> { - let stats = StunStats::default(); - - let pc = net::UdpSocket::bind((ip, 0)).await?; - let mut addr = pc.local_addr()?; - match addr.ip() { - IpAddr::V4(ip) => { - if ip.octets() == [0, 0, 0, 0] { - addr.set_ip("127.0.0.1".parse().unwrap()); - } - } - _ => unreachable!("using ipv4"), - } - - println!("STUN listening on {}", addr); - let (_guard, r) = oneshot::channel(); - let stats_c = stats.clone(); - tokio::task::spawn(async move { - run_stun(pc, stats_c, r).await; - }); - - Ok((addr, stats, CleanupDropGuard { _guard })) - } - - async fn run_stun(pc: net::UdpSocket, stats: StunStats, mut done: oneshot::Receiver<()>) { - let mut buf = vec![0u8; 64 << 10]; - loop { - trace!("read loop"); - tokio::select! 
{ - _ = &mut done => { - debug!("shutting down"); - break; - } - res = pc.recv_from(&mut buf) => match res { - Ok((n, addr)) => { - trace!("read packet {}bytes from {}", n, addr); - let pkt = &buf[..n]; - if !stun::is(pkt) { - debug!("received non STUN pkt"); - continue; - } - if let Ok(txid) = stun::parse_binding_request(pkt) { - debug!("received binding request"); - let mut s = stats.0.lock().await; - if addr.is_ipv4() { - s.0 += 1; - } else { - s.1 += 1; - } - drop(s); - - let res = stun::response(txid, addr); - if let Err(err) = pc.send_to(&res, addr).await { - eprintln!("STUN server write failed: {:?}", err); - } - } - } - Err(err) => { - eprintln!("failed to read: {:?}", err); - } - } - } - } - } - } - - #[tokio::test] + #[tokio::test(flavor = "multi_thread")] #[traced_test] - async fn test_basic() -> Result { - let (stun_addr, stun_stats, _cleanup_guard) = - stun_utils::serve("127.0.0.1".parse().unwrap()).await.e()?; + async fn test_basic() -> Result<()> { + let (server, relay) = test_utils::relay().await; + let client_config = iroh_relay::client::make_dangerous_client_config(); + let ep = quinn::Endpoint::client(SocketAddr::new(Ipv4Addr::LOCALHOST.into(), 0)).e()?; + let quic_addr_disc = QuicConfig { + ep: ep.clone(), + client_config, + ipv4: true, + ipv6: true, + }; + let relay_map = RelayMap::from(relay); - let resolver = dns::tests::resolver(); - let mut client = Client::new(None, resolver.clone(), None, Default::default()); - let dm = stun_utils::relay_map_of([stun_addr].into_iter()); + let resolver = DnsResolver::new(); + let opts = Options::default() + .quic_config(Some(quic_addr_disc.clone())) + .insecure_skip_relay_cert_verify(true); + let mut client = Client::new( + resolver.clone(), + None, + relay_map.clone(), + opts.clone(), + Default::default(), + ); + let if_state = IfStateDetails::fake(); // Note that the ProbePlan will change with each iteration. 
for i in 0..5 { let cancel = CancellationToken::new(); - let sock = bind_local_stun_socket(IpFamily::V4, client.addr(), cancel.clone()); println!("--round {}", i); - let r = client.get_report_all(dm.clone(), sock, None, None).await?; - - assert!(r.udp, "want UDP"); - assert_eq!( - r.relay_latency.len(), - 1, - "expected 1 key in RelayLatency; got {}", - r.relay_latency.len() + let r = client.get_report(if_state.clone(), false).await; + + assert!(r.has_udp(), "want UDP"); + dbg!(&r); + assert!( + !r.relay_latency.is_empty(), + "expected at least 1 key in RelayLatency; got none", ); assert!( r.relay_latency.iter().next().is_some(), @@ -1128,11 +907,9 @@ mod tests { cancel.cancel(); } - assert!( - stun_stats.total().await >= 5, - "expected at least 5 stun, got {}", - stun_stats.total().await, - ); + drop(client); + ep.wait_idle().await; + server.shutdown().await?; Ok(()) } @@ -1144,23 +921,24 @@ mod tests { } // report returns a *Report from (relay host, Duration)+ pairs. - fn report(a: impl IntoIterator) -> Option> { + fn report(a: impl IntoIterator) -> Option { let mut report = Report::default(); for (s, d) in a { assert!(s.starts_with('d'), "invalid relay server key"); let id: u16 = s[1..].parse().unwrap(); - report - .relay_latency - .0 - .insert(relay_url(id), Duration::from_secs(d)); + report.relay_latency.update_relay( + relay_url(id), + Duration::from_secs(d), + Probe::QadIpv4, + ); } - Some(Arc::new(report)) + Some(report) } struct Step { /// Delay in seconds after: u64, - r: Option>, + r: Option, } struct Test { name: &'static str, @@ -1296,18 +1074,20 @@ mod tests { want_relay: Some(relay_url(2)), // 2 got fast enough }, ]; - let resolver = dns::tests::resolver(); + let resolver = DnsResolver::new(); for mut tt in tests { println!("test: {}", tt.name); - let mut actor = Actor::new(None, resolver.clone(), None, Default::default()); + let relay_map = RelayMap::empty(); + let opts = Options::default(); + let mut client = + Client::new(resolver.clone(), None, 
relay_map, opts, Default::default()); for s in &mut tt.steps { // trigger the timer tokio::time::advance(Duration::from_secs(s.after)).await; - let r = Arc::try_unwrap(s.r.take().unwrap()).unwrap(); - s.r = Some(actor.add_report_history_and_set_preferred_relay(r)); + client.add_report_history_and_set_preferred_relay(s.r.as_mut().unwrap()); } let last_report = tt.steps.last().unwrap().r.clone().unwrap(); - let got = actor.reports.prev.len(); + let got = client.reports.prev.len(); let want = tt.want_prev_len; assert_eq!(got, want, "prev length"); let got = &last_report.preferred_relay; @@ -1317,61 +1097,4 @@ mod tests { Ok(()) } - - #[tokio::test] - async fn test_hairpin() -> Result { - // Hairpinning is initiated after we discover our own IPv4 socket address (IP + - // port) via STUN, so the test needs to have a STUN server and perform STUN over - // IPv4 first. Hairpinning detection works by sending a STUN *request* to **our own - // public socket address** (IP + port). If the router supports hairpinning the STUN - // request is returned back to us and received on our public address. This doesn't - // need to be a STUN request, but STUN already has a unique transaction ID which we - // can easily use to identify the packet. - - // Setup STUN server and create relay_map. - let (stun_addr, _stun_stats, _done) = stun_utils::serve_v4().await.e()?; - let dm = stun_utils::relay_map_of([stun_addr].into_iter()); - dbg!(&dm); - - let resolver = dns::tests::resolver().clone(); - let mut client = Client::new(None, resolver, None, Default::default()); - - // Set up an external socket to send STUN requests from, this will be discovered as - // our public socket address by STUN. We send back any packets received on this - // socket to the net_report client using Client::receive_stun_packet. 
Once we sent - // the hairpin STUN request (from a different randomly bound socket) we are sending - // it to this socket, which is forwarnding it back to our net_report client, because - // this dumb implementation just forwards anything even if it would be garbage. - // Thus hairpinning detection will declare hairpinning to work. - let sock = UdpSocket::bind_local(netwatch::IpFamily::V4, 0).e()?; - let sock = Arc::new(sock); - info!(addr=?sock.local_addr().unwrap(), "Using local addr"); - let task = { - let sock = sock.clone(); - let addr = client.addr.clone(); - tokio::spawn( - async move { - let mut buf = BytesMut::zeroed(64 << 10); - loop { - let (count, src) = sock.recv_from(&mut buf).await.unwrap(); - info!( - addr=?sock.local_addr().unwrap(), - %count, - "Forwarding payload to net_report client", - ); - let payload = buf.split_to(count).freeze(); - addr.receive_stun_packet(payload, src); - } - } - .instrument(info_span!("pkt-fwd")), - ) - }; - - let r = client.get_report_all(dm, Some(sock), None, None).await?; - dbg!(&r); - assert_eq!(r.hair_pinning, Some(true)); - - task.abort(); - Ok(()) - } } diff --git a/iroh/src/net_report/defaults.rs b/iroh/src/net_report/defaults.rs index 66f27097114..75782044aef 100644 --- a/iroh/src/net_report/defaults.rs +++ b/iroh/src/net_report/defaults.rs @@ -11,15 +11,15 @@ pub(crate) mod timeouts { /// The total time we wait for all the probes. /// - /// This includes the STUN, ICMP and HTTPS probes, which will all + /// This includes the QAD and HTTPS probes, which will all /// start at different times based on the ProbePlan. pub(crate) const PROBES_TIMEOUT: Duration = Duration::from_secs(3); /// How long to await for a captive-portal result. /// - /// This delay is chosen so it starts after good-working STUN probes + /// This delay is chosen so it starts after good-working QAD probes /// would have finished, but not too long so the delay is bearable if - /// STUN is blocked. + /// UDP/QAD is blocked. 
pub(crate) const CAPTIVE_PORTAL_DELAY: Duration = Duration::from_millis(200); /// Timeout for captive portal checks @@ -29,10 +29,4 @@ pub(crate) mod timeouts { pub(crate) const CAPTIVE_PORTAL_TIMEOUT: Duration = Duration::from_secs(2); pub(crate) const DNS_TIMEOUT: Duration = Duration::from_secs(3); - - /// The amount of time we wait for a hairpinned packet to come back. - pub(crate) const HAIRPIN_CHECK_TIMEOUT: Duration = Duration::from_millis(100); - - /// Default Pinger timeout - pub(crate) const DEFAULT_PINGER_TIMEOUT: Duration = Duration::from_secs(5); } diff --git a/iroh/src/net_report/dns.rs b/iroh/src/net_report/dns.rs deleted file mode 100644 index 1a46b339436..00000000000 --- a/iroh/src/net_report/dns.rs +++ /dev/null @@ -1,12 +0,0 @@ -/// Delay used to perform staggered dns queries. -pub(crate) const DNS_STAGGERING_MS: &[u64] = &[200, 300]; - -#[cfg(test)] -pub(crate) mod tests { - use iroh_relay::dns::DnsResolver; - - /// Get a DNS resolver suitable for testing. - pub fn resolver() -> DnsResolver { - DnsResolver::new() - } -} diff --git a/iroh/src/net_report/ip_mapped_addrs.rs b/iroh/src/net_report/ip_mapped_addrs.rs index 8a16db7a921..d555d8b506d 100644 --- a/iroh/src/net_report/ip_mapped_addrs.rs +++ b/iroh/src/net_report/ip_mapped_addrs.rs @@ -18,7 +18,7 @@ pub struct IpMappedAddrError; /// /// It is essentially a lookup key for an IP that iroh's magicsocket knows about. #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)] -pub struct IpMappedAddr(Ipv6Addr); +pub(crate) struct IpMappedAddr(Ipv6Addr); /// Counter to always generate unique addresses for [`IpMappedAddr`]. static IP_ADDR_COUNTER: AtomicU64 = AtomicU64::new(1); @@ -38,7 +38,7 @@ impl IpMappedAddr { /// /// This generates a new IPv6 address in the Unique Local Address range (RFC 4193) /// which is recognised by iroh as an IP mapped address. 
- pub fn generate() -> Self { + pub(super) fn generate() -> Self { let mut addr = [0u8; 16]; addr[0] = Self::ADDR_PREFIXL; addr[1..6].copy_from_slice(&Self::ADDR_GLOBAL_ID); @@ -57,7 +57,7 @@ impl IpMappedAddr { /// This uses a made-up, but fixed port number. The [IpMappedAddresses`] map this is /// made for creates a unique [`IpMappedAddr`] for each IP+port and thus does not use /// the port to map back to the original [`SocketAddr`]. - pub fn private_socket_addr(&self) -> SocketAddr { + pub(crate) fn private_socket_addr(&self) -> SocketAddr { SocketAddr::new(IpAddr::from(self.0), Self::MAPPED_ADDR_PORT) } } @@ -88,10 +88,10 @@ impl std::fmt::Display for IpMappedAddr { // mechanisms for keeping track of "aliveness" and pruning address, as we do // with the `NodeMap` #[derive(Debug, Clone, Default)] -pub struct IpMappedAddresses(Arc>); +pub(crate) struct IpMappedAddresses(Arc>); #[derive(Debug, Default)] -pub struct Inner { +pub(super) struct Inner { by_mapped_addr: BTreeMap, /// Because [`std::net::SocketAddrV6`] contains extra fields besides the IP /// address and port (ie, flow_info and scope_id), the a [`std::net::SocketAddrV6`] @@ -101,18 +101,13 @@ pub struct Inner { } impl IpMappedAddresses { - /// Creates an empty [`IpMappedAddresses`]. - pub fn new() -> Self { - Self(Arc::new(std::sync::Mutex::new(Inner::default()))) - } - /// Adds a [`SocketAddr`] to the map and returns the generated [`IpMappedAddr`]. /// /// If this [`SocketAddr`] already exists in the map, it returns its /// associated [`IpMappedAddr`]. /// /// Otherwise a new [`IpMappedAddr`] is generated for it and returned. 
- pub fn get_or_register(&self, socket_addr: SocketAddr) -> IpMappedAddr { + pub(super) fn get_or_register(&self, socket_addr: SocketAddr) -> IpMappedAddr { let ip_port = (socket_addr.ip(), socket_addr.port()); let mut inner = self.0.lock().expect("poisoned"); if let Some(mapped_addr) = inner.by_ip_port.get(&ip_port) { @@ -125,14 +120,14 @@ impl IpMappedAddresses { } /// Returns the [`IpMappedAddr`] for the given [`SocketAddr`]. - pub fn get_mapped_addr(&self, socket_addr: &SocketAddr) -> Option { + pub(crate) fn get_mapped_addr(&self, socket_addr: &SocketAddr) -> Option { let ip_port = (socket_addr.ip(), socket_addr.port()); let inner = self.0.lock().expect("poisoned"); inner.by_ip_port.get(&ip_port).copied() } /// Returns the [`SocketAddr`] for the given [`IpMappedAddr`]. - pub fn get_ip_addr(&self, mapped_addr: &IpMappedAddr) -> Option { + pub(crate) fn get_ip_addr(&self, mapped_addr: &IpMappedAddr) -> Option { let inner = self.0.lock().expect("poisoned"); inner.by_mapped_addr.get(mapped_addr).copied() } diff --git a/iroh/src/net_report/metrics.rs b/iroh/src/net_report/metrics.rs index 1698c1d8ae7..ccfd9ba9380 100644 --- a/iroh/src/net_report/metrics.rs +++ b/iroh/src/net_report/metrics.rs @@ -6,16 +6,6 @@ use serde::{Deserialize, Serialize}; #[metrics(name = "net_report")] #[non_exhaustive] pub struct Metrics { - /// Incoming STUN packets dropped due to a full receiving queue. - pub stun_packets_dropped: Counter, - /// Number of IPv4 STUN packets sent. - pub stun_packets_sent_ipv4: Counter, - /// Number of IPv6 STUN packets sent. - pub stun_packets_sent_ipv6: Counter, - /// Number of IPv4 STUN packets received. - pub stun_packets_recv_ipv4: Counter, - /// Number of IPv6 STUN packets received. - pub stun_packets_recv_ipv6: Counter, /// Number of reports executed by net_report, including full reports. 
pub reports: Counter, /// Number of full reports executed by net_report diff --git a/iroh/src/net_report/options.rs b/iroh/src/net_report/options.rs index 5540f56e976..6fd5e2449aa 100644 --- a/iroh/src/net_report/options.rs +++ b/iroh/src/net_report/options.rs @@ -4,48 +4,21 @@ pub use imp::Options; #[cfg(not(wasm_browser))] mod imp { - use std::{collections::BTreeSet, sync::Arc}; - - use netwatch::UdpSocket; + use std::collections::BTreeSet; - use crate::net_report::{reportgen::ProbeProto, QuicConfig}; + use crate::net_report::{probes::Probe, QuicConfig}; /// Options for running probes /// - /// By default, will run icmp over IPv4, icmp over IPv6, and Https probes. + /// By default, will run Https probes. /// - /// Use [`Options::stun_v4`], [`Options::stun_v6`], and [`Options::quic_config`] - /// to enable STUN over IPv4, STUN over IPv6, and QUIC address discovery. + /// Use [`Options::quic_config`] to enable QUIC address discovery. #[derive(Debug, Clone)] pub struct Options { - /// Socket to send IPv4 STUN probes from. - /// - /// Responses are never read from this socket, they must be passed in via internal - /// messaging since, when used internally in iroh, the socket is also used to receive - /// other packets from in the magicsocket (`MagicSock`). - /// - /// If not provided, STUN probes will not be sent over IPv4. - pub(crate) stun_sock_v4: Option>, - /// Socket to send IPv6 STUN probes from. - /// - /// Responses are never read from this socket, they must be passed in via internal - /// messaging since, when used internally in iroh, the socket is also used to receive - /// other packets from in the magicsocket (`MagicSock`). - /// - /// If not provided, STUN probes will not be sent over IPv6. - pub(crate) stun_sock_v6: Option>, /// The configuration needed to launch QUIC address discovery probes. /// /// If not provided, will not run QUIC address discovery. 
pub(crate) quic_config: Option, - /// Enable icmp_v4 probes - /// - /// On by default - pub(crate) icmp_v4: bool, - /// Enable icmp_v6 probes - /// - /// On by default - pub(crate) icmp_v6: bool, /// Enable https probes /// /// On by default @@ -58,11 +31,7 @@ mod imp { impl Default for Options { fn default() -> Self { Self { - stun_sock_v4: None, - stun_sock_v6: None, quic_config: None, - icmp_v4: true, - icmp_v6: true, https: true, #[cfg(any(test, feature = "test-utils"))] insecure_skip_relay_cert_verify: false, @@ -74,47 +43,19 @@ mod imp { /// Create an [`Options`] that disables all probes pub fn disabled() -> Self { Self { - stun_sock_v4: None, - stun_sock_v6: None, quic_config: None, - icmp_v4: false, - icmp_v6: false, https: false, #[cfg(any(test, feature = "test-utils"))] insecure_skip_relay_cert_verify: false, } } - /// Set the ipv4 stun socket and enable ipv4 stun probes - pub fn stun_v4(mut self, sock: Option>) -> Self { - self.stun_sock_v4 = sock; - self - } - - /// Set the ipv6 stun socket and enable ipv6 stun probes - pub fn stun_v6(mut self, sock: Option>) -> Self { - self.stun_sock_v6 = sock; - self - } - /// Enable quic probes pub fn quic_config(mut self, quic_config: Option) -> Self { self.quic_config = quic_config; self } - /// Enable or disable icmp_v4 probe - pub fn icmp_v4(mut self, enable: bool) -> Self { - self.icmp_v4 = enable; - self - } - - /// Enable or disable icmp_v6 probe - pub fn icmp_v6(mut self, enable: bool) -> Self { - self.icmp_v6 = enable; - self - } - /// Enable or disable https probe pub fn https(mut self, enable: bool) -> Self { self.https = enable; @@ -129,30 +70,18 @@ mod imp { } /// Turn the options into set of valid protocols - pub(crate) fn to_protocols(&self) -> BTreeSet { + pub fn as_protocols(&self) -> BTreeSet { let mut protocols = BTreeSet::new(); - if self.stun_sock_v4.is_some() { - protocols.insert(ProbeProto::StunIpv4); - } - if self.stun_sock_v6.is_some() { - protocols.insert(ProbeProto::StunIpv6); - } if let 
Some(ref quic) = self.quic_config { if quic.ipv4 { - protocols.insert(ProbeProto::QuicIpv4); + protocols.insert(Probe::QadIpv4); } if quic.ipv6 { - protocols.insert(ProbeProto::QuicIpv6); + protocols.insert(Probe::QadIpv6); } } - if self.icmp_v4 { - protocols.insert(ProbeProto::IcmpV4); - } - if self.icmp_v6 { - protocols.insert(ProbeProto::IcmpV6); - } if self.https { - protocols.insert(ProbeProto::Https); + protocols.insert(Probe::Https); } protocols } @@ -163,7 +92,7 @@ mod imp { mod imp { use std::collections::BTreeSet; - use crate::net_report::reportgen::ProbeProto; + use crate::net_report::Probe; /// Options for running probes (in browsers). /// @@ -196,10 +125,10 @@ mod imp { } /// Turn the options into set of valid protocols - pub(crate) fn to_protocols(&self) -> BTreeSet { + pub(crate) fn as_protocols(&self) -> BTreeSet { let mut protocols = BTreeSet::new(); if self.https { - protocols.insert(ProbeProto::Https); + protocols.insert(Probe::Https); } protocols } diff --git a/iroh/src/net_report/ping.rs b/iroh/src/net_report/ping.rs deleted file mode 100644 index c887ea1e99c..00000000000 --- a/iroh/src/net_report/ping.rs +++ /dev/null @@ -1,182 +0,0 @@ -//! Allows sending ICMP echo requests to a host in order to determine network latency. - -use std::{ - fmt::Debug, - net::IpAddr, - sync::{Arc, Mutex}, -}; - -use n0_future::time::Duration; -use nested_enum_utils::common_fields; -use snafu::{ResultExt, Snafu}; -use surge_ping::{Client, Config, IcmpPacket, PingIdentifier, PingSequence, ICMP}; -use tracing::debug; - -use crate::net_report::defaults::timeouts::DEFAULT_PINGER_TIMEOUT as DEFAULT_TIMEOUT; - -/// Whether this error was because we couldn't create a client or a send error. 
-#[allow(missing_docs)] -#[common_fields({ - backtrace: Option, - #[snafu(implicit)] - span_trace: n0_snafu::SpanTrace, -})] -#[derive(Debug, Snafu)] -#[non_exhaustive] -pub enum PingError { - #[snafu(display("failed to create IPv4 ping client"))] - CreateClientIpv4 { source: std::io::Error }, - #[snafu(display("failed to create IPv6 ping client"))] - CreateClientIpv6 { source: std::io::Error }, - /// Could not send ping. - #[snafu(display("failed to send ping"))] - Ping { source: surge_ping::SurgeError }, -} - -/// Allows sending ICMP echo requests to a host in order to determine network latency. -/// Will gracefully handle both IPv4 and IPv6. -#[derive(Debug, Clone, Default)] -pub struct Pinger(Arc); - -impl Debug for Inner { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Inner").finish() - } -} - -#[derive(Default)] -struct Inner { - client_v6: Mutex>, - client_v4: Mutex>, -} - -impl Pinger { - /// Create a new [Pinger]. - pub fn new() -> Self { - Default::default() - } - - /// Lazily create the ping client. - /// - /// We do this because it means we do not bind a socket until we really try to send a - /// ping. It makes it more transparent to use the pinger. 
- fn get_client(&self, kind: ICMP) -> Result { - let client = match kind { - ICMP::V4 => { - let mut opt_client = self.0.client_v4.lock().expect("poisoned"); - match *opt_client { - Some(ref client) => client.clone(), - None => { - let cfg = Config::builder().kind(kind).build(); - let client = Client::new(&cfg).context(CreateClientIpv4Snafu)?; - *opt_client = Some(client.clone()); - client - } - } - } - ICMP::V6 => { - let mut opt_client = self.0.client_v6.lock().expect("poisoned"); - match *opt_client { - Some(ref client) => client.clone(), - None => { - let cfg = Config::builder().kind(kind).build(); - let client = Client::new(&cfg).context(CreateClientIpv6Snafu)?; - *opt_client = Some(client.clone()); - client - } - } - } - }; - Ok(client) - } - - /// Send a ping request with associated data, returning the perceived latency. - pub async fn send(&self, addr: IpAddr, data: &[u8]) -> Result { - let client = match addr { - IpAddr::V4(_) => self.get_client(ICMP::V4)?, - IpAddr::V6(_) => self.get_client(ICMP::V6)?, - }; - let ident = PingIdentifier(rand::random()); - debug!(%addr, %ident, "Creating pinger"); - let mut pinger = client.pinger(addr, ident).await; - pinger.timeout(DEFAULT_TIMEOUT); // todo: timeout too large for net_report - match pinger - .ping(PingSequence(0), data) - .await - .context(PingSnafu)? 
- { - (IcmpPacket::V4(packet), dur) => { - debug!( - "{} bytes from {}: icmp_seq={} ttl={:?} time={:0.2?}", - packet.get_size(), - packet.get_source(), - packet.get_sequence(), - packet.get_ttl(), - dur - ); - Ok(dur) - } - - (IcmpPacket::V6(packet), dur) => { - debug!( - "{} bytes from {}: icmp_seq={} hlim={} time={:0.2?}", - packet.get_size(), - packet.get_source(), - packet.get_sequence(), - packet.get_max_hop_limit(), - dur - ); - Ok(dur) - } - } - } -} - -#[cfg(test)] -mod tests { - use std::net::{Ipv4Addr, Ipv6Addr}; - - use tracing::error; - use tracing_test::traced_test; - - use super::*; - - // See net_report::reportgen::tests::test_icmp_probe_eu_relay for permissions to ping. - #[tokio::test] - #[traced_test] - async fn test_ping_localhost() { - let pinger = Pinger::new(); - - match pinger.send(Ipv4Addr::LOCALHOST.into(), b"data").await { - Ok(duration) => { - assert!(!duration.is_zero()); - } - Err( - PingError::CreateClientIpv4 { source, .. } - | PingError::CreateClientIpv6 { source, .. }, - ) => { - // We don't have permission, too bad. - error!("no ping permissions: {source:#}"); - } - Err(PingError::Ping { source, .. }) => { - panic!("ping failed: {source:#}"); - } - } - - match pinger.send(Ipv6Addr::LOCALHOST.into(), b"data").await { - Ok(duration) => { - assert!(!duration.is_zero()); - } - Err( - PingError::CreateClientIpv4 { source, .. } - | PingError::CreateClientIpv6 { source, .. }, - ) => { - // We don't have permission, too bad. - error!("no ping permissions: {source:#}"); - } - Err(PingError::Ping { source, .. }) => { - error!("ping failed, probably no IPv6 stack: {source:#}"); - } - } - } -} diff --git a/iroh/src/net_report/probes.rs b/iroh/src/net_report/probes.rs new file mode 100644 index 00000000000..f25f7053a31 --- /dev/null +++ b/iroh/src/net_report/probes.rs @@ -0,0 +1,269 @@ +//! The relay probes. +//! +//! All the probes try and establish the latency to the relay servers. Preferably the QAD +//! 
probes work and we also learn about our public IP addresses and ports. But fallback +//! probes for HTTPS exist as well. + +use std::{collections::BTreeSet, fmt, sync::Arc}; + +use iroh_relay::{RelayMap, RelayNode}; +use n0_future::time::Duration; +use snafu::Snafu; + +use crate::net_report::Report; + +/// The retransmit interval used. +const DEFAULT_INITIAL_RETRANSMIT: Duration = Duration::from_millis(100); + +/// The delay before starting HTTPS probes. +const HTTPS_OFFSET: Duration = Duration::from_millis(200); + +/// The protocol used to time a node's latency. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, derive_more::Display)] +#[repr(u8)] +pub enum Probe { + /// HTTPS + Https, + /// QUIC Address Discovery Ipv4 + #[cfg(not(wasm_browser))] + QadIpv4, + /// QUIC Address Discovery Ipv6 + #[cfg(not(wasm_browser))] + QadIpv6, +} + +/// A probe set is a sequence of similar [`Probe`]s with delays between them. +/// +/// The probes are to the same Relayer and of the same [`Probe`] but will have different +/// delays. The delays are effectively retries, though they do not wait for the previous +/// probe to be finished. The first successful probe will cancel all other probes in the +/// set. +/// +/// This is a lot of type-safety by convention. It would be so much nicer to have this +/// compile-time checked but that introduces a giant mess of generics and traits and +/// associated exploding types. +/// +/// A [`ProbeSet`] implements [`IntoIterator`] similar to how [`Vec`] does. +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +pub(super) struct ProbeSet { + /// The [`Probe`] all the probes in this set have. + proto: Probe, + /// The data in the set. 
+ probes: Vec<(Duration, Arc)>, +} + +#[derive(Debug, Snafu)] +#[snafu(display("Mismatching probe"))] +struct PushError; + +impl ProbeSet { + fn new(proto: Probe) -> Self { + Self { + probes: Vec::new(), + proto, + } + } + + pub(super) fn proto(&self) -> Probe { + self.proto + } + + fn push(&mut self, delay: Duration, node: Arc) { + self.probes.push((delay, node)); + } + + fn is_empty(&self) -> bool { + self.probes.is_empty() + } + + pub(super) fn params(&self) -> impl Iterator)> { + self.probes.iter() + } +} + +/// A probe plan. +/// +/// A probe plan contains a number of [`ProbeSet`]s containing probes to be executed. +/// Generally the first probe of a set which completes aborts the remaining probes of a +/// set. Sometimes a failing probe can also abort the remaining probes of a set. +/// +/// The [`reportgen`] actor will also abort all the remaining [`ProbeSet`]s once it has +/// sufficient information for a report. +/// +/// [`reportgen`]: crate::net_report::reportgen +#[derive(Debug, Default, PartialEq, Eq)] +pub(super) struct ProbePlan { + set: BTreeSet, +} + +impl ProbePlan { + /// Creates an initial probe plan + pub(super) fn initial(relay_map: &RelayMap, protocols: &BTreeSet) -> Self { + let mut plan = Self::default(); + + for relay_node in relay_map.nodes() { + let mut https_probes = ProbeSet::new(Probe::Https); + + for attempt in 0u32..3 { + let delay = HTTPS_OFFSET + DEFAULT_INITIAL_RETRANSMIT * attempt; + https_probes.push(delay, relay_node.clone()); + } + + plan.add_if_enabled(protocols, https_probes); + } + plan + } + + /// Creates a follow up probe plan using a previous net_report report in browsers. + /// + /// This will only schedule HTTPS probes. + pub(super) fn with_last_report( + relay_map: &RelayMap, + last_report: &Report, + protocols: &BTreeSet, + ) -> Self { + if last_report.relay_latency.is_empty() { + return Self::initial(relay_map, protocols); + } + + // TODO: is this good? 
+ Self::default() + } + + /// Returns an iterator over the [`ProbeSet`]s in this plan. + pub(super) fn iter(&self) -> impl Iterator { + self.set.iter() + } + + /// Adds a [`ProbeSet`] if it contains probes and the protocol indicated in + /// the [`ProbeSet`] matches a protocol in our set of [`Probe`]s. + fn add_if_enabled(&mut self, protocols: &BTreeSet, set: ProbeSet) { + if !set.is_empty() && protocols.contains(&set.proto) { + self.set.insert(set); + } + } +} + +impl fmt::Display for ProbePlan { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!(f, "ProbePlan {{")?; + for probe_set in self.set.iter() { + writeln!(f, r#" ProbeSet("{}") {{"#, probe_set.proto)?; + for (delay, node) in probe_set.probes.iter() { + writeln!(f, " {delay:?} to {node},")?; + } + writeln!(f, " }}")?; + } + writeln!(f, "}}") + } +} + +impl FromIterator for ProbePlan { + fn from_iter>(iter: T) -> Self { + Self { + set: iter.into_iter().collect(), + } + } +} + +#[cfg(test)] +mod tests { + use pretty_assertions::assert_eq; + + use super::*; + use crate::net_report::test_utils; + + /// Shorthand which declares a new ProbeSet. + /// + /// `$kind`: The `Probe`. + /// `$node`: Expression which will be an `Arc`. + /// `$delays`: A `Vec` of the delays for this probe. + macro_rules! probeset { + (proto: Probe::$kind:ident, relay: $node:expr, delays: $delays:expr,) => { + ProbeSet { + proto: Probe::$kind, + probes: $delays.iter().map(|delay| (*delay, $node)).collect(), + } + }; + } + + fn default_protocols() -> BTreeSet { + BTreeSet::from([Probe::QadIpv4, Probe::QadIpv6, Probe::Https]) + } + + #[tokio::test] + async fn test_initial_probeplan() { + let (_servers, relay_map) = test_utils::relay_map(2).await; + let relay_node_1 = relay_map.nodes().next().unwrap(); + let relay_node_2 = relay_map.nodes().nth(1).unwrap(); + let plan = ProbePlan::initial(&relay_map, &default_protocols()); + + let expected_plan: ProbePlan = [ + probeset! 
{ + proto: Probe::Https, + relay: relay_node_1.clone(), + delays: [ + Duration::from_millis(200), + Duration::from_millis(300), + Duration::from_millis(400) + ], + }, + probeset! { + proto: Probe::Https, + relay: relay_node_2.clone(), + delays: [ + Duration::from_millis(200), + Duration::from_millis(300), + Duration::from_millis(400) + ], + }, + ] + .into_iter() + .collect(); + + println!("expected:"); + println!("{expected_plan}"); + println!("actual:"); + println!("{plan}"); + // The readable error: + assert_eq!(plan.to_string(), expected_plan.to_string()); + // Just in case there's a bug in the Display impl: + assert_eq!(plan, expected_plan); + } + + #[tokio::test] + async fn test_initial_probeplan_some_protocols() { + let (_servers, relay_map) = test_utils::relay_map(2).await; + let relay_node_1 = relay_map.nodes().next().unwrap(); + let relay_node_2 = relay_map.nodes().nth(1).unwrap(); + let plan = ProbePlan::initial(&relay_map, &BTreeSet::from([Probe::Https])); + + let expected_plan: ProbePlan = [ + probeset! { + proto: Probe::Https, + relay: relay_node_1.clone(), + delays: [Duration::from_millis(200), + Duration::from_millis(300), + Duration::from_millis(400)], + }, + probeset! 
{ + proto: Probe::Https, + relay: relay_node_2.clone(), + delays: [Duration::from_millis(200), + Duration::from_millis(300), + Duration::from_millis(400)], + }, + ] + .into_iter() + .collect(); + + println!("expected:"); + println!("{expected_plan}"); + println!("actual:"); + println!("{plan}"); + // The readable error: + assert_eq!(plan.to_string(), expected_plan.to_string()); + // Just in case there's a bug in the Display impl: + assert_eq!(plan, expected_plan); + } +} diff --git a/iroh/src/net_report/report.rs b/iroh/src/net_report/report.rs new file mode 100644 index 00000000000..af6a957359e --- /dev/null +++ b/iroh/src/net_report/report.rs @@ -0,0 +1,214 @@ +use std::{ + collections::BTreeMap, + fmt, + net::{SocketAddr, SocketAddrV4, SocketAddrV6}, + time::Duration, +}; + +use iroh_base::RelayUrl; +use tracing::warn; + +use super::{probes::Probe, ProbeReport}; + +/// A net_report report. +#[derive(Default, Debug, PartialEq, Eq, Clone)] +pub struct Report { + /// A QAD IPv4 round trip completed. + pub udp_v4: bool, + /// A QAD IPv6 round trip completed. + pub udp_v6: bool, + /// Whether the reported public address differs when probing different servers (on IPv4). + pub mapping_varies_by_dest_ipv4: Option, + /// Whether the reported public address differs when probing different servers (on IPv6). + pub mapping_varies_by_dest_ipv6: Option, + /// The relay server this client prefers to use. + /// `None` for unknown + pub preferred_relay: Option, + /// keyed by relay Url + pub relay_latency: RelayLatencies, + /// ip:port of global IPv4 + pub global_v4: Option, + /// `[ip]:port` of global IPv6 + pub global_v6: Option, + /// CaptivePortal is set when we think there's a captive portal that is + /// intercepting HTTP traffic. 
+ pub captive_portal: Option, +} + +impl fmt::Display for Report { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&self, f) + } +} + +impl Report { + /// Do we have any indication that UDP is working? + pub fn has_udp(&self) -> bool { + self.udp_v4 || self.udp_v6 + } + + /// Whether the reported public address differs when probing different servers. + pub fn mapping_varies_by_dest(&self) -> Option { + match ( + self.mapping_varies_by_dest_ipv4, + self.mapping_varies_by_dest_ipv6, + ) { + (Some(v4), Some(v6)) => Some(v4 || v6), + (None, Some(v6)) => Some(v6), + (Some(v4), None) => Some(v4), + (None, None) => None, + } + } + + /// Updates a net_report [`Report`] with a new [`ProbeReport`]. + pub(super) fn update(&mut self, report: &ProbeReport) { + match report { + ProbeReport::Https(report) => { + self.relay_latency + .update_relay(report.node.clone(), report.latency, Probe::Https); + } + #[cfg(not(wasm_browser))] + ProbeReport::QadIpv4(report) => { + self.relay_latency.update_relay( + report.node.clone(), + report.latency, + Probe::QadIpv4, + ); + let SocketAddr::V4(ipp) = report.addr else { + warn!("received IPv6 address from IPv4 QAD: {}", report.addr); + return; + }; + + self.udp_v4 = true; + + tracing::debug!(?self.global_v4, ?self.mapping_varies_by_dest_ipv4, %ipp,"got"); + if let Some(global) = self.global_v4 { + if global == ipp { + if self.mapping_varies_by_dest_ipv4.is_none() { + self.mapping_varies_by_dest_ipv4 = Some(false); + } + } else { + self.mapping_varies_by_dest_ipv4 = Some(true); + warn!("IPv4 address detected by QAD varies by destination"); + } + } else { + self.global_v4 = Some(ipp); + } + } + #[cfg(not(wasm_browser))] + ProbeReport::QadIpv6(report) => { + self.relay_latency.update_relay( + report.node.clone(), + report.latency, + Probe::QadIpv6, + ); + let SocketAddr::V6(ipp) = report.addr else { + warn!("received IPv4 address from IPv6 QAD: {}", report.addr); + return; + }; + + self.udp_v6 = true; + 
tracing::debug!(?self.global_v6, ?self.mapping_varies_by_dest_ipv6, %ipp,"got"); + if let Some(global) = self.global_v6 { + if global == ipp { + if self.mapping_varies_by_dest_ipv6.is_none() { + self.mapping_varies_by_dest_ipv6 = Some(false); + } + } else { + self.mapping_varies_by_dest_ipv6 = Some(true); + warn!("IPv6 address detected by QAD varies by destination"); + } + } else { + self.global_v6 = Some(ipp); + } + } + } + } +} + +/// Latencies per relay node. +#[derive(Debug, Default, PartialEq, Eq, Clone)] +pub struct RelayLatencies { + #[cfg(not(wasm_browser))] + ipv4: BTreeMap, + #[cfg(not(wasm_browser))] + ipv6: BTreeMap, + https: BTreeMap, +} + +impl RelayLatencies { + /// Updates a relay's latency, if it is faster than before. + pub(super) fn update_relay(&mut self, url: RelayUrl, latency: Duration, probe: Probe) { + let list = match probe { + Probe::Https => &mut self.https, + #[cfg(not(wasm_browser))] + Probe::QadIpv4 => &mut self.ipv4, + #[cfg(not(wasm_browser))] + Probe::QadIpv6 => &mut self.ipv6, + }; + let old_latency = list.entry(url).or_insert(latency); + if latency < *old_latency { + *old_latency = latency; + } + } + + /// Merges another [`RelayLatencies`] into this one. + /// + /// For each relay the latency is updated using [`RelayLatencies::update_relay`]. + pub(super) fn merge(&mut self, other: &RelayLatencies) { + for (url, latency) in other.https.iter() { + self.update_relay(url.clone(), *latency, Probe::Https); + } + #[cfg(not(wasm_browser))] + for (url, latency) in other.ipv4.iter() { + self.update_relay(url.clone(), *latency, Probe::QadIpv4); + } + #[cfg(not(wasm_browser))] + for (url, latency) in other.ipv6.iter() { + self.update_relay(url.clone(), *latency, Probe::QadIpv6); + } + } + + /// Returns an iterator over all the relays and their latencies. 
+ #[cfg(not(wasm_browser))] + pub fn iter(&self) -> impl Iterator + '_ { + self.https + .iter() + .chain(self.ipv4.iter()) + .chain(self.ipv6.iter()) + .map(|(k, v)| (k, *v)) + } + + /// Returns an iterator over all the relays and their latencies. + #[cfg(wasm_browser)] + pub fn iter(&self) -> impl Iterator + '_ { + self.https.iter().map(|(k, v)| (k, *v)) + } + + #[cfg(not(wasm_browser))] + pub(super) fn is_empty(&self) -> bool { + self.https.is_empty() && self.ipv4.is_empty() && self.ipv6.is_empty() + } + + #[cfg(wasm_browser)] + pub(super) fn is_empty(&self) -> bool { + self.https.is_empty() + } + + /// Returns the lowest latency across records. + pub(super) fn get(&self, url: &RelayUrl) -> Option { + let mut list = Vec::with_capacity(3); + if let Some(val) = self.https.get(url) { + list.push(*val); + } + #[cfg(not(wasm_browser))] + if let Some(val) = self.ipv4.get(url) { + list.push(*val); + } + #[cfg(not(wasm_browser))] + if let Some(val) = self.ipv6.get(url) { + list.push(*val); + } + list.into_iter().min() + } +} diff --git a/iroh/src/net_report/reportgen.rs b/iroh/src/net_report/reportgen.rs index 4484f20047d..aa6d95094f5 100644 --- a/iroh/src/net_report/reportgen.rs +++ b/iroh/src/net_report/reportgen.rs @@ -6,7 +6,6 @@ //! messages from the client. It follows roughly these steps: //! //! - Determines host IPv6 support. -//! - Creates hairpin actor. //! - Creates portmapper future. //! - Creates captive portal detection future. //! - Creates Probe Set futures. @@ -16,24 +15,21 @@ //! - Stop if there are no outstanding tasks/futures, or on timeout. //! - Sends the completed report to the net_report actor. 
+#[cfg(not(wasm_browser))] +use std::net::{SocketAddrV4, SocketAddrV6}; use std::{ collections::BTreeSet, - future::Future, net::{IpAddr, SocketAddr}, - pin::Pin, sync::Arc, - task::{Context, Poll}, }; use http::StatusCode; use iroh_base::RelayUrl; +use iroh_relay::{defaults::DEFAULT_RELAY_QUIC_PORT, http::RELAY_PROBE_PATH, RelayMap, RelayNode}; #[cfg(not(wasm_browser))] -use iroh_relay::dns::{DnsError, DnsResolver, StaggeredError}; use iroh_relay::{ - defaults::{DEFAULT_RELAY_QUIC_PORT, DEFAULT_STUN_PORT}, - http::RELAY_PROBE_PATH, - protos::stun, - RelayMap, RelayNode, + dns::{DnsError, DnsResolver, StaggeredError}, + quic::QuicClient, }; #[cfg(wasm_browser)] use n0_future::future::Pending; @@ -42,61 +38,71 @@ use n0_future::{ time::{self, Duration, Instant}, StreamExt as _, }; -#[cfg(not(wasm_browser))] -use netwatch::{interfaces, UdpSocket}; use rand::seq::IteratorRandom; -use snafu::{IntoError, ResultExt, Snafu}; -use tokio::sync::{mpsc, oneshot}; -use tracing::{debug, debug_span, error, info_span, trace, warn, Instrument, Span}; +use snafu::{IntoError, OptionExt, ResultExt, Snafu}; +use tokio::sync::mpsc; +use tokio_util::sync::CancellationToken; +use tracing::{debug, debug_span, error, info_span, trace, warn, Instrument}; use url::Host; #[cfg(wasm_browser)] -use crate::net_report::portmapper; // We stub the library -use crate::net_report::{self, Metrics, Report}; +use super::portmapper; // We stub the library #[cfg(not(wasm_browser))] -use crate::net_report::{ - defaults::timeouts::DNS_TIMEOUT, - dns::DNS_STAGGERING_MS, - ip_mapped_addrs::IpMappedAddresses, - ping::{PingError, Pinger}, +use super::{defaults::timeouts::DNS_TIMEOUT, ip_mapped_addrs::IpMappedAddresses}; +use super::{ + probes::{Probe, ProbePlan}, + Report, }; - #[cfg(not(wasm_browser))] -mod hairpin; -mod probes; - -pub use probes::ProbeProto; -use probes::{Probe, ProbePlan}; - +use crate::discovery::dns::DNS_STAGGERING_MS; use crate::net_report::defaults::timeouts::{ CAPTIVE_PORTAL_DELAY, 
CAPTIVE_PORTAL_TIMEOUT, OVERALL_REPORT_TIMEOUT, PROBES_TIMEOUT, }; -const ENOUGH_NODES: usize = 3; - -/// Holds the state for a single invocation of [`net_report::Client::get_report`]. +/// Holds the state for a single report generation. /// /// Dropping this will cancel the actor and stop the report generation. #[derive(Debug)] pub(super) struct Client { - // Addr is currently only used by child actors, so not yet exposed here. _drop_guard: AbortOnDropHandle<()>, } +/// Some details required from the interface state of the device. +#[derive(Debug, Clone, Default)] +pub(crate) struct IfStateDetails { + /// Do we have IPv4 capbilities + pub have_v4: bool, + /// Do we have IPv6 capbilities + pub have_v6: bool, +} + +impl IfStateDetails { + #[cfg(test)] + pub(super) fn fake() -> Self { + IfStateDetails { + have_v4: true, + have_v6: true, + } + } +} + +impl From for IfStateDetails { + fn from(value: netwatch::netmon::State) -> Self { + IfStateDetails { + have_v4: value.have_v4, + have_v6: value.have_v6, + } + } +} + /// Any state that depends on sockets being available in the current environment. /// /// Factored out so it can be disabled easily in browsers. #[cfg(not(wasm_browser))] #[derive(Debug, Clone)] pub(crate) struct SocketState { - /// The portmapper client, if there is one. - pub(crate) port_mapper: Option, - /// Socket to send IPv4 STUN requests from. - pub(crate) stun_sock4: Option>, - /// Socket so send IPv6 STUN requests from. - pub(crate) stun_sock6: Option>, - /// QUIC configuration to do QUIC address Discovery - pub(crate) quic_config: Option, + /// QUIC client to do QUIC address Discovery + pub(crate) quic_client: Option, /// The DNS resolver to use for probes that need to resolve DNS records. pub(crate) dns_resolver: DnsResolver, /// Optional [`IpMappedAddresses`] used to enable QAD in iroh @@ -109,158 +115,89 @@ impl Client { /// The actor starts running immediately and only generates a single report, after which /// it shuts down. 
Dropping this handle will abort the actor. pub(super) fn new( - net_report: net_report::Addr, - last_report: Option>, + last_report: Option, relay_map: RelayMap, - protocols: BTreeSet, - metrics: Arc, + protocols: BTreeSet, + if_state: IfStateDetails, #[cfg(not(wasm_browser))] socket_state: SocketState, #[cfg(any(test, feature = "test-utils"))] insecure_skip_relay_cert_verify: bool, - ) -> Self { + ) -> (Self, mpsc::Receiver) { let (msg_tx, msg_rx) = mpsc::channel(32); - let addr = Addr { - sender: msg_tx.clone(), - }; - let mut actor = Actor { + let actor = Actor { msg_tx, - msg_rx, - net_report: net_report.clone(), last_report, relay_map, - report: Report::default(), - outstanding_tasks: OutstandingTasks::default(), protocols, #[cfg(not(wasm_browser))] socket_state, - #[cfg(not(wasm_browser))] - hairpin_actor: hairpin::Client::new(net_report, addr), - metrics, #[cfg(any(test, feature = "test-utils"))] insecure_skip_relay_cert_verify, + if_state, }; - let task = - task::spawn(async move { actor.run().await }.instrument(info_span!("reportgen.actor"))); - Self { - _drop_guard: AbortOnDropHandle::new(task), - } - } -} - -/// The address of the reportstate [`Actor`]. -/// -/// Unlike the [`Client`] struct itself this is the raw channel to send message over. -/// Keeping this alive will not keep the actor alive, which makes this handy to pass to -/// internal tasks. -#[derive(Debug, Clone)] -pub(super) struct Addr { - sender: mpsc::Sender, -} - -impl Addr { - /// Blocking send to the actor, to be used from a non-actor future. - async fn send(&self, msg: Message) -> Result<(), mpsc::error::SendError> { - trace!( - "sending {:?} to channel with cap {}", - msg, - self.sender.capacity() - ); - self.sender.send(msg).await + let task = task::spawn(actor.run().instrument(info_span!("reportgen.actor"))); + ( + Self { + _drop_guard: AbortOnDropHandle::new(task), + }, + msg_rx, + ) } } -/// Messages to send to the reportstate [`Actor`]. 
-#[derive(Debug)] -enum Message { - /// Set the hairpinning availability in the report. - HairpinResult(bool), - /// Check whether executing a probe would still help. - // TODO: Ideally we remove the need for this message and the logic is inverted: once we - // get a probe result we cancel all probes that are no longer needed. But for now it's - // this way around to ease conversion. - ProbeWouldHelp(Probe, Arc, oneshot::Sender), - /// Abort all remaining probes. - AbortProbes, -} - /// The reportstate actor. /// /// This actor starts, generates a single report and exits. #[derive(Debug)] struct Actor { - /// The sender of the message channel, so we can give out [`Addr`]. - msg_tx: mpsc::Sender, - /// The receiver of the message channel. - msg_rx: mpsc::Receiver, - /// The address of the net_report actor. - net_report: super::Addr, + msg_tx: mpsc::Sender, // Provided state /// The previous report, if it exists. - last_report: Option>, + last_report: Option, /// The relay configuration. relay_map: RelayMap, // Internal state. - /// The report being built. - report: Report, - /// Which tasks the [`Actor`] is still waiting on. - /// - /// This is essentially the summary of all the work the [`Actor`] is doing. - outstanding_tasks: OutstandingTasks, /// Protocols we should attempt to create probes for, if we have the correct /// configuration for that protocol. - protocols: BTreeSet, + protocols: BTreeSet, /// Any socket-related state that doesn't exist/work in browsers #[cfg(not(wasm_browser))] socket_state: SocketState, - /// The hairpin actor. 
- #[cfg(not(wasm_browser))] - hairpin_actor: hairpin::Client, - metrics: Arc, #[cfg(any(test, feature = "test-utils"))] insecure_skip_relay_cert_verify: bool, + if_state: IfStateDetails, } #[allow(missing_docs)] #[derive(Debug, Snafu)] #[non_exhaustive] -pub enum ActorRunError { - #[snafu(display("Report generation timed out"))] - Timeout, - #[snafu(display("Client that requested the report is gone"))] - ClientGone, - #[snafu(display("Internal NetReport actor is gone"))] - ActorGone, - #[snafu(transparent)] - Probes { source: ProbesError }, -} - -#[allow(missing_docs)] -#[derive(Debug, Snafu)] -#[non_exhaustive] -pub enum ProbesError { +#[snafu(module)] +pub(super) enum ProbesError { #[snafu(display("Probe failed"))] ProbeFailure { source: ProbeError }, #[snafu(display("All probes failed"))] AllProbesFailed, + #[snafu(display("Probe cancelled"))] + Cancelled, + #[snafu(display("Probe timed out"))] + Timeout, } -impl Actor { - fn addr(&self) -> Addr { - Addr { - sender: self.msg_tx.clone(), - } - } +#[derive(Debug)] +pub(super) enum ProbeFinished { + Regular(Result), + #[cfg(not(wasm_browser))] + CaptivePortal(Option), +} - async fn run(&mut self) { - match self.run_inner().await { - Ok(_) => debug!("reportgen actor finished"), - Err(err) => { - self.net_report - .send(net_report::Message::ReportAborted { reason: err }) - .await - .ok(); +impl Actor { + async fn run(self) { + match time::timeout(OVERALL_REPORT_TIMEOUT, self.run_inner()).await { + Ok(()) => debug!("reportgen actor finished"), + Err(time::Elapsed { .. }) => { + warn!("reportgen timed out"); } } } @@ -269,7 +206,6 @@ impl Actor { /// /// This actor runs by: /// - /// - Creates a hairpin actor. /// - Creates a captive portal future. /// - Creates ProbeSet futures in a group of futures. /// - Runs a main loop: @@ -277,269 +213,58 @@ impl Actor { /// - Receives actor messages (sent by those futures). /// - Updates the report, cancels unneeded futures. /// - Sends the report to the net_report actor. 
- async fn run_inner(&mut self) -> Result<(), ActorRunError> { - #[cfg(not(wasm_browser))] - let port_mapper = self.socket_state.port_mapper.is_some(); - #[cfg(wasm_browser)] - let port_mapper = false; - debug!(%port_mapper, "reportstate actor starting"); - - self.report.os_has_ipv6 = super::os_has_ipv6(); - - let mut port_mapping = self.prepare_portmapper_task(); - let mut captive_task = self.prepare_captive_portal_task(); - let mut probes = self.spawn_probes_task().await; - - let total_timer = time::sleep(OVERALL_REPORT_TIMEOUT); - tokio::pin!(total_timer); - let probe_timer = time::sleep(PROBES_TIMEOUT); - tokio::pin!(probe_timer); - - loop { - trace!(awaiting = ?self.outstanding_tasks, "tick; awaiting tasks"); - if self.outstanding_tasks.all_done() { - debug!("all tasks done"); - break; - } - tokio::select! { - biased; - _ = &mut total_timer => { - trace!("tick: total_timer expired"); - return Err(TimeoutSnafu.build()); - } + async fn run_inner(self) { + debug!("reportstate actor starting"); - _ = &mut probe_timer => { - warn!("tick: probes timed out"); - // Set new timeout to not go into this branch multiple times. We need - // the abort to finish all probes normally. PROBES_TIMEOUT is - // sufficiently far in the future. - probe_timer.as_mut().reset(Instant::now() + PROBES_TIMEOUT); - probes.abort_all(); - self.handle_abort_probes(); - } + let mut probes = JoinSet::default(); - // Drive the portmapper. - pm = &mut port_mapping, if self.outstanding_tasks.port_mapper => { - debug!(report=?pm, "tick: portmapper probe report"); - self.report.portmap_probe = pm; - port_mapping.inner = None; - self.outstanding_tasks.port_mapper = false; - } + let _probes_token = self.spawn_probes_task(self.if_state.clone(), &mut probes); + let mut num_probes = probes.len(); - // Check for probes finishing. 
- set_result = probes.join_next(), if self.outstanding_tasks.probes => { - trace!("tick: probes done: {:?}", set_result); - match set_result { - Some(Ok(Ok(report))) => self.handle_probe_report(report), - Some(Ok(Err(_))) => (), - Some(Err(e)) => { - warn!("probes task error: {:?}", e); - } - None => { - self.handle_abort_probes(); - } - } - trace!("tick: probes handled"); - } + let captive_token = self.prepare_captive_portal_task(&mut probes); - // Drive the captive task. - found = &mut captive_task, if self.outstanding_tasks.captive_task => { - trace!("tick: captive portal task done"); - self.report.captive_portal = found; - captive_task.inner = None; - self.outstanding_tasks.captive_task = false; - } + // any reports of working UDP/QUIC? + let mut have_udp = false; - // Handle actor messages. - msg = self.msg_rx.recv() => { - trace!("tick: msg recv: {:?}", msg); - match msg { - Some(msg) => self.handle_message(msg), - None => { - return Err(ClientGoneSnafu.build()); + // Check for probes finishing. 
+ while let Some(probe_result) = probes.join_next().await { + trace!(?probe_result, num_probes, "processing finished probe"); + match probe_result { + Ok(report) => { + #[cfg_attr(wasm_browser, allow(irrefutable_let_patterns))] + if let ProbeFinished::Regular(report) = &report { + have_udp |= report.as_ref().map(|r| r.is_udp()).unwrap_or_default(); + num_probes -= 1; + + // If all probes are done & we have_udp cancel captive + if num_probes == 0 { + debug!("all regular probes done"); + debug_assert!(probes.len() <= 1, "{} probes", probes.len()); + + if have_udp { + captive_token.cancel(); + } } } + self.msg_tx.send(report).await.ok(); } - } - } - - if !probes.is_empty() { - debug!( - "aborting {} probe sets, already have enough reports", - probes.len() - ); - drop(probes); - } - - debug!("Sending report to net_report actor"); - self.net_report - .send(net_report::Message::ReportReady { - report: Box::new(self.report.clone()), - }) - .await - .map_err(|_| ActorGoneSnafu.build())?; - - Ok(()) - } - - /// Handles an actor message. - /// - /// Returns `true` if all the probes need to be aborted. - fn handle_message(&mut self, msg: Message) { - trace!(?msg, "handling message"); - match msg { - Message::HairpinResult(works) => { - self.report.hair_pinning = Some(works); - self.outstanding_tasks.hairpin = false; - } - Message::ProbeWouldHelp(probe, relay_node, response_tx) => { - let res = self.probe_would_help(probe, relay_node); - if response_tx.send(res).is_err() { - debug!("probe dropped before ProbeWouldHelp response sent"); - } - } - Message::AbortProbes => { - self.handle_abort_probes(); - } - } - } - - fn handle_probe_report(&mut self, probe_report: ProbeReport) { - debug!(?probe_report, "finished probe"); - update_report(&mut self.report, probe_report); - - // When we discover the first IPv4 address we want to start the hairpin actor. 
- #[cfg(not(wasm_browser))] - if let Some(ref addr) = self.report.global_v4 { - if !self.hairpin_actor.has_started() { - self.hairpin_actor.start_check(*addr); - self.outstanding_tasks.hairpin = true; - } - } - - // Once we've heard from enough relay servers (3), start a timer to give up on the other - // probes. The timer's duration is a function of whether this is our initial full - // probe or an incremental one. For incremental ones, wait for the duration of the - // slowest relay. For initial ones, double that. - let enough_relays = std::cmp::min(self.relay_map.len(), ENOUGH_NODES); - if self.report.relay_latency.len() == enough_relays { - let timeout = self.report.relay_latency.max_latency(); - let timeout = match self.last_report.is_some() { - true => timeout, - false => timeout * 2, - }; - let reportcheck = self.addr(); - debug!( - reports=self.report.relay_latency.len(), - delay=?timeout, - "Have enough probe reports, aborting further probes soon", - ); - task::spawn( - async move { - time::sleep(timeout).await; - // Because we do this after a timeout it is entirely normal that the - // actor is no longer there by the time we send this message. - reportcheck - .send(Message::AbortProbes) - .await - .map_err(|err| trace!("Failed to abort all probes: {err:#}")) - .ok(); - } - .instrument(Span::current()), - ); - } - } - - /// Whether running this probe would still improve our report. - fn probe_would_help(&mut self, probe: Probe, relay_node: Arc) -> bool { - // If the probe is for a relay we don't yet know about, that would help. - if self.report.relay_latency.get(&relay_node.url).is_none() { - return true; - } - - // If the probe is for IPv6 and we don't yet have an IPv6 report, that would help. 
- #[cfg(not(wasm_browser))] - if probe.proto() == ProbeProto::StunIpv6 && self.report.relay_v6_latency.is_empty() { - return true; - } - - // For IPv4, we need at least two IPv4 results overall to - // determine whether we're behind a NAT that shows us as - // different source IPs and/or ports depending on who we're - // talking to. If we don't yet have two results yet - // (`mapping_varies_by_dest_ip` is blank), then another IPv4 probe - // would be good. - #[cfg(not(wasm_browser))] - if probe.proto() == ProbeProto::StunIpv4 && self.report.mapping_varies_by_dest_ip.is_none() - { - return true; - } - - // Otherwise not interesting. - false - } - - /// Stops further probes. - /// - /// This makes sure that no further probes are run and also cancels the captive portal - /// and portmapper tasks if there were successful probes. Be sure to only handle this - /// after all the required [`ProbeReport`]s have been processed. - fn handle_abort_probes(&mut self) { - trace!("handle abort probes"); - self.outstanding_tasks.probes = false; - if self.report.udp { - self.outstanding_tasks.port_mapper = false; - self.outstanding_tasks.captive_task = false; - } - } - - /// Creates the future which will perform the portmapper task. - /// - /// The returned future will run the portmapper, if enabled, resolving to it's result. - fn prepare_portmapper_task( - &mut self, - ) -> MaybeFuture>>>> { - // In the browser, the compiler struggles to infer the type of future inside, because it's never set. 
- #[cfg(wasm_browser)] - let port_mapping: MaybeFuture>>>> = - MaybeFuture::default(); - - #[cfg(not(wasm_browser))] - let mut port_mapping = MaybeFuture::default(); - - #[cfg(not(wasm_browser))] - if let Some(port_mapper) = self.socket_state.port_mapper.clone() { - port_mapping.inner = Some(Box::pin(async move { - match port_mapper.probe().await { - Ok(Ok(res)) => Some(res), - Ok(Err(err)) => { - debug!("skipping port mapping: {err:?}"); - None - } - Err(recv_err) => { - warn!("skipping port mapping: {recv_err:?}"); - None + Err(e) => { + if e.is_panic() { + error!("Task panicked {:?}", e); + break; } + warn!("probes task join error: {:?}", e); } - })); - self.outstanding_tasks.port_mapper = true; + } } - port_mapping } /// Creates the future which will perform the captive portal check. - fn prepare_captive_portal_task( - &mut self, - ) -> MaybeFuture>>>> { - // In the browser case the compiler cannot infer the type of the future, because it's never set: - #[cfg(wasm_browser)] - let captive_task: MaybeFuture>>>> = MaybeFuture::default(); - - #[cfg(not(wasm_browser))] - let mut captive_task = MaybeFuture::default(); + fn prepare_captive_portal_task(&self, tasks: &mut JoinSet) -> CancellationToken { + let token = CancellationToken::new(); // If we're doing a full probe, also check for a captive portal. We - // delay by a bit to wait for UDP STUN to finish, to avoid the probe if + // delay by a bit to wait for UDP QAD to finish, to avoid the probe if // it's unnecessary. 
#[cfg(not(wasm_browser))] if self.last_report.is_none() { @@ -552,39 +277,49 @@ impl Actor { let dns_resolver = self.socket_state.dns_resolver.clone(); let dm = self.relay_map.clone(); - self.outstanding_tasks.captive_task = true; - captive_task.inner = Some(Box::pin(async move { - time::sleep(CAPTIVE_PORTAL_DELAY).await; - debug!("Captive portal check started after {CAPTIVE_PORTAL_DELAY:?}"); - let captive_portal_check = time::timeout( - CAPTIVE_PORTAL_TIMEOUT, - check_captive_portal(&dns_resolver, &dm, preferred_relay) - .instrument(debug_span!("captive-portal")), - ); - match captive_portal_check.await { - Ok(Ok(found)) => Some(found), - Ok(Err(err)) => { - match err { - CaptivePortalError::CreateReqwestClient { ref source } - | CaptivePortalError::HttpRequest { ref source } => { - if source.is_connect() { - debug!("check_captive_portal failed: {err:#}"); + let token = token.clone(); + tasks.spawn( + async move { + let res = token + .run_until_cancelled_owned(async move { + time::sleep(CAPTIVE_PORTAL_DELAY).await; + trace!("check started after {CAPTIVE_PORTAL_DELAY:?}"); + time::timeout( + CAPTIVE_PORTAL_TIMEOUT, + check_captive_portal(&dns_resolver, &dm, preferred_relay), + ) + .await + }) + .await; + let res = match res { + Some(Ok(Ok(found))) => Some(found), + Some(Ok(Err(err))) => { + match err { + CaptivePortalError::CreateReqwestClient { source } + | CaptivePortalError::HttpRequest { source } + if source.is_connect() => + { + debug!("check_captive_portal failed: {source:#}"); } + err => warn!("check_captive_portal error: {err:#}"), } - _ => warn!("check_captive_portal error: {err:#}"), + None } - None - } - Err(_) => { - warn!("check_captive_portal timed out"); - None - } + Some(Err(time::Elapsed { .. 
})) => { + warn!("probe timed out"); + None + } + None => { + trace!("probe cancelled"); + None + } + }; + ProbeFinished::CaptivePortal(res) } - })); + .instrument(debug_span!("captive-portal")), + ); } - - self.outstanding_tasks.captive_task = false; - captive_task + token } /// Prepares the future which will run all the probes as per generated ProbePlan. @@ -605,220 +340,132 @@ impl Actor { /// failure permanent. Probes in a probe set are essentially retries. /// - Once there are [`ProbeReport`]s from enough nodes, all remaining probes are /// aborted. That is, the main actor loop stops polling them. - async fn spawn_probes_task(&mut self) -> JoinSet> { - #[cfg(not(wasm_browser))] - let if_state = interfaces::State::new().await; - #[cfg(not(wasm_browser))] - debug!(%if_state, "Local interfaces"); + fn spawn_probes_task( + &self, + if_state: IfStateDetails, + probes: &mut JoinSet, + ) -> CancellationToken { + debug!(?if_state, "local interface details"); let plan = match self.last_report { - Some(ref report) => ProbePlan::with_last_report( - &self.relay_map, - report, - &self.protocols, - #[cfg(not(wasm_browser))] - &if_state, - ), - None => ProbePlan::initial( - &self.relay_map, - &self.protocols, - #[cfg(not(wasm_browser))] - &if_state, - ), + Some(ref report) => { + ProbePlan::with_last_report(&self.relay_map, report, &self.protocols) + } + None => ProbePlan::initial(&self.relay_map, &self.protocols), }; trace!(%plan, "probe plan"); - // The pinger is created here so that any sockets that might be bound for it are - // shared between the probes that use it. It binds sockets lazily, so we can always - // create it. - #[cfg(not(wasm_browser))] - let pinger = Pinger::new(); + let token = CancellationToken::new(); - // A collection of futures running probe sets. 
- let mut probes = JoinSet::default(); for probe_set in plan.iter() { - let mut set = JoinSet::default(); - for probe in probe_set { - let reportstate = self.addr(); - let relay_node = probe.node().clone(); - let probe = probe.clone(); - let net_report = self.net_report.clone(); - - #[cfg(not(wasm_browser))] - let pinger = pinger.clone(); - #[cfg(not(wasm_browser))] - let socket_state = self.socket_state.clone(); - - let metrics = self.metrics.clone(); - set.spawn( - run_probe( - reportstate, - relay_node, - probe.clone(), - net_report, - metrics, - #[cfg(not(wasm_browser))] - pinger, + let set_token = token.child_token(); + let proto = probe_set.proto(); + for (delay, relay_node) in probe_set.params() { + let probe_token = set_token.child_token(); + + let fut = probe_token.run_until_cancelled_owned(time::timeout( + PROBES_TIMEOUT, + proto.run( + *delay, + relay_node.clone(), #[cfg(not(wasm_browser))] - socket_state, + self.socket_state.clone(), #[cfg(any(test, feature = "test-utils"))] self.insecure_skip_relay_cert_verify, - ) - .instrument(debug_span!("run_probe", %probe)), - ); - } - - // Add the probe set to all futures of probe sets. Handle aborting a probe set - // if needed, only normal errors means the set continues. - probes.spawn( - async move { - // Hack because ProbeSet is not it's own type yet. 
- let mut probe_proto = None; - while let Some(res) = set.join_next().await { - match res { - Ok(Ok(report)) => return Ok(report), - Ok(Err(ProbeErrorWithProbe::Error(err, probe))) => { - probe_proto = Some(probe.proto()); - warn!(?probe, "probe failed: {:#}", err); - continue; - } - Ok(Err(ProbeErrorWithProbe::AbortSet(err, probe))) => { - debug!(?probe, "probe set aborted: {:#}", err); - set.abort_all(); - return Err(ProbeFailureSnafu.into_error(err)); + ), + )); + probes.spawn( + async move { + let res = fut.await; + let res = match res { + Some(Ok(Ok(report))) => Ok(report), + Some(Ok(Err(err))) => { + warn!("probe failed: {:#}", err); + Err(probes_error::ProbeFailureSnafu {}.into_error(err)) } - Err(err) => { - warn!("fatal probe set error, aborting: {:#}", err); - continue; + Some(Err(time::Elapsed { .. })) => { + Err(probes_error::TimeoutSnafu.build()) } - } + None => Err(probes_error::CancelledSnafu.build()), + }; + ProbeFinished::Regular(res) } - warn!(?probe_proto, "no successful probes in ProbeSet"); - Err(AllProbesFailedSnafu.build()) - } - .instrument(info_span!("probe")), - ); + .instrument(debug_span!( + "run-probe", + ?proto, + ?delay, + ?relay_node + )), + ); + } } - self.outstanding_tasks.probes = true; - probes + token } } -/// Tasks on which the reportgen [`Actor`] is still waiting. -/// -/// There is no particular progression, e.g. hairpin starts `false`, moves to `true` when a -/// check is started and then becomes `false` again once it is finished. -#[derive(Debug, Default)] -struct OutstandingTasks { - probes: bool, - port_mapper: bool, - captive_task: bool, - hairpin: bool, +/// The result of running a probe. 
+#[derive(Debug, Clone)] +pub(super) enum ProbeReport { + #[cfg(not(wasm_browser))] + QadIpv4(QadProbeReport), + #[cfg(not(wasm_browser))] + QadIpv6(QadProbeReport), + Https(HttpsProbeReport), } -impl OutstandingTasks { - fn all_done(&self) -> bool { - !(self.probes || self.port_mapper || self.captive_task || self.hairpin) +impl ProbeReport { + #[cfg(not(wasm_browser))] + pub(super) fn is_udp(&self) -> bool { + matches!(self, Self::QadIpv4(_) | Self::QadIpv6(_)) + } + + #[cfg(wasm_browser)] + pub(super) fn is_udp(&self) -> bool { + false } } -/// The success result of [`run_probe`]. -#[derive(Debug, Clone)] -struct ProbeReport { - /// Whether we can send IPv4 UDP packets. - ipv4_can_send: bool, - /// Whether we can send IPv6 UDP packets. - ipv6_can_send: bool, - /// Whether we can send ICMPv4 packets, `None` if not checked. - icmpv4: Option, - /// Whether we can send ICMPv6 packets, `None` if not checked. - icmpv6: Option, +#[cfg(not(wasm_browser))] +#[derive(Debug, Clone, PartialEq, Eq)] +pub(super) struct QadProbeReport { + /// The relay node that was probed + pub(super) node: RelayUrl, /// The latency to the relay node. - latency: Option, - /// The probe that generated this report. - probe: Probe, + pub(super) latency: Duration, /// The discovered public address. - addr: Option, + pub(super) addr: SocketAddr, } -impl ProbeReport { - fn new(probe: Probe) -> Self { - ProbeReport { - probe, - ipv4_can_send: false, - ipv6_can_send: false, - icmpv4: None, - icmpv6: None, - latency: None, - addr: None, - } - } -} - -/// Errors for [`run_probe`]. -/// -/// The main purpose is to signal whether other probes in this probe set should still be -/// run. Recall that a probe set is normally a set of identical probes with delays, -/// effectively creating retries, and the first successful probe of a probe set will cancel -/// the others in the set. So this allows an unsuccessful probe to cancel the remainder of -/// the set or not. 
-#[derive(Debug)] -enum ProbeErrorWithProbe { - /// Abort the current set. - AbortSet(ProbeError, Probe), - /// Continue the other probes in the set. - Error(ProbeError, Probe), +#[derive(Debug, Clone)] +pub(super) struct HttpsProbeReport { + /// The relay node that was probed + pub(super) node: RelayUrl, + /// The latency to the relay node. + pub(super) latency: Duration, } #[allow(missing_docs)] #[derive(Debug, Snafu)] #[snafu(module)] #[non_exhaustive] -pub enum ProbeError { +pub(super) enum ProbeError { #[snafu(display("Client is gone"))] ClientGone, #[snafu(display("Probe is no longer useful"))] NotUseful, - #[cfg(not(wasm_browser))] - #[snafu(display("Failed to retrieve the relay address"))] - GetRelayAddr { source: GetRelayAddrError }, - #[snafu(display("Failed to run stun probe"))] - Stun { source: StunError }, - #[snafu(display("Failed to run QUIC probe"))] - Quic { source: QuicError }, - #[cfg(not(wasm_browser))] - #[snafu(display("Failed to run ICMP probe"))] - Icmp { source: PingError }, + #[snafu(display("Failed to run HTTPS probe"))] + Https { source: MeasureHttpsLatencyError }, } #[allow(missing_docs)] #[derive(Debug, Snafu)] #[snafu(module)] #[non_exhaustive] -pub enum StunError { - #[snafu(display("No UDP socket available"))] - NoSocket, - #[snafu(display("Stun channel is gone"))] - StunChannelGone, - #[snafu(display("Failed to send full STUN request"))] - SendFull, - #[snafu(display("Failed to send STUN request"))] - Send { source: std::io::Error }, -} - -#[allow(missing_docs)] -#[derive(Debug, Snafu)] -#[snafu(module)] -#[non_exhaustive] -pub enum QuicError { +pub(super) enum QuicError { #[snafu(display("No QUIC endpoint available"))] NoEndpoint, #[snafu(display("URL must have 'host' to use QUIC address discovery probes"))] InvalidUrl, - #[snafu(display("Failed to create QUIC endpoint"))] - CreateClient { source: iroh_relay::quic::Error }, - #[snafu(display("Failed to get address and latency"))] - GetAddr { source: iroh_relay::quic::Error }, } 
/// Pieces needed to do QUIC address discovery. @@ -835,304 +482,53 @@ pub struct QuicConfig { pub ipv6: bool, } -/// Executes a particular [`Probe`], including using a delayed start if needed. -/// -/// If *stun_sock4* and *stun_sock6* are `None` the STUN probes are disabled. -#[allow(clippy::too_many_arguments)] -async fn run_probe( - reportstate: Addr, - relay_node: Arc, - probe: Probe, - net_report: net_report::Addr, - metrics: Arc, - #[cfg(not(wasm_browser))] pinger: Pinger, - #[cfg(not(wasm_browser))] socket_state: SocketState, - #[cfg(any(test, feature = "test-utils"))] insecure_skip_relay_cert_verify: bool, -) -> Result { - if !probe.delay().is_zero() { - trace!("delaying probe"); - time::sleep(probe.delay()).await; - } - debug!("starting probe"); - - let (would_help_tx, would_help_rx) = oneshot::channel(); - if let Err(err) = reportstate - .send(Message::ProbeWouldHelp( - probe.clone(), - relay_node.clone(), - would_help_tx, - )) - .await - { - // this happens on shutdown or if the report is already finished - debug!("Failed to check if probe would help: {err:#}"); - return Err(ProbeErrorWithProbe::AbortSet( - probe_error::ClientGoneSnafu.build(), - probe.clone(), - )); - } - - if !would_help_rx.await.map_err(|_| { - ProbeErrorWithProbe::AbortSet(probe_error::ClientGoneSnafu.build(), probe.clone()) - })? { - return Err(ProbeErrorWithProbe::AbortSet( - probe_error::NotUsefulSnafu.build(), - probe, - )); - } - - #[cfg(not(wasm_browser))] - let relay_addr = get_relay_addr(&socket_state.dns_resolver, &relay_node, probe.proto()) - .await - .map_err(|e| { - ProbeErrorWithProbe::AbortSet( - probe_error::GetRelayAddrSnafu.into_error(e), - probe.clone(), - ) - })?; - - let mut result = ProbeReport::new(probe.clone()); - match probe { - #[cfg(not(wasm_browser))] - Probe::StunIpv4 { .. } | Probe::StunIpv6 { .. } => { - let maybe_sock = if matches!(probe, Probe::StunIpv4 { .. 
}) { - socket_state.stun_sock4.as_ref() - } else { - socket_state.stun_sock6.as_ref() - }; - match maybe_sock { - Some(sock) => { - result = run_stun_probe(sock, relay_addr, net_report, probe, &metrics).await?; - } - None => { - return Err(ProbeErrorWithProbe::AbortSet( - probe_error::StunSnafu.into_error(stun_error::NoSocketSnafu.build()), - probe.clone(), - )); - } - } - } - #[cfg(not(wasm_browser))] - Probe::IcmpV4 { .. } | Probe::IcmpV6 { .. } => { - result = run_icmp_probe(probe, relay_addr, pinger).await? - } - Probe::Https { ref node, .. } => { - debug!("sending probe HTTPS"); - match measure_https_latency( - #[cfg(not(wasm_browser))] - &socket_state.dns_resolver, - node, - #[cfg(any(test, feature = "test-utils"))] - insecure_skip_relay_cert_verify, - ) - .await - { - Ok((latency, ip)) => { - debug!(?latency, "latency"); - result.latency = Some(latency); - // We set these IPv4 and IPv6 but they're not really used - // and we don't necessarily set them both. If UDP is blocked - // and both IPv4 and IPv6 are available over TCP, it's basically - // random which fields end up getting set here. - // Since they're not needed, that's fine for now. - match ip { - IpAddr::V4(_) => result.ipv4_can_send = true, - IpAddr::V6(_) => result.ipv6_can_send = true, - } - } - Err(err) => { - warn!("https latency measurement failed: {:?}", err); - } - } - } - - #[cfg(not(wasm_browser))] - Probe::QuicIpv4 { ref node, .. } | Probe::QuicIpv6 { ref node, .. } => { - debug!("sending QUIC address discovery probe"); - let url = node.url.clone(); - match socket_state.quic_config { - Some(quic_config) => { - result = run_quic_probe( - quic_config, - url, - relay_addr, - probe, - socket_state.ip_mapped_addrs, - ) - .await?; - } - None => { - return Err(ProbeErrorWithProbe::AbortSet( - probe_error::QuicSnafu.into_error(quic_error::NoEndpointSnafu.build()), - probe.clone(), - )); - } - } - } - } - - trace!("probe successful"); - Ok(result) -} - -/// Run a STUN IPv4 or IPv6 probe. 
-#[cfg(not(wasm_browser))] -async fn run_stun_probe( - sock: &Arc, - relay_addr: SocketAddr, - net_report: net_report::Addr, - probe: Probe, - metrics: &Metrics, -) -> Result { - match probe.proto() { - ProbeProto::StunIpv4 => debug_assert!(relay_addr.is_ipv4()), - ProbeProto::StunIpv6 => debug_assert!(relay_addr.is_ipv6()), - _ => debug_assert!(false, "wrong probe"), - } - let txid = stun::TransactionId::default(); - let req = stun::request(txid); - - // Setup net_report to give us back the incoming STUN response. - let (stun_tx, stun_rx) = oneshot::channel(); - let (inflight_ready_tx, inflight_ready_rx) = oneshot::channel(); - net_report - .send(net_report::Message::InFlightStun( - net_report::Inflight { - txn: txid, - start: Instant::now(), - s: stun_tx, - }, - inflight_ready_tx, - )) - .await - .map_err(|_| { - ProbeErrorWithProbe::Error(probe_error::ClientGoneSnafu.build(), probe.clone()) - })?; - inflight_ready_rx.await.map_err(|_| { - ProbeErrorWithProbe::Error(probe_error::ClientGoneSnafu.build(), probe.clone()) - })?; - - // Send the probe. - match sock.send_to(&req, relay_addr).await { - Ok(n) if n == req.len() => { - debug!(%relay_addr, %txid, "sending {} probe", probe.proto()); - let mut result = ProbeReport::new(probe.clone()); - - if matches!(probe, Probe::StunIpv4 { .. }) { - result.ipv4_can_send = true; - metrics.stun_packets_sent_ipv4.inc(); - } else { - result.ipv6_can_send = true; - metrics.stun_packets_sent_ipv6.inc(); - } - let (delay, addr) = stun_rx.await.map_err(|_| { - ProbeErrorWithProbe::Error( - probe_error::StunSnafu.into_error(stun_error::StunChannelGoneSnafu.build()), - probe.clone(), +impl Probe { + /// Executes this particular [`Probe`], including using a delayed start if needed. 
+ async fn run( + self, + delay: Duration, + relay_node: Arc, + #[cfg(not(wasm_browser))] socket_state: SocketState, + #[cfg(any(test, feature = "test-utils"))] insecure_skip_relay_cert_verify: bool, + ) -> Result { + if !delay.is_zero() { + trace!("delaying probe"); + time::sleep(delay).await; + } + debug!("starting probe"); + + match self { + Probe::Https => { + match run_https_probe( + #[cfg(not(wasm_browser))] + &socket_state.dns_resolver, + relay_node.url.clone(), + #[cfg(any(test, feature = "test-utils"))] + insecure_skip_relay_cert_verify, ) - })?; - result.latency = Some(delay); - result.addr = Some(addr); - Ok(result) - } - Ok(n) => { - let err = stun_error::SendFullSnafu.build(); - error!(%relay_addr, sent_len=n, req_len=req.len(), "{err:#}"); - Err(ProbeErrorWithProbe::Error( - probe_error::StunSnafu.into_error(err), - probe.clone(), - )) - } - Err(err) => { - let kind = err.kind(); - let err = stun_error::SendSnafu.into_error(err); - - // It is entirely normal that we are on a dual-stack machine with no - // routed IPv6 network. So silence that case. - // NetworkUnreachable and HostUnreachable are still experimental (io_error_more - // #86442) but it is already emitted. So hack around this. - match format!("{kind:?}").as_str() { - "NetworkUnreachable" | "HostUnreachable" => { - debug!(%relay_addr, "{err:#}"); - Err(ProbeErrorWithProbe::AbortSet( - probe_error::StunSnafu.into_error(err), - probe.clone(), - )) - } - _ => { - // No need to log this, our caller does already log this. 
- Err(ProbeErrorWithProbe::Error( - probe_error::StunSnafu.into_error(err), - probe.clone(), - )) + .await + { + Ok(report) => Ok(ProbeReport::Https(report)), + Err(err) => Err(probe_error::HttpsSnafu.into_error(err)), } } + #[cfg(not(wasm_browser))] + Probe::QadIpv4 | Probe::QadIpv6 => unreachable!("must not be used"), } } } #[cfg(not(wasm_browser))] -fn maybe_to_mapped_addr( - ip_mapped_addrs: Option, +pub(super) fn maybe_to_mapped_addr( + ip_mapped_addrs: Option<&IpMappedAddresses>, addr: SocketAddr, ) -> SocketAddr { - if let Some(ip_mapped_addrs) = ip_mapped_addrs.as_ref() { + if let Some(ip_mapped_addrs) = ip_mapped_addrs { return ip_mapped_addrs.get_or_register(addr).private_socket_addr(); } addr } -/// Run a QUIC address discovery probe. -#[cfg(not(wasm_browser))] -async fn run_quic_probe( - quic_config: QuicConfig, - url: RelayUrl, - relay_addr: SocketAddr, - probe: Probe, - ip_mapped_addrs: Option, -) -> Result { - match probe.proto() { - ProbeProto::QuicIpv4 => debug_assert!(relay_addr.is_ipv4()), - ProbeProto::QuicIpv6 => debug_assert!(relay_addr.is_ipv6()), - _ => debug_assert!(false, "wrong probe"), - } - let relay_addr = maybe_to_mapped_addr(ip_mapped_addrs, relay_addr); - let host = match url.host_str() { - Some(host) => host, - None => { - return Err(ProbeErrorWithProbe::Error( - probe_error::QuicSnafu.into_error(quic_error::InvalidUrlSnafu.build()), - probe.clone(), - )); - } - }; - let quic_client = iroh_relay::quic::QuicClient::new(quic_config.ep, quic_config.client_config) - .map_err(|e| { - ProbeErrorWithProbe::Error( - probe_error::QuicSnafu.into_error(quic_error::CreateClientSnafu.into_error(e)), - probe.clone(), - ) - })?; - let (addr, latency) = quic_client - .get_addr_and_latency(relay_addr, host) - .await - .map_err(|e| { - ProbeErrorWithProbe::Error( - probe_error::QuicSnafu.into_error(quic_error::GetAddrSnafu.into_error(e)), - probe.clone(), - ) - })?; - let mut result = ProbeReport::new(probe.clone()); - if matches!(probe, 
Probe::QuicIpv4 { .. }) { - result.ipv4_can_send = true; - } else { - result.ipv6_can_send = true; - } - result.addr = Some(addr); - result.latency = Some(latency); - Ok(result) -} - #[cfg(not(wasm_browser))] #[derive(Debug, Snafu)] #[snafu(module)] @@ -1157,21 +553,15 @@ async fn check_captive_portal( dm: &RelayMap, preferred_relay: Option, ) -> Result { - // If we have a preferred relay node and we can use it for non-STUN requests, try that; + // If we have a preferred relay node and we can use it for non-QAD requests, try that; // otherwise, pick a random one suitable for non-STUN requests. - let preferred_relay = preferred_relay.and_then(|url| match dm.get_node(&url) { - Some(node) if node.stun_only => Some(url), - _ => None, - }); + + let preferred_relay = preferred_relay.and_then(|url| dm.get_node(&url).map(|_| url)); let url = match preferred_relay { Some(url) => url, None => { - let urls: Vec<_> = dm - .nodes() - .filter(|n| !n.stun_only) - .map(|n| n.url.clone()) - .collect(); + let urls: Vec<_> = dm.nodes().map(|n| n.url.clone()).collect(); if urls.is_empty() { debug!("No suitable relay node for captive portal check"); return Ok(false); @@ -1237,27 +627,16 @@ async fn check_captive_portal( } /// Returns the proper port based on the protocol of the probe. 
-fn get_port(relay_node: &RelayNode, proto: &ProbeProto) -> Option { - match proto { - #[cfg(not(wasm_browser))] - ProbeProto::QuicIpv4 | ProbeProto::QuicIpv6 => { - if let Some(ref quic) = relay_node.quic { - if quic.port == 0 { - Some(DEFAULT_RELAY_QUIC_PORT) - } else { - Some(quic.port) - } - } else { - None - } - } - _ => { - if relay_node.stun_port == 0 { - Some(DEFAULT_STUN_PORT) - } else { - Some(relay_node.stun_port) - } +#[cfg(not(wasm_browser))] +fn get_quic_port(relay_node: &RelayNode) -> Option { + if let Some(ref quic) = relay_node.quic { + if quic.port == 0 { + Some(DEFAULT_RELAY_QUIC_PORT) + } else { + Some(quic.port) } + } else { + None } } @@ -1280,40 +659,23 @@ pub enum GetRelayAddrError { MissingPort, } -/// Returns the IP address to use to communicate to this relay node. -/// -/// *proto* specifies the protocol of the probe. Depending on the protocol we may return -/// different results. Obviously IPv4 vs IPv6 but a [`RelayNode`] may also have disabled -/// some protocols. -/// -/// If the protocol is `QuicIpv4` or `QuicIpv6`, and `IpMappedAddresses` is not `None`, we -/// assume that we are running this net report with `iroh`, and need to provide mapped -/// addresses to the probe in order for it to function in the specialize iroh-quinn -/// endpoint that expects mapped addresses. +/// Returns the IP address to use to communicate to this relay node for quic. 
#[cfg(not(wasm_browser))] -async fn get_relay_addr( +pub(super) async fn get_relay_addr_ipv4( dns_resolver: &DnsResolver, relay_node: &RelayNode, - proto: ProbeProto, -) -> Result { - use snafu::OptionExt; - - if relay_node.stun_only && !matches!(proto, ProbeProto::StunIpv4 | ProbeProto::StunIpv6) { - return Err(get_relay_addr_error::UnsupportedRelayNodeSnafu.build()); - } - let port = get_port(relay_node, &proto).context(get_relay_addr_error::MissingPortSnafu)?; - - match proto { - ProbeProto::StunIpv4 | ProbeProto::IcmpV4 | ProbeProto::QuicIpv4 => { - relay_lookup_ipv4_staggered(dns_resolver, relay_node, port).await - } - - ProbeProto::StunIpv6 | ProbeProto::IcmpV6 | ProbeProto::QuicIpv6 => { - relay_lookup_ipv6_staggered(dns_resolver, relay_node, port).await - } +) -> Result { + let port = get_quic_port(relay_node).context(get_relay_addr_error::MissingPortSnafu)?; + relay_lookup_ipv4_staggered(dns_resolver, relay_node, port).await +} - ProbeProto::Https => Err(get_relay_addr_error::UnsupportedHttpsSnafu.build()), - } +#[cfg(not(wasm_browser))] +pub(super) async fn get_relay_addr_ipv6( + dns_resolver: &DnsResolver, + relay_node: &RelayNode, +) -> Result { + let port = get_quic_port(relay_node).context(get_relay_addr_error::MissingPortSnafu)?; + relay_lookup_ipv6_staggered(dns_resolver, relay_node, port).await } /// Do a staggared ipv4 DNS lookup based on [`RelayNode`] @@ -1324,10 +686,10 @@ async fn relay_lookup_ipv4_staggered( dns_resolver: &DnsResolver, relay: &RelayNode, port: u16, -) -> Result { +) -> Result { match relay.url.host() { Some(url::Host::Domain(hostname)) => { - debug!(%hostname, "Performing DNS A lookup for relay addr"); + trace!(%hostname, "Performing DNS A lookup for relay addr"); match dns_resolver .lookup_ipv4_staggered(hostname, DNS_TIMEOUT, DNS_STAGGERING_MS) .await @@ -1335,15 +697,15 @@ async fn relay_lookup_ipv4_staggered( Ok(mut addrs) => addrs .next() .map(|ip| ip.to_canonical()) - .map(|addr| { - debug_assert!(addr.is_ipv4(), "bad 
DNS lookup: {:?}", addr); - SocketAddr::new(addr, port) + .map(|addr| match addr { + IpAddr::V4(ip) => SocketAddrV4::new(ip, port), + IpAddr::V6(_) => unreachable!("bad DNS lookup: {:?}", addr), }) .ok_or(get_relay_addr_error::NoAddrFoundSnafu.build()), Err(err) => Err(get_relay_addr_error::DnsLookupSnafu.into_error(err)), } } - Some(url::Host::Ipv4(addr)) => Ok(SocketAddr::new(addr.into(), port)), + Some(url::Host::Ipv4(addr)) => Ok(SocketAddrV4::new(addr, port)), Some(url::Host::Ipv6(_addr)) => Err(get_relay_addr_error::NoAddrFoundSnafu.build()), None => Err(get_relay_addr_error::InvalidHostnameSnafu.build()), } @@ -1357,79 +719,34 @@ async fn relay_lookup_ipv6_staggered( dns_resolver: &DnsResolver, relay: &RelayNode, port: u16, -) -> Result { +) -> Result { match relay.url.host() { Some(url::Host::Domain(hostname)) => { - debug!(%hostname, "Performing DNS AAAA lookup for relay addr"); + trace!(%hostname, "Performing DNS AAAA lookup for relay addr"); match dns_resolver .lookup_ipv6_staggered(hostname, DNS_TIMEOUT, DNS_STAGGERING_MS) .await { Ok(mut addrs) => addrs .next() - .map(|addr| { - debug_assert!(addr.is_ipv6(), "bad DNS lookup: {:?}", addr); - SocketAddr::new(addr, port) + .map(|addr| match addr { + IpAddr::V4(_) => unreachable!("bad DNS lookup: {:?}", addr), + IpAddr::V6(ip) => SocketAddrV6::new(ip, port, 0, 0), }) .ok_or(get_relay_addr_error::NoAddrFoundSnafu.build()), Err(err) => Err(get_relay_addr_error::DnsLookupSnafu.into_error(err)), } } Some(url::Host::Ipv4(_addr)) => Err(get_relay_addr_error::NoAddrFoundSnafu.build()), - Some(url::Host::Ipv6(addr)) => Ok(SocketAddr::new(addr.into(), port)), + Some(url::Host::Ipv6(addr)) => Ok(SocketAddrV6::new(addr, port, 0, 0)), None => Err(get_relay_addr_error::InvalidHostnameSnafu.build()), } } -/// Runs an ICMP IPv4 or IPv6 probe. -/// -/// The `pinger` is passed in so the ping sockets are only bound once -/// for the probe set. 
-#[cfg(not(wasm_browser))] -async fn run_icmp_probe( - probe: Probe, - relay_addr: SocketAddr, - pinger: Pinger, -) -> Result { - match probe.proto() { - ProbeProto::IcmpV4 => debug_assert!(relay_addr.is_ipv4()), - ProbeProto::IcmpV6 => debug_assert!(relay_addr.is_ipv6()), - _ => debug_assert!(false, "wrong probe"), - } - const DATA: &[u8; 15] = b"iroh icmp probe"; - debug!(dst = %relay_addr, len = DATA.len(), "ICMP Ping started"); - let latency = pinger - .send(relay_addr.ip(), DATA) - .await - .map_err(|err| match err { - PingError::CreateClientIpv4 { .. } | PingError::CreateClientIpv6 { .. } => { - ProbeErrorWithProbe::AbortSet(probe_error::IcmpSnafu.into_error(err), probe.clone()) - } - #[cfg(not(wasm_browser))] - PingError::Ping { .. } => { - ProbeErrorWithProbe::Error(probe_error::IcmpSnafu.into_error(err), probe.clone()) - } - })?; - debug!(dst = %relay_addr, len = DATA.len(), ?latency, "ICMP ping done"); - let mut report = ProbeReport::new(probe); - report.latency = Some(latency); - match relay_addr { - SocketAddr::V4(_) => { - report.ipv4_can_send = true; - report.icmpv4 = Some(true); - } - SocketAddr::V6(_) => { - report.ipv6_can_send = true; - report.icmpv6 = Some(true); - } - } - Ok(report) -} - #[derive(Debug, Snafu)] #[snafu(module)] #[non_exhaustive] -enum MeasureHttpsLatencyError { +pub enum MeasureHttpsLatencyError { #[snafu(transparent)] InvalidUrl { source: url::ParseError }, #[cfg(not(wasm_browser))] @@ -1448,12 +765,13 @@ enum MeasureHttpsLatencyError { /// If `certs` is provided they will be added to the trusted root certificates, allowing the /// use of self-signed certificates for servers. Currently this is used for testing. 
#[allow(clippy::unused_async)] -async fn measure_https_latency( +async fn run_https_probe( #[cfg(not(wasm_browser))] dns_resolver: &DnsResolver, - node: &RelayNode, + relay_node: RelayUrl, #[cfg(any(test, feature = "test-utils"))] insecure_skip_relay_cert_verify: bool, -) -> Result<(Duration, IpAddr), MeasureHttpsLatencyError> { - let url = node.url.join(RELAY_PROBE_PATH)?; +) -> Result { + trace!("HTTPS probe start"); + let url = relay_node.join(RELAY_PROBE_PATH)?; // This should also use same connection establishment as relay client itself, which // needs to be more configurable so users can do more crazy things: @@ -1479,6 +797,7 @@ async fn measure_https_latency( .await? .map(|ipaddr| SocketAddr::new(ipaddr, 0)) .collect(); + trace!(?addrs, "resolved addrs"); builder = builder.resolve_to_addrs(domain, &addrs); } @@ -1497,15 +816,6 @@ async fn measure_https_latency( .context(measure_https_latency_error::HttpRequestSnafu)?; let latency = start.elapsed(); if response.status().is_success() { - // Only `None` if a different hyper HttpConnector in the request. - #[cfg(not(wasm_browser))] - let remote_ip = response - .remote_addr() - .expect("missing HttpInfo from HttpConnector") - .ip(); - #[cfg(wasm_browser)] - let remote_ip = IpAddr::V4(std::net::Ipv4Addr::UNSPECIFIED); - // Drain the response body to be nice to the server, up to a limit. const MAX_BODY_SIZE: usize = 8 << 10; // 8 KiB let mut body_size = 0; @@ -1518,7 +828,10 @@ async fn measure_https_latency( } } - Ok((latency, remote_ip)) + Ok(HttpsProbeReport { + node: relay_node, + latency, + }) } else { Err(measure_https_latency_error::InvalidResponseSnafu { status: response.status(), @@ -1527,414 +840,46 @@ async fn measure_https_latency( } } -/// Updates a net_report [`Report`] with a new [`ProbeReport`]. 
-fn update_report(report: &mut Report, probe_report: ProbeReport) { - let relay_node = probe_report.probe.node(); - if let Some(latency) = probe_report.latency { - report - .relay_latency - .update_relay(relay_node.url.clone(), latency); - - #[cfg(not(wasm_browser))] - if matches!( - probe_report.probe.proto(), - ProbeProto::StunIpv4 - | ProbeProto::StunIpv6 - | ProbeProto::QuicIpv4 - | ProbeProto::QuicIpv6 - ) { - report.udp = true; - - match probe_report.addr { - Some(SocketAddr::V4(ipp)) => { - report.ipv4 = true; - report - .relay_v4_latency - .update_relay(relay_node.url.clone(), latency); - if report.global_v4.is_none() { - report.global_v4 = Some(ipp); - } else if report.global_v4 != Some(ipp) { - report.mapping_varies_by_dest_ip = Some(true); - } else if report.mapping_varies_by_dest_ip.is_none() { - report.mapping_varies_by_dest_ip = Some(false); - } - } - Some(SocketAddr::V6(ipp)) => { - report.ipv6 = true; - report - .relay_v6_latency - .update_relay(relay_node.url.clone(), latency); - if report.global_v6.is_none() { - report.global_v6 = Some(ipp); - } else if report.global_v6 != Some(ipp) { - report.mapping_varies_by_dest_ipv6 = Some(true); - warn!("IPv6 Address detected by STUN varies by destination"); - } else if report.mapping_varies_by_dest_ipv6.is_none() { - report.mapping_varies_by_dest_ipv6 = Some(false); - } - } - None => { - // If we are here we had a relay server latency reported from a STUN probe. - // Thus we must have a reported address. - debug_assert!(probe_report.addr.is_some()); - } - } - } - } - report.ipv4_can_send |= probe_report.ipv4_can_send; - report.ipv6_can_send |= probe_report.ipv6_can_send; - report.icmpv4 = report - .icmpv4 - .map(|val| val || probe_report.icmpv4.unwrap_or_default()) - .or(probe_report.icmpv4); - report.icmpv6 = report - .icmpv6 - .map(|val| val || probe_report.icmpv6.unwrap_or_default()) - .or(probe_report.icmpv6); -} - -/// Resolves to pending if the inner is `None`. 
-#[derive(Debug)] -pub(crate) struct MaybeFuture { - /// Future to be polled. - pub inner: Option, -} - -// NOTE: explicit implementation to bypass derive unnecessary bounds -impl Default for MaybeFuture { - fn default() -> Self { - MaybeFuture { inner: None } - } -} - -impl Future for MaybeFuture { - type Output = T::Output; - - fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - match self.inner { - Some(ref mut t) => Pin::new(t).poll(cx), - None => Poll::Pending, - } - } -} - #[cfg(test)] mod tests { - use std::net::{Ipv4Addr, Ipv6Addr}; + use std::net::Ipv4Addr; + use iroh_relay::dns::DnsResolver; use n0_snafu::{Result, ResultExt}; use tracing_test::traced_test; use super::{super::test_utils, *}; - use crate::net_report::dns; - - #[tokio::test] - #[traced_test] - async fn test_update_report_stun_working() { - let (_server_a, relay_a) = test_utils::relay().await; - let (_server_b, relay_b) = test_utils::relay().await; - - let mut report = Report::default(); - let relay_a = Arc::new(relay_a); - let relay_b = Arc::new(relay_b); - - // A STUN IPv4 probe from the the first relay server. - let probe_report_a = ProbeReport { - ipv4_can_send: true, - ipv6_can_send: false, - icmpv4: None, - icmpv6: None, - latency: Some(Duration::from_millis(5)), - probe: Probe::StunIpv4 { - delay: Duration::ZERO, - node: relay_a.clone(), - }, - addr: Some((Ipv4Addr::new(203, 0, 113, 1), 1234).into()), - }; - update_report(&mut report, probe_report_a.clone()); - - assert!(report.udp); - assert_eq!( - report.relay_latency.get(&relay_a.url).unwrap(), - Duration::from_millis(5) - ); - assert_eq!( - report.relay_v4_latency.get(&relay_a.url).unwrap(), - Duration::from_millis(5) - ); - assert!(report.ipv4_can_send); - assert!(!report.ipv6_can_send); - - // A second STUN IPv4 probe, same external IP detected but slower. 
- let probe_report_b = ProbeReport { - latency: Some(Duration::from_millis(8)), - probe: Probe::StunIpv4 { - delay: Duration::ZERO, - node: relay_b.clone(), - }, - ..probe_report_a - }; - update_report(&mut report, probe_report_b); - - assert!(report.udp); - assert_eq!( - report.relay_latency.get(&relay_a.url).unwrap(), - Duration::from_millis(5) - ); - assert_eq!( - report.relay_v4_latency.get(&relay_a.url).unwrap(), - Duration::from_millis(5) - ); - assert!(report.ipv4_can_send); - assert!(!report.ipv6_can_send); - - // A STUN IPv6 probe, this one is faster. - let probe_report_a_ipv6 = ProbeReport { - ipv4_can_send: false, - ipv6_can_send: true, - icmpv4: None, - icmpv6: None, - latency: Some(Duration::from_millis(4)), - probe: Probe::StunIpv6 { - delay: Duration::ZERO, - node: relay_a.clone(), - }, - addr: Some((Ipv6Addr::new(2001, 0xdb8, 0, 0, 0, 0, 0, 1), 1234).into()), - }; - update_report(&mut report, probe_report_a_ipv6); - - assert!(report.udp); - assert_eq!( - report.relay_latency.get(&relay_a.url).unwrap(), - Duration::from_millis(4) - ); - assert_eq!( - report.relay_v6_latency.get(&relay_a.url).unwrap(), - Duration::from_millis(4) - ); - assert!(report.ipv4_can_send); - assert!(report.ipv6_can_send); - } - - #[tokio::test] - #[traced_test] - async fn test_update_report_icmp() { - let (_server_a, relay_a) = test_utils::relay().await; - let (_server_b, relay_b) = test_utils::relay().await; - let relay_a = Arc::new(relay_a); - let relay_b = Arc::new(relay_b); - - let mut report = Report::default(); - - // An ICMPv4 probe from the EU relay server. 
- let probe_report_eu = ProbeReport { - ipv4_can_send: true, - ipv6_can_send: false, - icmpv4: Some(true), - icmpv6: None, - latency: Some(Duration::from_millis(5)), - probe: Probe::IcmpV4 { - delay: Duration::ZERO, - node: relay_a.clone(), - }, - addr: Some((Ipv4Addr::new(203, 0, 113, 1), 1234).into()), - }; - update_report(&mut report, probe_report_eu.clone()); - - assert!(!report.udp); - assert!(report.ipv4_can_send); - assert_eq!(report.icmpv4, Some(true)); - - // A second ICMPv4 probe which did not work. - let probe_report_na = ProbeReport { - ipv4_can_send: false, - ipv6_can_send: false, - icmpv4: Some(false), - icmpv6: None, - latency: None, - probe: Probe::IcmpV4 { - delay: Duration::ZERO, - node: relay_b.clone(), - }, - addr: None, - }; - update_report(&mut report, probe_report_na); - - assert_eq!(report.icmpv4, Some(true)); - - // Behold, a STUN probe arrives! - let probe_report_eu_stun = ProbeReport { - ipv4_can_send: true, - ipv6_can_send: false, - icmpv4: None, - icmpv6: None, - latency: Some(Duration::from_millis(5)), - probe: Probe::StunIpv4 { - delay: Duration::ZERO, - node: relay_a.clone(), - }, - addr: Some((Ipv4Addr::new(203, 0, 113, 1), 1234).into()), - }; - update_report(&mut report, probe_report_eu_stun); - - assert!(report.udp); - assert_eq!(report.icmpv4, Some(true)); - } - - // # ICMP permissions on Linux - // - // ## Using capabilities: CAP_NET_RAW - // - // To run ICMP tests on Linux you need CAP_NET_RAW capabilities. When running tests - // this means you first need to build the binary, set the capabilities and finally run - // the tests. 
- // - // Build the test binary: - // - // cargo nextest run -p iroh net_report::reportgen::tests --no-run - // - // Find out the test binary location: - // - // cargo nextest list --message-format json -p iroh net_report::reportgen::tests \ - // | jq '."rust-suites"."iroh"."binary-path"' | tr -d \" - // - // Set the CAP_NET_RAW permission, note that nextest runs each test in a child process - // so the capabilities need to be inherited: - // - // sudo setcap CAP_NET_RAW=eip target/debug/deps/iroh-abc123 - // - // Finally run the test: - // - // cargo nextest run -p iroh net_report::reportgen::tests - // - // This allows the pinger to create a SOCK_RAW socket for IPPROTO_ICMP. - // - // - // ## Using sysctl - // - // Now you know the hard way, you can also get this permission a little easier, but - // slightly less secure, by allowing any process running with your group ID to create a - // SOCK_DGRAM for IPPROTO_ICMP. - // - // First find out your group ID: - // - // id --group - // - // Then allow this group to send pings. Note that this is an inclusive range: - // - // sudo sysctl net.ipv4.ping_group_range="1234 1234" - // - // Note that this does not survive a reboot usually, commonly you need to edit - // /etc/sysctl.conf or /etc/sysctl.d/* to persist this across reboots. - // - // TODO: Not sure what about IPv6 pings using sysctl. - #[tokio::test] - #[traced_test] - async fn test_icmpk_probe() { - let pinger = Pinger::new(); - let (server, node) = test_utils::relay().await; - let addr = server.stun_addr().expect("test relay serves stun"); - let probe = Probe::IcmpV4 { - delay: Duration::from_secs(0), - node: Arc::new(node), - }; - - // A single ICMP packet might get lost. Try several and take the first. 
- let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel(); - let mut tasks = JoinSet::new(); - for i in 0..8 { - let probe = probe.clone(); - let pinger = pinger.clone(); - let tx = tx.clone(); - tasks.spawn(async move { - time::sleep(Duration::from_millis(i * 100)).await; - let res = run_icmp_probe(probe, addr, pinger).await; - tx.send(res).ok(); - }); - } - let mut last_err = None; - while let Some(res) = rx.recv().await { - match res { - Ok(report) => { - dbg!(&report); - assert_eq!(report.icmpv4, Some(true)); - assert!( - report.latency.expect("should have a latency") > Duration::from_secs(0) - ); - break; - } - Err(ProbeErrorWithProbe::Error(err, _probe)) => { - last_err = Some(err); - } - Err(ProbeErrorWithProbe::AbortSet(_err, _probe)) => { - // We don't have permission, too bad. - // panic!("no ping permission: {err:#}"); - break; - } - } - } - if let Some(err) = last_err { - panic!("Ping error: {err:#}"); - } - } #[tokio::test] async fn test_measure_https_latency() -> Result { let (_server, relay) = test_utils::relay().await; - let dns_resolver = dns::tests::resolver(); + let dns_resolver = DnsResolver::new(); tracing::info!(relay_url = ?relay.url , "RELAY_URL"); - let (latency, ip) = measure_https_latency(&dns_resolver, &relay, true).await?; + let report = run_https_probe(&dns_resolver, relay.url, true).await?; - assert!(latency > Duration::ZERO); + assert!(report.latency > Duration::ZERO); - let relay_url_ip = relay - .url - .host_str() - .unwrap() - .parse::() - .e()?; - assert_eq!(ip, relay_url_ip); Ok(()) } #[tokio::test] #[traced_test] - async fn test_quic_probe() -> Result { + async fn test_qad_probe_v4() -> Result { let (server, relay) = test_utils::relay().await; let relay = Arc::new(relay); let client_config = iroh_relay::client::make_dangerous_client_config(); let ep = quinn::Endpoint::client(SocketAddr::new(Ipv4Addr::LOCALHOST.into(), 0)).e()?; let client_addr = ep.local_addr().e()?; - let quic_addr_disc = QuicConfig { - ep: ep.clone(), - 
client_config, - ipv4: true, - ipv6: true, - }; - let url = relay.url.clone(); - let port = server.quic_addr().unwrap().port(); - let probe = Probe::QuicIpv4 { - delay: Duration::from_secs(0), - node: relay, - }; - let probe = match run_quic_probe( - quic_addr_disc, - url, - (Ipv4Addr::LOCALHOST, port).into(), - probe, - None, - ) - .await - { - Ok(probe) => probe, - Err(e) => match e { - ProbeErrorWithProbe::AbortSet(err, _) | ProbeErrorWithProbe::Error(err, _) => { - return Err(err.into()); - } - }, - }; - assert!(probe.ipv4_can_send); - assert_eq!(probe.addr.unwrap(), client_addr); + + let quic_client = iroh_relay::quic::QuicClient::new(ep.clone(), client_config); + let dns_resolver = DnsResolver::default(); + + let (report, conn) = super::super::run_probe_v4(None, relay, quic_client, dns_resolver) + .await + .unwrap(); + + assert_eq!(report.addr, client_addr); + drop(conn); ep.wait_idle().await; server.shutdown().await?; Ok(()) diff --git a/iroh/src/net_report/reportgen/hairpin.rs b/iroh/src/net_report/reportgen/hairpin.rs deleted file mode 100644 index 097a65a53ef..00000000000 --- a/iroh/src/net_report/reportgen/hairpin.rs +++ /dev/null @@ -1,323 +0,0 @@ -//! Actor to run hairpinning check. -//! -//! This actor works as follows: -//! -//! - After starting prepares the haircheck: -//! - binds socket -//! - sends traffic from it's socket to trick some routers -//! - When requested performs the hairpin probe. -//! - result is sent to net_report actor addr. -//! - Shuts down -//! -//! Note it will only perform a single hairpin check before shutting down. Any further -//! requests to it will fail which is intentional. 
- -use std::net::{Ipv4Addr, SocketAddr, SocketAddrV4}; - -use iroh_relay::protos::stun; -use n0_future::{ - task::{self, AbortOnDropHandle}, - time::{self, Instant}, -}; -use netwatch::UdpSocket; -use snafu::Snafu; -use tokio::sync::oneshot; -use tracing::{debug, error, info_span, trace, warn, Instrument}; - -use crate::net_report::{self, defaults::timeouts::HAIRPIN_CHECK_TIMEOUT, reportgen, Inflight}; - -/// Handle to the hairpin actor. -/// -/// Dropping it will abort the actor. -#[derive(Debug)] -pub(super) struct Client { - addr: Option>, - _drop_guard: AbortOnDropHandle<()>, -} - -impl Client { - pub(super) fn new(net_report: net_report::Addr, reportgen: reportgen::Addr) -> Self { - let (addr, msg_rx) = oneshot::channel(); - - let actor = Actor { - msg_rx, - net_report, - reportgen, - }; - - let task = - task::spawn(async move { actor.run().await }.instrument(info_span!("hairpin.actor"))); - Self { - addr: Some(addr), - _drop_guard: AbortOnDropHandle::new(task), - } - } - - /// Returns `true` if we have started a hairpin check before. - pub(super) fn has_started(&self) -> bool { - self.addr.is_none() - } - - /// Starts the hairpin check. - /// - /// *dst* should be our own address as discovered by STUN. Hairpin detection works by - /// sending a new STUN request to our own public address, if we receive this request - /// back then hairpinning works, otherwise it does not. - /// - /// Will do nothing if this actor is already finished or a check has already started. - pub(super) fn start_check(&mut self, dst: SocketAddrV4) { - if let Some(addr) = self.addr.take() { - addr.send(Message::StartCheck(dst)).ok(); - } - } -} - -#[derive(Debug)] -enum Message { - /// Performs the hairpin check. - /// - /// The STUN request will be sent to the provided [`SocketAddrV4`] which should be our - /// own address discovered using STUN. 
- StartCheck(SocketAddrV4), -} - -#[derive(Debug)] -struct Actor { - msg_rx: oneshot::Receiver, - net_report: net_report::Addr, - reportgen: reportgen::Addr, -} - -#[derive(Debug, Snafu)] -enum Error { - #[snafu(transparent)] - Io { source: std::io::Error }, - #[snafu(display("net_report actor is gone"))] - NetReportActorGone, - #[snafu(display("reportgen actor is gone"))] - ReportGenActorGone, - #[snafu(display("stun response channel dropped"))] - StunResponseGone, -} - -impl Actor { - async fn run(self) { - match self.run_inner().await { - Ok(_) => trace!("hairpin actor finished successfully"), - Err(err) => error!("Hairpin actor failed: {err:#}"), - } - } - - async fn run_inner(self) -> Result<(), Error> { - let socket = UdpSocket::bind_v4(0)?; - - if let Err(err) = Self::prepare_hairpin(&socket).await { - warn!("unable to send hairpin prep: {err:#}"); - // Continue anyway, most routers are fine. - } - - // We only have one message to handle - let Ok(Message::StartCheck(dst)) = self.msg_rx.await else { - return Ok(()); - }; - - let txn = stun::TransactionId::default(); - trace!(%txn, "Sending hairpin with transaction ID"); - let (stun_tx, stun_rx) = oneshot::channel(); - let inflight = Inflight { - txn, - start: Instant::now(), // ignored by hairping probe - s: stun_tx, - }; - let (msg_response_tx, msg_response_rx) = oneshot::channel(); - self.net_report - .send(net_report::Message::InFlightStun(inflight, msg_response_tx)) - .await - .map_err(|_| NetReportActorGoneSnafu.build())?; - - msg_response_rx - .await - .map_err(|_| NetReportActorGoneSnafu.build())?; - - if let Err(err) = socket.send_to(&stun::request(txn), dst.into()).await { - warn!(%dst, "failed to send hairpin check"); - return Err(err.into()); - } - - let now = Instant::now(); - let hairpinning_works = match time::timeout(HAIRPIN_CHECK_TIMEOUT, stun_rx).await { - Ok(Ok(_)) => true, - Ok(Err(_)) => return Err(StunResponseGoneSnafu.build()), - Err(_) => false, // Elapsed - }; - debug!( - "hairpinning 
done in {:?}, res: {:?}", - now.elapsed(), - hairpinning_works - ); - - self.reportgen - .send(super::Message::HairpinResult(hairpinning_works)) - .await - .map_err(|_| ReportGenActorGoneSnafu.build())?; - - trace!("reportgen notified"); - - Ok(()) - } - - async fn prepare_hairpin(socket: &UdpSocket) -> std::io::Result<()> { - // At least the Apple Airport Extreme doesn't allow hairpin - // sends from a private socket until it's seen traffic from - // that src IP:port to something else out on the internet. - // - // See https://github.com/tailscale/tailscale/issues/188#issuecomment-600728643 - // - // And it seems that even sending to a likely-filtered RFC 5737 - // documentation-only IPv4 range is enough to set up the mapping. - // So do that for now. In the future we might want to classify networks - // that do and don't require this separately. But for now help it. - let documentation_ip = SocketAddr::from((Ipv4Addr::new(203, 0, 113, 1), 12345)); - - socket - .send_to( - b"net_report; see https://github.com/tailscale/tailscale/issues/188", - documentation_ip, - ) - .await?; - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use std::time::Duration; - - use bytes::BytesMut; - use tokio::sync::mpsc; - use tracing::info; - use tracing_test::traced_test; - - use super::*; - - #[tokio::test] - #[traced_test] - async fn test_hairpin_success() { - for i in 0..100 { - let now = Instant::now(); - test_hairpin(true).await; - println!("done round {} in {:?}", i + 1, now.elapsed()); - } - } - - #[tokio::test] - #[traced_test] - async fn test_hairpin_failure() { - test_hairpin(false).await; - } - - async fn test_hairpin(hairpinning_works: bool) { - // Setup fake net_report and reportstate actors, hairpinning interacts with them. 
- let (net_report_tx, mut net_report_rx) = mpsc::channel(32); - let net_report_addr = net_report::Addr { - sender: net_report_tx, - metrics: Default::default(), - }; - let (reportstate_tx, mut reportstate_rx) = mpsc::channel(32); - let reportstate_addr = reportgen::Addr { - sender: reportstate_tx, - }; - - // Create hairpin actor - let mut actor = Client::new(net_report_addr, reportstate_addr); - - // Hairpinning works by asking the hairpin actor to send a STUN request to our - // discovered public address. If the router returns it hairpinning works. We - // emulate this by binding a random socket which we pretend is our publicly - // discovered address. The hairpin actor will send it a request and we return it - // via the inflight channel. - let public_sock = UdpSocket::bind_local_v4(0).unwrap(); - let ipp_v4 = match public_sock.local_addr().unwrap() { - SocketAddr::V4(ipp) => ipp, - SocketAddr::V6(_) => unreachable!(), - }; - actor.start_check(ipp_v4); - - // This bit is our dummy net_report actor: it handles the inflight request and sends - // back the STUN request once it arrives. - let dummy_net_report = tokio::spawn( - async move { - let net_report::Message::InFlightStun(inflight, resp_tx) = - net_report_rx.recv().await.unwrap() - else { - panic!("Wrong message received"); - }; - resp_tx.send(()).unwrap(); - - let mut buf = BytesMut::zeroed(64 << 10); - let (count, addr) = public_sock.recv_from(&mut buf).await.unwrap(); - info!( - addr=?public_sock.local_addr().unwrap(), - %count, - "Forwarding payload to hairpin actor", - ); - let payload = buf.split_to(count).freeze(); - let txn = stun::parse_binding_request(&payload).unwrap(); - assert_eq!(txn, inflight.txn); - - if hairpinning_works { - // We want hairpinning to work, send back the STUN request. 
- inflight.s.send((Duration::new(0, 1), addr)).unwrap(); - } else { - // We want hairpinning to fail, just wait but do not drop the STUN response - // channel because that would make the hairpin actor detect an error. - info!("Received hairpin request, not sending response"); - tokio::time::sleep(HAIRPIN_CHECK_TIMEOUT * 8).await; - } - } - .instrument(info_span!("dummy-net_report")), - ); - - // Next we expect our dummy reportstate to receive the result. - match reportstate_rx.recv().await { - Some(reportgen::Message::HairpinResult(val)) => assert_eq!(val, hairpinning_works), - Some(msg) => panic!("Unexpected reportstate message: {msg:?}"), - None => panic!("reportstate mpsc has no senders"), - } - - // Cleanup: our dummy net_report actor should finish - dummy_net_report - .await - .expect("error in dummy net_report actor"); - } - - #[tokio::test] - #[traced_test] - async fn test_client_drop() { - // Setup fake net_report and reportstate actors, hairpinning interacts with them. - let (net_report_tx, _net_report_rx) = mpsc::channel(32); - let net_report_addr = net_report::Addr { - sender: net_report_tx, - metrics: Default::default(), - }; - let (reportstate_tx, _reportstate_rx) = mpsc::channel(32); - let reportstate_addr = reportgen::Addr { - sender: reportstate_tx, - }; - - // Create hairpin actor - let mut client = Client::new(net_report_addr, reportstate_addr); - - // Save the addr, drop the client - let addr = client.addr.take(); - drop(client); - tokio::task::yield_now().await; - - // Check the actor is gone - let ipp_v4 = SocketAddrV4::new(Ipv4Addr::LOCALHOST, 10); - match addr.unwrap().send(Message::StartCheck(ipp_v4)) { - Err(_) => (), - _ => panic!("actor still running"), - } - } -} diff --git a/iroh/src/net_report/reportgen/probes.rs b/iroh/src/net_report/reportgen/probes.rs deleted file mode 100644 index 39eb2d90695..00000000000 --- a/iroh/src/net_report/reportgen/probes.rs +++ /dev/null @@ -1,1185 +0,0 @@ -//! The relay probes. -//! -//! 
All the probes try and establish the latency to the relay servers. Preferably the STUN -//! probes work and we also learn about our public IP addresses and ports. But fallback -//! probes for HTTPS and ICMP exist as well. - -use std::{collections::BTreeSet, fmt, sync::Arc}; - -use iroh_base::RelayUrl; -use iroh_relay::{RelayMap, RelayNode}; -use n0_future::time::Duration; -#[cfg(not(wasm_browser))] -use netwatch::interfaces; -use snafu::Snafu; - -use crate::net_report::Report; - -/// The retransmit interval used when net_report first runs. -/// -/// We have no past context to work with, and we want answers relatively quickly, so it's -/// biased slightly more aggressive than [`DEFAULT_ACTIVE_RETRANSMIT_DELAY`]. A few extra -/// packets at startup is fine. -const DEFAULT_INITIAL_RETRANSMIT: Duration = Duration::from_millis(100); - -/// The retransmit interval used when a previous report exists but is missing latency. -/// -/// When in an active steady-state, i.e. a previous report exists, we use the latency of the -/// previous report to determine the retransmit interval. However when this previous relay -/// latency is missing this default is used. -/// -/// This is a somewhat conservative guess because if we have no data, likely the relay node -/// is very far away and we have no data because we timed out the last time we probed it. -const DEFAULT_ACTIVE_RETRANSMIT_DELAY: Duration = Duration::from_millis(200); - -/// The extra time to add to retransmits if a previous report exists. -/// -/// When in an active steady-state, i.e. a previous report exists, we add this delay -/// multiplied with the attempt to probe retries to give later attempts increasingly more -/// time. -const ACTIVE_RETRANSMIT_EXTRA_DELAY: Duration = Duration::from_millis(50); - -/// The number of fastest relays to periodically re-query during incremental net_report -/// reports. (During a full report, all relay servers are scanned.) 
-const NUM_INCREMENTAL_RELAYS: usize = 3; - -/// The protocol used to time a node's latency. -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, derive_more::Display)] -#[repr(u8)] -pub enum ProbeProto { - /// STUN IPv4 - #[cfg(not(wasm_browser))] - StunIpv4, - /// STUN IPv6 - #[cfg(not(wasm_browser))] - StunIpv6, - /// HTTPS - Https, - /// ICMP IPv4 - #[cfg(not(wasm_browser))] - IcmpV4, - /// ICMP IPv6 - #[cfg(not(wasm_browser))] - IcmpV6, - /// QUIC Address Discovery Ipv4 - #[cfg(not(wasm_browser))] - QuicIpv4, - /// QUIC Address Discovery Ipv6 - #[cfg(not(wasm_browser))] - QuicIpv6, -} - -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, derive_more::Display)] -pub(super) enum Probe { - #[display("STUN Ipv4 after {delay:?} to {node}")] - #[cfg(not(wasm_browser))] - StunIpv4 { - /// When the probe is started, relative to the time that `get_report` is called. - /// One probe in each `ProbePlan` should have a delay of 0. Non-zero values - /// are for retries on UDP loss or timeout. - delay: Duration, - - /// The relay server to send this probe to. - node: Arc, - }, - #[display("STUN Ipv6 after {delay:?} to {node}")] - #[cfg(not(wasm_browser))] - StunIpv6 { - delay: Duration, - node: Arc, - }, - #[display("HTTPS after {delay:?} to {node}")] - Https { - delay: Duration, - node: Arc, - }, - #[display("ICMPv4 after {delay:?} to {node}")] - #[cfg(not(wasm_browser))] - IcmpV4 { - delay: Duration, - node: Arc, - }, - #[display("ICMPv6 after {delay:?} to {node}")] - #[cfg(not(wasm_browser))] - IcmpV6 { - delay: Duration, - node: Arc, - }, - #[display("QAD Ipv4 after {delay:?} to {node}")] - #[cfg(not(wasm_browser))] - QuicIpv4 { - delay: Duration, - node: Arc, - }, - #[display("QAD Ipv6 after {delay:?} to {node}")] - #[cfg(not(wasm_browser))] - QuicIpv6 { - delay: Duration, - node: Arc, - }, -} - -impl Probe { - pub(super) fn delay(&self) -> Duration { - match self { - #[cfg(not(wasm_browser))] - Probe::StunIpv4 { delay, .. } - | Probe::StunIpv6 { delay, .. 
} - | Probe::Https { delay, .. } - | Probe::IcmpV4 { delay, .. } - | Probe::IcmpV6 { delay, .. } - | Probe::QuicIpv4 { delay, .. } - | Probe::QuicIpv6 { delay, .. } => *delay, - #[cfg(wasm_browser)] - Probe::Https { delay, .. } => *delay, - } - } - - pub(super) fn proto(&self) -> ProbeProto { - match self { - #[cfg(not(wasm_browser))] - Probe::StunIpv4 { .. } => ProbeProto::StunIpv4, - #[cfg(not(wasm_browser))] - Probe::StunIpv6 { .. } => ProbeProto::StunIpv6, - Probe::Https { .. } => ProbeProto::Https, - #[cfg(not(wasm_browser))] - Probe::IcmpV4 { .. } => ProbeProto::IcmpV4, - #[cfg(not(wasm_browser))] - Probe::IcmpV6 { .. } => ProbeProto::IcmpV6, - #[cfg(not(wasm_browser))] - Probe::QuicIpv4 { .. } => ProbeProto::QuicIpv4, - #[cfg(not(wasm_browser))] - Probe::QuicIpv6 { .. } => ProbeProto::QuicIpv6, - } - } - - pub(super) fn node(&self) -> &Arc { - match self { - #[cfg(not(wasm_browser))] - Probe::StunIpv4 { node, .. } - | Probe::StunIpv6 { node, .. } - | Probe::Https { node, .. } - | Probe::IcmpV4 { node, .. } - | Probe::IcmpV6 { node, .. } - | Probe::QuicIpv4 { node, .. } - | Probe::QuicIpv6 { node, .. } => node, - #[cfg(wasm_browser)] - Probe::Https { node, .. } => node, - } - } -} - -/// A probe set is a sequence of similar [`Probe`]s with delays between them. -/// -/// The probes are to the same Relayer and of the same [`ProbeProto`] but will have different -/// delays. The delays are effectively retries, though they do not wait for the previous -/// probe to be finished. The first successful probe will cancel all other probes in the -/// set. -/// -/// This is a lot of type-safety by convention. It would be so much nicer to have this -/// compile-time checked but that introduces a giant mess of generics and traits and -/// associated exploding types. -/// -/// A [`ProbeSet`] implements [`IntoIterator`] similar to how [`Vec`] does. 
-#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] -pub(super) struct ProbeSet { - /// The [`ProbeProto`] all the probes in this set have. - proto: ProbeProto, - /// The probes in the set. - probes: Vec, -} - -#[derive(Debug, Snafu)] -#[snafu(display("Mismatching probe"))] -struct PushError; - -impl ProbeSet { - fn new(proto: ProbeProto) -> Self { - Self { - probes: Vec::new(), - proto, - } - } - - fn push(&mut self, probe: Probe) -> Result<(), PushError> { - if probe.proto() != self.proto { - return Err(PushError); - } - self.probes.push(probe); - Ok(()) - } - - fn is_empty(&self) -> bool { - self.probes.is_empty() - } -} - -impl<'a> IntoIterator for &'a ProbeSet { - type Item = &'a Probe; - - type IntoIter = std::slice::Iter<'a, Probe>; - - fn into_iter(self) -> Self::IntoIter { - self.probes.iter() - } -} - -impl fmt::Display for ProbeSet { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - writeln!(f, r#"ProbeSet("{}") {{"#, self.proto)?; - for probe in self.probes.iter() { - writeln!(f, " {probe},")?; - } - writeln!(f, "}}") - } -} - -/// A probe plan. -/// -/// A probe plan contains a number of [`ProbeSet`]s containing probes to be executed. -/// Generally the first probe of of a set which completes aborts the remaining probes of a -/// set. Sometimes a failing probe can also abort the remaining probes of a set. -/// -/// The [`reportgen`] actor will also abort all the remaining [`ProbeSet`]s once it has -/// sufficient information for a report. -/// -/// [`reportgen`]: crate::net_report::reportgen -#[derive(Debug, PartialEq, Eq)] -pub(super) struct ProbePlan { - set: BTreeSet, - protocols: BTreeSet, -} - -impl ProbePlan { - /// Creates an initial probe plan. 
- #[cfg(not(wasm_browser))] - pub(super) fn initial( - relay_map: &RelayMap, - protocols: &BTreeSet, - if_state: &interfaces::State, - ) -> Self { - let mut plan = Self { - set: BTreeSet::new(), - protocols: protocols.clone(), - }; - - // The first time we need add probes after the STUN we record this delay, so that - // further relay server can reuse this delay. - let mut max_high_prio_delay: Option = None; - - for relay_node in relay_map.nodes() { - let mut stun_ipv4_probes = ProbeSet::new(ProbeProto::StunIpv4); - let mut stun_ipv6_probes = ProbeSet::new(ProbeProto::StunIpv6); - let mut quic_ipv4_probes = ProbeSet::new(ProbeProto::QuicIpv4); - let mut quic_ipv6_probes = ProbeSet::new(ProbeProto::QuicIpv6); - - for attempt in 0..3 { - let delay = DEFAULT_INITIAL_RETRANSMIT * attempt as u32; - - if if_state.have_v4 { - stun_ipv4_probes - .push(Probe::StunIpv4 { - delay, - node: relay_node.clone(), - }) - .expect("adding StunIpv4 probe to a StunIpv4 probe set"); - quic_ipv4_probes - .push(Probe::QuicIpv4 { - delay, - node: relay_node.clone(), - }) - .expect("adding QuicIpv4 probe to a QuicIpv4 probe set"); - } - if if_state.have_v6 { - stun_ipv6_probes - .push(Probe::StunIpv6 { - delay, - node: relay_node.clone(), - }) - .expect("adding StunIpv6 probe to a StunIpv6 probe set"); - quic_ipv6_probes - .push(Probe::QuicIpv6 { - delay, - node: relay_node.clone(), - }) - .expect("adding QuicIpv6 probe to a QuicAddrIpv6 probe set"); - } - } - plan.add_if_enabled(stun_ipv4_probes); - plan.add_if_enabled(stun_ipv6_probes); - plan.add_if_enabled(quic_ipv4_probes); - plan.add_if_enabled(quic_ipv6_probes); - - // The HTTP and ICMP probes only start after the STUN probes have had a chance. 
- let mut https_probes = ProbeSet::new(ProbeProto::Https); - let mut icmp_v4_probes = ProbeSet::new(ProbeProto::IcmpV4); - let mut icmp_v6_probes = ProbeSet::new(ProbeProto::IcmpV6); - - for attempt in 0..3 { - let mut start = *max_high_prio_delay.get_or_insert_with(|| plan.max_delay()); - // if there are high priority probes, ensure there is a buffer between - // the highest probe delay and the next probes we create - // if there are no high priority probes, we don't need a buffer - if plan.has_priority_probes() { - start += DEFAULT_INITIAL_RETRANSMIT; - } - let delay = start + DEFAULT_INITIAL_RETRANSMIT * attempt as u32; - https_probes - .push(Probe::Https { - delay, - node: relay_node.clone(), - }) - .expect("adding Https probe to a Https probe set"); - if if_state.have_v4 { - icmp_v4_probes - .push(Probe::IcmpV4 { - delay, - node: relay_node.clone(), - }) - .expect("adding Icmp probe to an Icmp probe set"); - } - if if_state.have_v6 { - icmp_v6_probes - .push(Probe::IcmpV6 { - delay, - node: relay_node.clone(), - }) - .expect("adding IcmpIpv6 probe to and IcmpIpv6 probe set"); - } - } - - plan.add_if_enabled(https_probes); - plan.add_if_enabled(icmp_v4_probes); - plan.add_if_enabled(icmp_v6_probes); - } - plan - } - - /// Creates an initial probe plan for browsers. - /// - /// Here, we essentially only run HTTPS probes without any delays waiting for STUN. 
- #[cfg(wasm_browser)] - pub(super) fn initial(relay_map: &RelayMap, protocols: &BTreeSet) -> Self { - let mut plan = Self { - set: BTreeSet::new(), - protocols: protocols.clone(), - }; - - for relay_node in relay_map.nodes() { - let mut https_probes = ProbeSet::new(ProbeProto::Https); - - for attempt in 0u32..3 { - let delay = DEFAULT_INITIAL_RETRANSMIT * attempt; - https_probes - .push(Probe::Https { - delay, - node: relay_node.clone(), - }) - .expect("adding Https probe to a Https probe set"); - } - - plan.add_if_enabled(https_probes); - } - plan - } - - /// Creates a follow up probe plan using a previous net_report report in browsers. - /// - /// This will only schedule HTTPS probes. - #[cfg(not(wasm_browser))] - pub(super) fn with_last_report( - relay_map: &RelayMap, - last_report: &Report, - protocols: &BTreeSet, - if_state: &interfaces::State, - ) -> Self { - if last_report.relay_latency.is_empty() { - return Self::initial(relay_map, protocols, if_state); - } - let mut plan = Self { - set: Default::default(), - protocols: protocols.clone(), - }; - - // The first time we need add probes after the STUN we record this delay, so that - // further relay servers can reuse this delay. - let mut max_stun_delay: Option = None; - - let had_stun_ipv4 = !last_report.relay_v4_latency.is_empty(); - let had_stun_ipv6 = !last_report.relay_v6_latency.is_empty(); - let had_both = if_state.have_v6 && had_stun_ipv4 && had_stun_ipv6; - let sorted_relays = sort_relays(relay_map, last_report); - for (ri, (url, relay_node)) in sorted_relays.into_iter().enumerate() { - if ri == NUM_INCREMENTAL_RELAYS { - break; - } - let mut do4 = if_state.have_v4; - let mut do6 = if_state.have_v6; - - // By default, each node only gets one STUN packet sent, - // except the fastest two from the previous round. 
- let mut attempts = 1; - let is_fastest_two = ri < 2; - - if is_fastest_two { - attempts = 2; - } else if had_both { - // For dual stack machines, make the 3rd & slower nodes alternate between - // IPv4 and IPv6 for STUN and ICMP probes. - if ri % 2 == 0 { - (do4, do6) = (true, false); - } else { - (do4, do6) = (false, true); - } - } - if !is_fastest_two && !had_stun_ipv6 { - do6 = false; - } - if Some(url) == last_report.preferred_relay.as_ref() { - // But if we already had a relay home, try extra hard to - // make sure it's there so we don't flip flop around. - attempts = 4; - } - let retransmit_delay = last_report - .relay_latency - .get(url) - .map(|l| l * 120 / 100) // increases latency by 20%, why? - .unwrap_or(DEFAULT_ACTIVE_RETRANSMIT_DELAY); - - let mut stun_ipv4_probes = ProbeSet::new(ProbeProto::StunIpv4); - let mut stun_ipv6_probes = ProbeSet::new(ProbeProto::StunIpv6); - let mut quic_ipv4_probes = ProbeSet::new(ProbeProto::QuicIpv4); - let mut quic_ipv6_probes = ProbeSet::new(ProbeProto::QuicIpv6); - - for attempt in 0..attempts { - let delay = (retransmit_delay * attempt as u32) - + (ACTIVE_RETRANSMIT_EXTRA_DELAY * attempt as u32); - if do4 { - stun_ipv4_probes - .push(Probe::StunIpv4 { - delay, - node: relay_node.clone(), - }) - .expect("Pushing StunIpv4 Probe to StunIpv4 ProbeSet"); - quic_ipv4_probes - .push(Probe::QuicIpv4 { - delay, - node: relay_node.clone(), - }) - .expect("adding QuicIpv4 probe to a QuicAddrIpv4 probe set"); - } - if do6 { - stun_ipv6_probes - .push(Probe::StunIpv6 { - delay, - node: relay_node.clone(), - }) - .expect("Pushing StunIpv6 Probe to StunIpv6 ProbeSet"); - quic_ipv6_probes - .push(Probe::QuicIpv6 { - delay, - node: relay_node.clone(), - }) - .expect("adding QuicIpv6 probe to a QuicAddrIpv6 probe set"); - } - } - plan.add_if_enabled(stun_ipv4_probes); - plan.add_if_enabled(stun_ipv6_probes); - plan.add_if_enabled(quic_ipv4_probes); - plan.add_if_enabled(quic_ipv6_probes); - - // The HTTP and ICMP probes only start 
after the STUN probes have had a chance. - let mut https_probes = ProbeSet::new(ProbeProto::Https); - let mut icmp_v4_probes = ProbeSet::new(ProbeProto::IcmpV4); - let mut icmp_v6_probes = ProbeSet::new(ProbeProto::IcmpV6); - let start = *max_stun_delay.get_or_insert_with(|| plan.max_delay()); - for attempt in 0..attempts { - let delay = start - + (retransmit_delay * attempt as u32) - + (ACTIVE_RETRANSMIT_EXTRA_DELAY * (attempt as u32 + 1)); - https_probes - .push(Probe::Https { - delay, - node: relay_node.clone(), - }) - .expect("Pushing Https Probe to an Https ProbeSet"); - if do4 { - icmp_v4_probes - .push(Probe::IcmpV4 { - delay, - node: relay_node.clone(), - }) - .expect("Pushing IcmpV4 Probe to an Icmp ProbeSet"); - } - if do6 { - icmp_v6_probes - .push(Probe::IcmpV6 { - delay, - node: relay_node.clone(), - }) - .expect("Pusying IcmpV6 Probe to an IcmpV6 ProbeSet"); - } - } - - plan.add_if_enabled(https_probes); - plan.add_if_enabled(icmp_v4_probes); - plan.add_if_enabled(icmp_v6_probes); - } - plan - } - - #[cfg(wasm_browser)] - pub(super) fn with_last_report( - relay_map: &RelayMap, - last_report: &Report, - protocols: &BTreeSet, - ) -> Self { - if last_report.relay_latency.is_empty() { - return Self::initial(relay_map, protocols); - } - let mut plan = Self { - set: Default::default(), - protocols: protocols.clone(), - }; - - let sorted_relays = sort_relays(relay_map, last_report); - for (ri, (url, relay_node)) in sorted_relays.into_iter().enumerate() { - if ri == NUM_INCREMENTAL_RELAYS { - break; - } - - // By default, each node only gets one probe sent, - let mut attempts: u32 = 1; - // except the fastest two from the previous round. - if ri < 2 { - attempts = 2; - } - if Some(url) == last_report.preferred_relay.as_ref() { - // But if we already had a relay home, try extra hard to - // make sure it's there so we don't flip flop around. 
- attempts = 4; - } - let retransmit_delay = last_report - .relay_latency - .get(url) - .map(|l| l * 120 / 100) // increases latency by 20%, why? - .unwrap_or(DEFAULT_ACTIVE_RETRANSMIT_DELAY); - - let mut https_probes = ProbeSet::new(ProbeProto::Https); - for attempt in 0..attempts { - let delay = - (retransmit_delay * attempt) + (ACTIVE_RETRANSMIT_EXTRA_DELAY * (attempt + 1)); - https_probes - .push(Probe::Https { - delay, - node: relay_node.clone(), - }) - .expect("Pushing Https Probe to an Https ProbeSet"); - } - - plan.add_if_enabled(https_probes); - } - plan - } - - /// Returns an iterator over the [`ProbeSet`]s in this plan. - pub(super) fn iter(&self) -> impl Iterator { - self.set.iter() - } - - /// Adds a [`ProbeSet`] if it contains probes and the protocol indicated in - /// the [`ProbeSet] matches a protocol in our set of [`ProbeProto`]s. - fn add_if_enabled(&mut self, set: ProbeSet) { - if !set.is_empty() && self.protocols.contains(&set.proto) { - self.set.insert(set); - } - } - - /// Returns the delay of the last probe in the probe plan. 
- fn max_delay(&self) -> Duration { - self.set - .iter() - .flatten() - .map(|probe| probe.delay()) - .max() - .unwrap_or_default() - } - - /// Stun & Quic probes are "priority" probes - fn has_priority_probes(&self) -> bool { - #[cfg(not(wasm_browser))] - for probe in &self.set { - if matches!( - probe.proto, - ProbeProto::StunIpv4 - | ProbeProto::StunIpv6 - | ProbeProto::QuicIpv4 - | ProbeProto::QuicIpv6 - ) { - return true; - } - } - false - } -} - -impl fmt::Display for ProbePlan { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - writeln!(f, "ProbePlan {{")?; - for probe_set in self.set.iter() { - writeln!(f, r#" ProbeSet("{}") {{"#, probe_set.proto)?; - for probe in probe_set.probes.iter() { - writeln!(f, " {probe},")?; - } - writeln!(f, " }}")?; - } - writeln!(f, "}}") - } -} - -impl FromIterator for ProbePlan { - fn from_iter>(iter: T) -> Self { - Self { - set: iter.into_iter().collect(), - protocols: BTreeSet::new(), - } - } -} - -/// Sorts the nodes in the [`RelayMap`] from fastest to slowest. -/// -/// This uses the latencies from the last report to determine the order. Relay Nodes with no -/// data are at the end. -fn sort_relays<'a>( - relay_map: &'a RelayMap, - last_report: &Report, -) -> Vec<(&'a RelayUrl, &'a Arc)> { - let mut prev: Vec<_> = relay_map.nodes().collect(); - prev.sort_by(|a, b| { - let latencies_a = last_report.relay_latency.get(&a.url); - let latencies_b = last_report.relay_latency.get(&b.url); - match (latencies_a, latencies_b) { - (Some(_), None) => { - // Non-zero sorts before zero. - std::cmp::Ordering::Less - } - (None, Some(_)) => { - // Zero can't sort before anything else. - std::cmp::Ordering::Greater - } - (None, None) => { - // For both empty latencies sort by relay_id. 
- a.url.cmp(&b.url) - } - (Some(_), Some(_)) => match latencies_a.cmp(&latencies_b) { - std::cmp::Ordering::Equal => a.url.cmp(&b.url), - x => x, - }, - } - }); - - prev.into_iter().map(|n| (&n.url, n)).collect() -} - -#[cfg(test)] -mod tests { - use pretty_assertions::assert_eq; - use tracing_test::traced_test; - - use super::*; - use crate::net_report::{test_utils, RelayLatencies}; - - /// Shorthand which declares a new ProbeSet. - /// - /// `$kind`: The `ProbeProto`. - /// `$node`: Expression which will be an `Arc`. - /// `$delays`: A `Vec` of the delays for this probe. - macro_rules! probeset { - (proto: ProbeProto::$kind:ident, relay: $node:expr, delays: $delays:expr,) => { - ProbeSet { - proto: ProbeProto::$kind, - probes: $delays - .iter() - .map(|delay| Probe::$kind { - delay: *delay, - node: $node, - }) - .collect(), - } - }; - } - - fn default_protocols() -> BTreeSet { - BTreeSet::from([ - ProbeProto::StunIpv4, - ProbeProto::StunIpv6, - ProbeProto::QuicIpv4, - ProbeProto::QuicIpv6, - ProbeProto::IcmpV4, - ProbeProto::IcmpV6, - ProbeProto::Https, - ]) - } - - #[tokio::test] - async fn test_initial_probeplan() { - let (_servers, relay_map) = test_utils::relay_map(2).await; - let relay_node_1 = relay_map.nodes().next().unwrap(); - let relay_node_2 = relay_map.nodes().nth(1).unwrap(); - let if_state = interfaces::State::fake(); - let plan = ProbePlan::initial(&relay_map, &default_protocols(), &if_state); - - let mut expected_plan: ProbePlan = [ - probeset! { - proto: ProbeProto::StunIpv4, - relay: relay_node_1.clone(), - delays: [Duration::ZERO, - Duration::from_millis(100), - Duration::from_millis(200)], - }, - probeset! { - proto: ProbeProto::StunIpv6, - relay: relay_node_1.clone(), - delays: [Duration::ZERO, - Duration::from_millis(100), - Duration::from_millis(200)], - }, - probeset! { - proto: ProbeProto::QuicIpv4, - relay: relay_node_1.clone(), - delays: [Duration::ZERO, - Duration::from_millis(100), - Duration::from_millis(200)], - }, - probeset! 
{ - proto: ProbeProto::QuicIpv6, - relay: relay_node_1.clone(), - delays: [Duration::ZERO, - Duration::from_millis(100), - Duration::from_millis(200)], - }, - probeset! { - proto: ProbeProto::Https, - relay: relay_node_1.clone(), - delays: [Duration::from_millis(300), - Duration::from_millis(400), - Duration::from_millis(500)], - }, - probeset! { - proto: ProbeProto::IcmpV4, - relay: relay_node_1.clone(), - delays: [Duration::from_millis(300), - Duration::from_millis(400), - Duration::from_millis(500)], - }, - probeset! { - proto: ProbeProto::IcmpV6, - relay: relay_node_1.clone(), - delays: [Duration::from_millis(300), - Duration::from_millis(400), - Duration::from_millis(500)], - }, - probeset! { - proto: ProbeProto::StunIpv4, - relay: relay_node_2.clone(), - delays: [Duration::ZERO, - Duration::from_millis(100), - Duration::from_millis(200)], - }, - probeset! { - proto: ProbeProto::StunIpv6, - relay: relay_node_2.clone(), - delays: [Duration::ZERO, - Duration::from_millis(100), - Duration::from_millis(200)], - }, - probeset! { - proto: ProbeProto::QuicIpv4, - relay: relay_node_2.clone(), - delays: [Duration::ZERO, - Duration::from_millis(100), - Duration::from_millis(200)], - }, - probeset! { - proto: ProbeProto::QuicIpv6, - relay: relay_node_2.clone(), - delays: [Duration::ZERO, - Duration::from_millis(100), - Duration::from_millis(200)], - }, - probeset! { - proto: ProbeProto::Https, - relay: relay_node_2.clone(), - delays: [Duration::from_millis(300), - Duration::from_millis(400), - Duration::from_millis(500)], - }, - probeset! { - proto: ProbeProto::IcmpV4, - relay: relay_node_2.clone(), - delays: [Duration::from_millis(300), - Duration::from_millis(400), - Duration::from_millis(500)], - }, - probeset! 
{ - proto: ProbeProto::IcmpV6, - relay: relay_node_2.clone(), - delays: [Duration::from_millis(300), - Duration::from_millis(400), - Duration::from_millis(500)], - }, - ] - .into_iter() - .collect(); - expected_plan.protocols = default_protocols(); - - println!("expected:"); - println!("{expected_plan}"); - println!("actual:"); - println!("{plan}"); - // The readable error: - assert_eq!(plan.to_string(), expected_plan.to_string()); - // Just in case there's a bug in the Display impl: - assert_eq!(plan, expected_plan); - } - - #[tokio::test] - async fn test_initial_probeplan_some_protocols() { - let (_servers, relay_map) = test_utils::relay_map(2).await; - let relay_node_1 = relay_map.nodes().next().unwrap(); - let relay_node_2 = relay_map.nodes().nth(1).unwrap(); - let if_state = interfaces::State::fake(); - let plan = ProbePlan::initial( - &relay_map, - &BTreeSet::from([ProbeProto::Https, ProbeProto::IcmpV4, ProbeProto::IcmpV6]), - &if_state, - ); - - let mut expected_plan: ProbePlan = [ - probeset! { - proto: ProbeProto::Https, - relay: relay_node_1.clone(), - delays: [Duration::ZERO, - Duration::from_millis(100), - Duration::from_millis(200)], - }, - probeset! { - proto: ProbeProto::IcmpV4, - relay: relay_node_1.clone(), - delays: [Duration::ZERO, - Duration::from_millis(100), - Duration::from_millis(200)], - }, - probeset! { - proto: ProbeProto::IcmpV6, - relay: relay_node_1.clone(), - delays: [Duration::ZERO, - Duration::from_millis(100), - Duration::from_millis(200)], - }, - probeset! { - proto: ProbeProto::Https, - relay: relay_node_2.clone(), - delays: [Duration::ZERO, - Duration::from_millis(100), - Duration::from_millis(200)], - }, - probeset! { - proto: ProbeProto::IcmpV4, - relay: relay_node_2.clone(), - delays: [Duration::ZERO, - Duration::from_millis(100), - Duration::from_millis(200)], - }, - probeset! 
{ - proto: ProbeProto::IcmpV6, - relay: relay_node_2.clone(), - delays: [Duration::ZERO, - Duration::from_millis(100), - Duration::from_millis(200)], - }, - ] - .into_iter() - .collect(); - expected_plan.protocols = - BTreeSet::from([ProbeProto::Https, ProbeProto::IcmpV4, ProbeProto::IcmpV6]); - - println!("expected:"); - println!("{expected_plan}"); - println!("actual:"); - println!("{plan}"); - // The readable error: - assert_eq!(plan.to_string(), expected_plan.to_string()); - // Just in case there's a bug in the Display impl: - assert_eq!(plan, expected_plan); - } - - #[tokio::test] - #[traced_test] - async fn test_plan_with_report() { - let (_servers, relay_map) = test_utils::relay_map(2).await; - let relay_node_1 = relay_map.nodes().next().unwrap().clone(); - let relay_node_2 = relay_map.nodes().nth(1).unwrap().clone(); - let if_state = interfaces::State::fake(); - - for i in 0..10 { - println!("round {}", i); - let mut latencies = RelayLatencies::new(); - latencies.update_relay(relay_node_1.url.clone(), Duration::from_millis(2)); - latencies.update_relay(relay_node_2.url.clone(), Duration::from_millis(2)); - let last_report = Report { - udp: true, - ipv6: true, - ipv4: true, - ipv6_can_send: true, - ipv4_can_send: true, - os_has_ipv6: true, - icmpv4: None, - icmpv6: None, - mapping_varies_by_dest_ip: Some(false), - mapping_varies_by_dest_ipv6: Some(false), - hair_pinning: Some(true), - portmap_probe: None, - preferred_relay: Some(relay_node_1.url.clone()), - relay_latency: latencies.clone(), - relay_v4_latency: latencies.clone(), - relay_v6_latency: latencies.clone(), - global_v4: None, - global_v6: None, - captive_portal: None, - }; - let plan = ProbePlan::with_last_report( - &relay_map, - &last_report, - &default_protocols(), - &if_state, - ); - let mut expected_plan: ProbePlan = [ - probeset! 
{ - proto: ProbeProto::StunIpv4, - relay: relay_node_1.clone(), - delays: [Duration::ZERO, - Duration::from_micros(52_400), - Duration::from_micros(104_800), - Duration::from_micros(157_200)], - }, - probeset! { - proto: ProbeProto::StunIpv6, - relay: relay_node_1.clone(), - delays: [Duration::ZERO, - Duration::from_micros(52_400), - Duration::from_micros(104_800), - Duration::from_micros(157_200)], - }, - probeset! { - proto: ProbeProto::QuicIpv4, - relay: relay_node_1.clone(), - delays: [Duration::ZERO, - Duration::from_micros(52_400), - Duration::from_micros(104_800), - Duration::from_micros(157_200)], - }, - probeset! { - proto: ProbeProto::QuicIpv6, - relay: relay_node_1.clone(), - delays: [Duration::ZERO, - Duration::from_micros(52_400), - Duration::from_micros(104_800), - Duration::from_micros(157_200)], - }, - probeset! { - proto: ProbeProto::Https, - relay: relay_node_1.clone(), - delays: [Duration::from_micros(207_200), - Duration::from_micros(259_600), - Duration::from_micros(312_000), - Duration::from_micros(364_400)], - }, - probeset! { - proto: ProbeProto::IcmpV4, - relay: relay_node_1.clone(), - delays: [Duration::from_micros(207_200), - Duration::from_micros(259_600), - Duration::from_micros(312_000), - Duration::from_micros(364_400)], - }, - probeset! { - proto: ProbeProto::IcmpV6, - relay: relay_node_1.clone(), - delays: [Duration::from_micros(207_200), - Duration::from_micros(259_600), - Duration::from_micros(312_000), - Duration::from_micros(364_400)], - }, - probeset! { - proto: ProbeProto::StunIpv4, - relay: relay_node_2.clone(), - delays: [Duration::ZERO, - Duration::from_micros(52_400)], - }, - probeset! { - proto: ProbeProto::StunIpv6, - relay: relay_node_2.clone(), - delays: [Duration::ZERO, - Duration::from_micros(52_400)], - }, - probeset! { - proto: ProbeProto::QuicIpv4, - relay: relay_node_2.clone(), - delays: [Duration::ZERO, - Duration::from_micros(52_400)], - }, - probeset! 
{ - proto: ProbeProto::QuicIpv6, - relay: relay_node_2.clone(), - delays: [Duration::ZERO, - Duration::from_micros(52_400)], - }, - probeset! { - proto: ProbeProto::Https, - relay: relay_node_2.clone(), - delays: [Duration::from_micros(207_200), - Duration::from_micros(259_600)], - }, - probeset! { - proto: ProbeProto::IcmpV4, - relay: relay_node_2.clone(), - delays: [Duration::from_micros(207_200), - Duration::from_micros(259_600)], - }, - probeset! { - proto: ProbeProto::IcmpV6, - relay: relay_node_2.clone(), - delays: [Duration::from_micros(207_200), - Duration::from_micros(259_600)], - }, - ] - .into_iter() - .collect(); - expected_plan.protocols = default_protocols(); - - println!("{} round", i); - println!("expected:"); - println!("{expected_plan}"); - println!("actual:"); - println!("{plan}"); - // The readable error: - assert_eq!(plan.to_string(), expected_plan.to_string(), "{}", i); - // Just in case there's a bug in the Display impl: - assert_eq!(plan, expected_plan, "{}", i); - } - } - - fn create_last_report( - url_1: &RelayUrl, - latency_1: Option, - url_2: &RelayUrl, - latency_2: Option, - ) -> Report { - let mut latencies = RelayLatencies::new(); - if let Some(latency_1) = latency_1 { - latencies.update_relay(url_1.clone(), latency_1); - } - if let Some(latency_2) = latency_2 { - latencies.update_relay(url_2.clone(), latency_2); - } - Report { - udp: true, - ipv6: true, - ipv4: true, - ipv6_can_send: true, - ipv4_can_send: true, - os_has_ipv6: true, - icmpv4: None, - icmpv6: None, - mapping_varies_by_dest_ip: Some(false), - mapping_varies_by_dest_ipv6: Some(false), - hair_pinning: Some(true), - portmap_probe: None, - preferred_relay: Some(url_1.clone()), - relay_latency: latencies.clone(), - relay_v4_latency: latencies.clone(), - relay_v6_latency: latencies.clone(), - global_v4: None, - global_v6: None, - captive_portal: None, - } - } - - #[tokio::test] - #[traced_test] - async fn test_relay_sort_two_latencies() { - let (_servers, relay_map) = 
test_utils::relay_map(2).await; - let r1 = relay_map.nodes().next().unwrap(); - let r2 = relay_map.nodes().nth(1).unwrap(); - let last_report = create_last_report( - &r1.url, - Some(Duration::from_millis(1)), - &r2.url, - Some(Duration::from_millis(2)), - ); - let sorted: Vec<_> = sort_relays(&relay_map, &last_report) - .iter() - .map(|(url, _reg)| *url) - .collect(); - assert_eq!(sorted, vec![&r1.url, &r2.url]); - } - - #[tokio::test] - #[traced_test] - async fn test_relay_sort_equal_latencies() { - let (_servers, relay_map) = test_utils::relay_map(2).await; - let r1 = relay_map.nodes().next().unwrap(); - let r2 = relay_map.nodes().nth(1).unwrap(); - let last_report = create_last_report( - &r1.url, - Some(Duration::from_millis(2)), - &r2.url, - Some(Duration::from_millis(2)), - ); - let sorted: Vec<_> = sort_relays(&relay_map, &last_report) - .iter() - .map(|(url, _)| *url) - .collect(); - assert_eq!(sorted, vec![&r1.url, &r2.url]); - } - - #[tokio::test] - async fn test_relay_sort_missing_latency() { - let (_servers, relay_map) = test_utils::relay_map(2).await; - let r1 = relay_map.nodes().next().unwrap(); - let r2 = relay_map.nodes().nth(1).unwrap(); - - let last_report = - create_last_report(&r1.url, None, &r2.url, Some(Duration::from_millis(2))); - let sorted: Vec<_> = sort_relays(&relay_map, &last_report) - .iter() - .map(|(url, _)| *url) - .collect(); - assert_eq!(sorted, vec![&r2.url, &r1.url]); - - let last_report = - create_last_report(&r1.url, Some(Duration::from_millis(2)), &r2.url, None); - let sorted: Vec<_> = sort_relays(&relay_map, &last_report) - .iter() - .map(|(url, _)| *url) - .collect(); - assert_eq!(sorted, vec![&r1.url, &r2.url]); - } - - #[tokio::test] - #[traced_test] - async fn test_relay_sort_no_latency() { - let (_servers, relay_map) = test_utils::relay_map(2).await; - let r1 = relay_map.nodes().next().unwrap(); - let r2 = relay_map.nodes().nth(1).unwrap(); - - let last_report = create_last_report(&r1.url, None, &r2.url, None); - let 
sorted: Vec<_> = sort_relays(&relay_map, &last_report) - .iter() - .map(|(url, _)| *url) - .collect(); - // sorted by relay url only - assert_eq!(sorted, vec![&r1.url, &r2.url]); - } -} diff --git a/iroh/src/test_utils.rs b/iroh/src/test_utils.rs index 9e6c87def79..7a1b76e4f44 100644 --- a/iroh/src/test_utils.rs +++ b/iroh/src/test_utils.rs @@ -6,14 +6,12 @@ use iroh_base::RelayUrl; use iroh_relay::{ server::{ AccessConfig, CertConfig, QuicConfig, RelayConfig, Server, ServerConfig, SpawnError, - StunConfig, TlsConfig, + TlsConfig, }, RelayMap, RelayNode, RelayQuicConfig, }; use tokio::sync::oneshot; -use crate::defaults::DEFAULT_STUN_PORT; - /// A drop guard to clean up test infrastructure. /// /// After dropping the test infrastructure will asynchronously shutdown and release its @@ -24,47 +22,21 @@ use crate::defaults::DEFAULT_STUN_PORT; #[allow(dead_code)] pub struct CleanupDropGuard(pub(crate) oneshot::Sender<()>); -/// Runs a relay server with STUN and QUIC enabled suitable for tests. +/// Runs a relay server with QUIC enabled suitable for tests. /// /// The returned `Url` is the url of the relay server in the returned [`RelayMap`]. /// When dropped, the returned [`Server`] does will stop running. pub async fn run_relay_server() -> Result<(RelayMap, RelayUrl, Server), SpawnError> { - run_relay_server_with( - Some(StunConfig { - bind_addr: (Ipv4Addr::LOCALHOST, 0).into(), - }), - true, - ) - .await -} - -/// Runs a relay server with STUN enabled suitable for tests. -/// -/// The returned `Url` is the url of the relay server in the returned [`RelayMap`]. -/// When dropped, the returned [`Server`] does will stop running. -pub async fn run_relay_server_with_stun() -> Result<(RelayMap, RelayUrl, Server), SpawnError> { - run_relay_server_with( - Some(StunConfig { - bind_addr: (Ipv4Addr::LOCALHOST, 0).into(), - }), - false, - ) - .await + run_relay_server_with(true).await } /// Runs a relay server. 
/// -/// `stun` can be set to `None` to disable stun, or set to `Some` `StunConfig`, -/// to enable stun on a specific socket. -/// /// If `quic` is set to `true`, it will make the appropriate [`QuicConfig`] from the generated tls certificates and run the quic server at a random free port. /// /// /// The return value is similar to [`run_relay_server`]. -pub async fn run_relay_server_with( - stun: Option, - quic: bool, -) -> Result<(RelayMap, RelayUrl, Server), SpawnError> { +pub async fn run_relay_server_with(quic: bool) -> Result<(RelayMap, RelayUrl, Server), SpawnError> { let (certs, server_config) = iroh_relay::server::testing::self_signed_tls_certs_and_config(); let tls = TlsConfig { @@ -90,7 +62,6 @@ pub async fn run_relay_server_with( access: AccessConfig::Everyone, }), quic, - stun, ..Default::default() }; let server = Server::spawn(config).await?; @@ -103,8 +74,6 @@ pub async fn run_relay_server_with( .map(|addr| RelayQuicConfig { port: addr.port() }); let n: RelayMap = RelayNode { url: url.clone(), - stun_only: false, - stun_port: server.stun_addr().map_or(DEFAULT_STUN_PORT, |s| s.port()), quic, } .into(); diff --git a/iroh/src/util.rs b/iroh/src/util.rs index 21b5a85f7aa..d442c1c5aa4 100644 --- a/iroh/src/util.rs +++ b/iroh/src/util.rs @@ -73,7 +73,7 @@ impl Future for MaybeFuture { mod tests { use std::pin::pin; - use tokio::time::Duration; + use n0_future::time::Duration; use super::*; diff --git a/iroh/tests/integration.rs b/iroh/tests/integration.rs index 72aeadfd0cc..af69de113d6 100644 --- a/iroh/tests/integration.rs +++ b/iroh/tests/integration.rs @@ -34,6 +34,7 @@ const ECHO_ALPN: &[u8] = b"echo"; #[test] async fn simple_node_id_based_connection_transfer() -> Result { + std::panic::set_hook(Box::new(console_error_panic_hook::hook)); setup_logging(); let client = Endpoint::builder().discovery_n0().bind().await?;