diff --git a/Cargo.lock b/Cargo.lock
index d3b66a81de5..f20b7645137 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -671,6 +671,16 @@ dependencies = [
"windows-sys 0.59.0",
]
+[[package]]
+name = "console_error_panic_hook"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc"
+dependencies = [
+ "cfg-if",
+ "wasm-bindgen",
+]
+
[[package]]
name = "const-oid"
version = "0.9.6"
@@ -1163,7 +1173,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "976dd42dc7e85965fe702eb8164f21f450704bdde31faefd6471dba214cb594e"
dependencies = [
"libc",
- "windows-sys 0.59.0",
+ "windows-sys 0.52.0",
]
[[package]]
@@ -2225,6 +2235,7 @@ dependencies = [
"bytes",
"cfg_aliases",
"clap",
+ "console_error_panic_hook",
"crypto_box",
"data-encoding",
"der",
@@ -2249,7 +2260,7 @@ dependencies = [
"n0-watcher",
"nested_enum_utils",
"netdev",
- "netwatch 0.6.0",
+ "netwatch",
"parse-size",
"pin-project",
"pkarr",
@@ -2463,7 +2474,7 @@ dependencies = [
"once_cell",
"socket2",
"tracing",
- "windows-sys 0.59.0",
+ "windows-sys 0.52.0",
]
[[package]]
@@ -2517,7 +2528,6 @@ dependencies = [
"simdutf8",
"snafu",
"strum",
- "stun-rs",
"time",
"tokio",
"tokio-rustls",
@@ -2542,7 +2552,7 @@ checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9"
dependencies = [
"hermit-abi",
"libc",
- "windows-sys 0.59.0",
+ "windows-sys 0.52.0",
]
[[package]]
@@ -2945,39 +2955,6 @@ dependencies = [
"tokio",
]
-[[package]]
-name = "netwatch"
-version = "0.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "67eeaa5f7505c93c5a9b35ba84fd21fb8aa3f24678c76acfe8716af7862fb07a"
-dependencies = [
- "atomic-waker",
- "bytes",
- "cfg_aliases",
- "derive_more",
- "iroh-quinn-udp",
- "js-sys",
- "libc",
- "n0-future",
- "nested_enum_utils",
- "netdev",
- "netlink-packet-core",
- "netlink-packet-route 0.23.0",
- "netlink-proto",
- "netlink-sys",
- "serde",
- "snafu",
- "socket2",
- "time",
- "tokio",
- "tokio-util",
- "tracing",
- "web-sys",
- "windows 0.59.0",
- "windows-result 0.3.2",
- "wmi",
-]
-
[[package]]
name = "netwatch"
version = "0.6.0"
@@ -3478,9 +3455,9 @@ checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e"
[[package]]
name = "portmapper"
-version = "0.5.0"
+version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7d6db66007eac4a0ec8331d0d20c734bd64f6445d64bbaf0d0a27fea7a054e36"
+checksum = "f651ba57abd6d766deb1b86f45b50c189db69204f20126e84f033168c1bf0853"
dependencies = [
"base64",
"bytes",
@@ -3492,7 +3469,7 @@ dependencies = [
"iroh-metrics",
"libc",
"nested_enum_utils",
- "netwatch 0.5.0",
+ "netwatch",
"num_enum",
"rand 0.8.5",
"serde",
@@ -3711,7 +3688,7 @@ dependencies = [
"once_cell",
"socket2",
"tracing",
- "windows-sys 0.59.0",
+ "windows-sys 0.52.0",
]
[[package]]
@@ -4050,7 +4027,7 @@ dependencies = [
"errno",
"libc",
"linux-raw-sys",
- "windows-sys 0.59.0",
+ "windows-sys 0.52.0",
]
[[package]]
@@ -4152,7 +4129,7 @@ dependencies = [
"security-framework",
"security-framework-sys",
"webpki-root-certs 0.26.11",
- "windows-sys 0.59.0",
+ "windows-sys 0.52.0",
]
[[package]]
@@ -4748,7 +4725,7 @@ dependencies = [
"getrandom 0.3.2",
"once_cell",
"rustix",
- "windows-sys 0.59.0",
+ "windows-sys 0.52.0",
]
[[package]]
@@ -5573,7 +5550,7 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
dependencies = [
- "windows-sys 0.59.0",
+ "windows-sys 0.48.0",
]
[[package]]
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 2a945877821..662f3b00179 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -2,7 +2,7 @@ FROM rust:alpine AS chef
RUN update-ca-certificates
RUN apk add --no-cache musl-dev openssl-dev pkgconfig
-RUN cargo install cargo-chef
+RUN cargo install cargo-chef
WORKDIR /iroh
FROM chef AS planner
@@ -40,7 +40,7 @@ RUN chmod +x /iroh-relay
WORKDIR /
# expose the default ports
-# http, https, stun, metrics
+# http, https, metrics
EXPOSE 80 443 3478/udp 9090
ENTRYPOINT ["/iroh-relay"]
CMD [""]
@@ -62,4 +62,4 @@ WORKDIR /
# dns, metrics
EXPOSE 53/udp 9090
ENTRYPOINT ["/iroh-dns-server"]
-CMD [""]
\ No newline at end of file
+CMD [""]
diff --git a/example.config.toml b/example.config.toml
index 04d5350b9f3..f36385f79d3 100644
--- a/example.config.toml
+++ b/example.config.toml
@@ -1,5 +1,2 @@
[[relay_nodes]]
url = "https://foo.bar"
-stun_only = false
-stun_port = 1244
-
diff --git a/iroh-relay/Cargo.toml b/iroh-relay/Cargo.toml
index 4b66b094d5b..6dfeb2615de 100644
--- a/iroh-relay/Cargo.toml
+++ b/iroh-relay/Cargo.toml
@@ -51,7 +51,6 @@ reqwest = { version = "0.12", default-features = false, features = [
rustls = { version = "0.23", default-features = false, features = ["ring"] }
serde = { version = "1", features = ["derive", "rc"] }
strum = { version = "0.26", features = ["derive"] }
-stun-rs = "0.1.11"
tokio = { version = "1", features = [
"io-util",
"macros",
diff --git a/iroh-relay/README.md b/iroh-relay/README.md
index a87a573af00..e6711b636c5 100644
--- a/iroh-relay/README.md
+++ b/iroh-relay/README.md
@@ -15,10 +15,10 @@ relays, including:
- Relay Protocol: The protocol used to communicate between relay servers and
clients
- Relay Server: A fully-fledged iroh-relay server over HTTP or HTTPS.
- Optionally will also expose a stun endpoint and metrics.
+ Optionally will also expose a QAD endpoint and metrics.
- Relay Client: A client for establishing connections to the relay.
- Server Binary: A CLI for running your own relay server. It can be configured
- to also offer STUN support and expose metrics.
+ to also expose metrics.
Used in [iroh], created with love by the [n0 team](https://n0.computer/).
diff --git a/iroh-relay/src/defaults.rs b/iroh-relay/src/defaults.rs
index 12043a0f36e..47959c285bf 100644
--- a/iroh-relay/src/defaults.rs
+++ b/iroh-relay/src/defaults.rs
@@ -1,10 +1,5 @@
//! Default values used in the relay.
-/// The default STUN port used by the Relay server.
-///
-/// The STUN port as defined by [RFC 8489]()
-pub const DEFAULT_STUN_PORT: u16 = 3478;
-
/// The default QUIC port used by the Relay server to accept QUIC connections
/// for QUIC address discovery
///
diff --git a/iroh-relay/src/dns.rs b/iroh-relay/src/dns.rs
index b093971e9c1..e76dff745e8 100644
--- a/iroh-relay/src/dns.rs
+++ b/iroh-relay/src/dns.rs
@@ -4,6 +4,7 @@ use std::{
fmt,
future::Future,
net::{IpAddr, Ipv6Addr, SocketAddr},
+ sync::Arc,
};
use hickory_resolver::{name_server::TokioConnectionProvider, TokioResolver};
@@ -14,6 +15,7 @@ use n0_future::{
};
use nested_enum_utils::common_fields;
use snafu::{Backtrace, GenerateImplicitData, OptionExt, Snafu};
+use tokio::sync::RwLock;
use url::Url;
use crate::node_info::{LookupError, NodeInfo};
@@ -77,7 +79,10 @@ impl StaggeredError {
/// The DNS resolver used throughout `iroh`.
#[derive(Debug, Clone)]
-pub struct DnsResolver(TokioResolver);
+pub struct DnsResolver {
+    resolver: Arc<RwLock<TokioResolver>>,
+    nameserver: Option<SocketAddr>,
+}
impl DnsResolver {
/// Create a new DNS resolver with sensible cross-platform defaults.
@@ -86,6 +91,14 @@ impl DnsResolver {
/// This does not work at least on some Androids, therefore we fallback
/// to the default `ResolverConfig` which uses eg. to google's `8.8.8.8` or `8.8.4.4`.
pub fn new() -> Self {
+ let resolver = Self::new_inner();
+ Self {
+ resolver: Arc::new(RwLock::new(resolver)),
+ nameserver: None,
+ }
+ }
+
+ fn new_inner() -> TokioResolver {
let (system_config, mut options) =
hickory_resolver::system_conf::read_system_conf().unwrap_or_default();
@@ -110,11 +123,19 @@ impl DnsResolver {
let mut builder =
TokioResolver::builder_with_config(config, TokioConnectionProvider::default());
*builder.options_mut() = options;
- DnsResolver(builder.build())
+ builder.build()
}
/// Create a new DNS resolver configured with a single UDP DNS nameserver.
pub fn with_nameserver(nameserver: SocketAddr) -> Self {
+ let resolver = Self::with_nameserver_inner(nameserver);
+ Self {
+ resolver: Arc::new(RwLock::new(resolver)),
+ nameserver: Some(nameserver),
+ }
+ }
+
+ fn with_nameserver_inner(nameserver: SocketAddr) -> TokioResolver {
let mut config = hickory_resolver::config::ResolverConfig::new();
let nameserver_config = hickory_resolver::config::NameServerConfig::new(
nameserver,
@@ -124,12 +145,24 @@ impl DnsResolver {
let builder =
TokioResolver::builder_with_config(config, TokioConnectionProvider::default());
- DnsResolver(builder.build())
+ builder.build()
}
/// Removes all entries from the cache.
- pub fn clear_cache(&self) {
- self.0.clear_cache();
+ pub async fn clear_cache(&self) {
+ self.resolver.read().await.clear_cache();
+ }
+
+ /// Recreate the inner resolver
+ pub async fn reset(&self) {
+ let mut this = self.resolver.write().await;
+ let resolver = if let Some(nameserver) = self.nameserver {
+ Self::with_nameserver_inner(nameserver)
+ } else {
+ Self::new_inner()
+ };
+
+ *this = resolver;
}
/// Lookup a TXT record.
@@ -139,7 +172,8 @@ impl DnsResolver {
timeout: Duration,
) -> Result<TxtLookup, DnsError> {
let host = host.to_string();
- let res = time::timeout(timeout, self.0.txt_lookup(host)).await??;
+ let this = self.resolver.read().await;
+ let res = time::timeout(timeout, this.txt_lookup(host)).await??;
Ok(TxtLookup(res))
}
@@ -150,7 +184,8 @@ impl DnsResolver {
timeout: Duration,
) -> Result<impl Iterator<Item = IpAddr>, DnsError> {
let host = host.to_string();
- let addrs = time::timeout(timeout, self.0.ipv4_lookup(host)).await??;
+ let this = self.resolver.read().await;
+ let addrs = time::timeout(timeout, this.ipv4_lookup(host)).await??;
Ok(addrs.into_iter().map(|ip| IpAddr::V4(ip.0)))
}
@@ -161,7 +196,8 @@ impl DnsResolver {
timeout: Duration,
) -> Result<impl Iterator<Item = IpAddr>, DnsError> {
let host = host.to_string();
- let addrs = time::timeout(timeout, self.0.ipv6_lookup(host)).await??;
+ let this = self.resolver.read().await;
+ let addrs = time::timeout(timeout, this.ipv6_lookup(host)).await??;
Ok(addrs.into_iter().map(|ip| IpAddr::V6(ip.0)))
}
@@ -349,7 +385,10 @@ impl Default for DnsResolver {
impl From<TokioResolver> for DnsResolver {
fn from(resolver: TokioResolver) -> Self {
- DnsResolver(resolver)
+ DnsResolver {
+ resolver: Arc::new(RwLock::new(resolver)),
+ nameserver: None,
+ }
}
}
diff --git a/iroh-relay/src/lib.rs b/iroh-relay/src/lib.rs
index 71a18d7aaf3..e561ddaf4ad 100644
--- a/iroh-relay/src/lib.rs
+++ b/iroh-relay/src/lib.rs
@@ -20,10 +20,10 @@
doc = "- `server`: A fully-fledged iroh-relay server over HTTP or HTTPS."
)]
//!
-//! Optionally will also expose a stun endpoint and metrics. (requires the feature flag `server`)
+//! Optionally will also expose a QAD endpoint and metrics. (requires the feature flag `server`)
//! - [`client`]: A client for establishing connections to the relay.
//! - *Server Binary*: A CLI for running your own relay server. It can be configured to also offer
-//! STUN support and expose metrics.
+//! QAD support and expose metrics.
// Based on tailscale/derp/derp.go
#![cfg_attr(iroh_docsrs, feature(doc_auto_cfg))]
diff --git a/iroh-relay/src/main.rs b/iroh-relay/src/main.rs
index 3aa4da21546..8bcf7f597e6 100644
--- a/iroh-relay/src/main.rs
+++ b/iroh-relay/src/main.rs
@@ -16,7 +16,6 @@ use iroh_base::NodeId;
use iroh_relay::{
defaults::{
DEFAULT_HTTPS_PORT, DEFAULT_HTTP_PORT, DEFAULT_METRICS_PORT, DEFAULT_RELAY_QUIC_PORT,
- DEFAULT_STUN_PORT,
},
server::{self as relay, ClientRateLimit, QuicConfig},
};
@@ -114,7 +113,7 @@ struct Config {
///
/// Defaults to `true`.
///
- /// Disabling will leave only the STUN server. The `http_bind_addr` and `tls`
+    /// Disabling will leave only the QUIC server. The `http_bind_addr` and `tls`
/// configuration options will be ignored.
#[serde(default = "cfg_defaults::enable_relay")]
enable_relay: bool,
@@ -140,15 +139,6 @@ struct Config {
///
/// Must exist if `enable_quic_addr_discovery` is `true`.
tls: Option,
- /// Whether to run a STUN server. It will bind to the same IP as the `addr` field.
- ///
- /// Defaults to `true`.
- #[serde(default = "cfg_defaults::enable_stun")]
- enable_stun: bool,
- /// The socket address to bind the STUN server on.
- ///
- /// Defaults to using the `http_bind_addr` with the port set to [`DEFAULT_STUN_PORT`].
- stun_bind_addr: Option,
/// Whether to allow QUIC connections for QUIC address discovery
///
/// If no `tls` is set, this will error.
@@ -174,7 +164,7 @@ struct Config {
key_cache_capacity: Option,
/// Access control for relaying connections.
///
- /// This controls which nodes are allowed to relay connections, other endpoints, like STUN are not controlled by this.
+ /// This controls which nodes are allowed to relay connections, other endpoints are not controlled by this.
#[serde(default)]
access: AccessConfig,
}
@@ -314,11 +304,6 @@ impl Config {
.unwrap_or((Ipv6Addr::UNSPECIFIED, DEFAULT_HTTP_PORT).into())
}
- fn stun_bind_addr(&self) -> SocketAddr {
- self.stun_bind_addr
- .unwrap_or_else(|| SocketAddr::new(self.http_bind_addr().ip(), DEFAULT_STUN_PORT))
- }
-
fn metrics_bind_addr(&self) -> SocketAddr {
self.metrics_bind_addr
.unwrap_or_else(|| SocketAddr::new(self.http_bind_addr().ip(), DEFAULT_METRICS_PORT))
@@ -331,8 +316,6 @@ impl Default for Config {
enable_relay: cfg_defaults::enable_relay(),
http_bind_addr: None,
tls: None,
- enable_stun: cfg_defaults::enable_stun(),
- stun_bind_addr: None,
enable_quic_addr_discovery: cfg_defaults::enable_quic_addr_discovery(),
limits: None,
enable_metrics: cfg_defaults::enable_metrics(),
@@ -352,10 +335,6 @@ mod cfg_defaults {
true
}
- pub(crate) fn enable_stun() -> bool {
- true
- }
-
pub(crate) fn enable_quic_addr_discovery() -> bool {
false
}
@@ -722,12 +701,8 @@ async fn build_relay_config(cfg: Config) -> Result,
- #[snafu(implicit)]
- span_trace: n0_snafu::SpanTrace,
-})]
-#[allow(missing_docs)]
-#[derive(Debug, Snafu)]
-#[non_exhaustive]
-pub enum StunError {
- /// The STUN message could not be parsed or is otherwise invalid.
- #[snafu(display("invalid message"))]
- InvalidMessage {},
- /// STUN request is not a binding request when it should be.
- #[snafu(display("not binding"))]
- NotBinding {},
- /// STUN packet is not a response when it should be.
- #[snafu(display("not success response"))]
- NotSuccessResponse {},
- /// STUN response has malformed attributes.
- #[snafu(display("malformed attributes"))]
- MalformedAttrs {},
- /// STUN request didn't end in fingerprint.
- #[snafu(display("no fingerprint"))]
- NoFingerprint {},
- /// STUN request had bogus fingerprint.
- #[snafu(display("invalid fingerprint"))]
- InvalidFingerprint {},
-}
-
-/// Generates a binding request STUN packet.
-pub fn request(tx: TransactionId) -> Vec {
- let fp = Fingerprint::default();
- let msg = StunMessageBuilder::new(methods::BINDING, MessageClass::Request)
- .with_transaction_id(tx)
- .with_attribute(fp)
- .build();
-
- let encoder = MessageEncoderBuilder::default().build();
- let mut buffer = vec![0u8; 150];
- let size = encoder.encode(&mut buffer, &msg).expect("invalid encoding");
- buffer.truncate(size);
- buffer
-}
-
-/// Generates a binding response.
-pub fn response(tx: TransactionId, addr: SocketAddr) -> Vec {
- let msg = StunMessageBuilder::new(methods::BINDING, MessageClass::SuccessResponse)
- .with_transaction_id(tx)
- .with_attribute(XorMappedAddress::from(addr))
- .build();
-
- let encoder = MessageEncoderBuilder::default().build();
- let mut buffer = vec![0u8; 150];
- let size = encoder.encode(&mut buffer, &msg).expect("invalid encoding");
- buffer.truncate(size);
- buffer
-}
-
-// Copied from stun_rs
-// const MAGIC_COOKIE: Cookie = Cookie(0x2112_A442);
-const COOKIE: [u8; 4] = 0x2112_A442u32.to_be_bytes();
-
-/// Reports whether b is a STUN message.
-pub fn is(b: &[u8]) -> bool {
- b.len() >= stun_rs::MESSAGE_HEADER_SIZE &&
- b[0]&0b11000000 == 0 && // top two bits must be zero
- b[4..8] == COOKIE
-}
-
-/// Parses a STUN binding request.
-pub fn parse_binding_request(b: &[u8]) -> Result {
- let ctx = DecoderContextBuilder::default()
- .with_validation() // ensure fingerprint is validated
- .build();
- let decoder = MessageDecoderBuilder::default().with_context(ctx).build();
- let (msg, _) = decoder.decode(b).map_err(|_| InvalidMessageSnafu.build())?;
-
- let tx = *msg.transaction_id();
- if msg.method() != methods::BINDING {
- return Err(NotBindingSnafu.build());
- }
-
- // TODO: Tailscale sets the software to tailscale, we should check if we want to do this too.
-
- if msg
- .attributes()
- .last()
- .map(|attr| !attr.is_fingerprint())
- .unwrap_or_default()
- {
- return Err(NoFingerprintSnafu.build());
- }
-
- Ok(tx)
-}
-
-/// Parses a successful binding response STUN packet.
-/// The IP address is extracted from the XOR-MAPPED-ADDRESS attribute.
-pub fn parse_response(b: &[u8]) -> Result<(TransactionId, SocketAddr), StunError> {
- let decoder = MessageDecoder::default();
- let (msg, _) = decoder.decode(b).map_err(|_| InvalidMessageSnafu.build())?;
-
- let tx = *msg.transaction_id();
- if msg.class() != MessageClass::SuccessResponse {
- return Err(NotSuccessResponseSnafu.build());
- }
-
- // Read through the attributes.
- // The the addr+port reported by XOR-MAPPED-ADDRESS
- // as the canonical value. If the attribute is not
- // present but the STUN server responds with
- // MAPPED-ADDRESS we fall back to it.
-
- let mut addr = None;
- let mut fallback_addr = None;
- for attr in msg.attributes() {
- match attr {
- StunAttribute::XorMappedAddress(a) => {
- let mut a = *a.socket_address();
- a.set_ip(a.ip().to_canonical());
- addr = Some(a);
- }
- StunAttribute::MappedAddress(a) => {
- let mut a = *a.socket_address();
- a.set_ip(a.ip().to_canonical());
- fallback_addr = Some(a);
- }
- _ => {}
- }
- }
-
- if let Some(addr) = addr {
- return Ok((tx, addr));
- }
-
- if let Some(addr) = fallback_addr {
- return Ok((tx, addr));
- }
-
- Err(MalformedAttrsSnafu.build())
-}
-
-#[cfg(test)]
-mod tests {
-
- use std::net::{IpAddr, Ipv4Addr};
-
- use super::*;
-
- struct ResponseTestCase {
- name: &'static str,
- data: Vec,
- want_tid: Vec,
- want_addr: IpAddr,
- want_port: u16,
- }
-
- #[test]
- fn test_parse_response() {
- let cases = vec![
- ResponseTestCase {
- name: "google-1",
- data: vec![
- 0x01, 0x01, 0x00, 0x0c, 0x21, 0x12, 0xa4, 0x42,
- 0x23, 0x60, 0xb1, 0x1e, 0x3e, 0xc6, 0x8f, 0xfa,
- 0x93, 0xe0, 0x80, 0x07, 0x00, 0x20, 0x00, 0x08,
- 0x00, 0x01, 0xc7, 0x86, 0x69, 0x57, 0x85, 0x6f,
- ],
- want_tid: vec![
- 0x23, 0x60, 0xb1, 0x1e, 0x3e, 0xc6, 0x8f, 0xfa,
- 0x93, 0xe0, 0x80, 0x07,
- ],
- want_addr: IpAddr::V4(Ipv4Addr::from([72, 69, 33, 45])),
- want_port: 59028,
- },
- ResponseTestCase {
- name: "google-2",
- data: vec![
- 0x01, 0x01, 0x00, 0x0c, 0x21, 0x12, 0xa4, 0x42,
- 0xf9, 0xf1, 0x21, 0xcb, 0xde, 0x7d, 0x7c, 0x75,
- 0x92, 0x3c, 0xe2, 0x71, 0x00, 0x20, 0x00, 0x08,
- 0x00, 0x01, 0xc7, 0x87, 0x69, 0x57, 0x85, 0x6f,
- ],
- want_tid: vec![
- 0xf9, 0xf1, 0x21, 0xcb, 0xde, 0x7d, 0x7c, 0x75,
- 0x92, 0x3c, 0xe2, 0x71,
- ],
- want_addr: IpAddr::V4(Ipv4Addr::from([72, 69, 33, 45])),
- want_port: 59029,
- },
- ResponseTestCase{
- name: "stun.sipgate.net:10000",
- data: vec![
- 0x01, 0x01, 0x00, 0x44, 0x21, 0x12, 0xa4, 0x42,
- 0x48, 0x2e, 0xb6, 0x47, 0x15, 0xe8, 0xb2, 0x8e,
- 0xae, 0xad, 0x64, 0x44, 0x00, 0x01, 0x00, 0x08,
- 0x00, 0x01, 0xe4, 0xab, 0x48, 0x45, 0x21, 0x2d,
- 0x00, 0x04, 0x00, 0x08, 0x00, 0x01, 0x27, 0x10,
- 0xd9, 0x0a, 0x44, 0x98, 0x00, 0x05, 0x00, 0x08,
- 0x00, 0x01, 0x27, 0x11, 0xd9, 0x74, 0x7a, 0x8a,
- 0x80, 0x20, 0x00, 0x08, 0x00, 0x01, 0xc5, 0xb9,
- 0x69, 0x57, 0x85, 0x6f, 0x80, 0x22, 0x00, 0x10,
- 0x56, 0x6f, 0x76, 0x69, 0x64, 0x61, 0x2e, 0x6f,
- 0x72, 0x67, 0x20, 0x30, 0x2e, 0x39, 0x36, 0x00,
- ],
- want_tid: vec![
- 0x48, 0x2e, 0xb6, 0x47, 0x15, 0xe8, 0xb2, 0x8e,
- 0xae, 0xad, 0x64, 0x44,
- ],
- want_addr: IpAddr::V4(Ipv4Addr::from([72, 69, 33, 45])),
- want_port: 58539,
- },
- ResponseTestCase{
- name: "stun.powervoip.com:3478",
- data: vec![
- 0x01, 0x01, 0x00, 0x24, 0x21, 0x12, 0xa4, 0x42,
- 0x7e, 0x57, 0x96, 0x68, 0x29, 0xf4, 0x44, 0x60,
- 0x9d, 0x1d, 0xea, 0xa6, 0x00, 0x01, 0x00, 0x08,
- 0x00, 0x01, 0xe9, 0xd3, 0x48, 0x45, 0x21, 0x2d,
- 0x00, 0x04, 0x00, 0x08, 0x00, 0x01, 0x0d, 0x96,
- 0x4d, 0x48, 0xa9, 0xd4, 0x00, 0x05, 0x00, 0x08,
- 0x00, 0x01, 0x0d, 0x97, 0x4d, 0x48, 0xa9, 0xd5,
- ],
- want_tid: vec![
- 0x7e, 0x57, 0x96, 0x68, 0x29, 0xf4, 0x44, 0x60,
- 0x9d, 0x1d, 0xea, 0xa6,
- ],
- want_addr: IpAddr::V4(Ipv4Addr::from([72, 69, 33, 45])),
- want_port: 59859,
- },
- ResponseTestCase{
- name: "in-process pion server",
- data: vec![
- 0x01, 0x01, 0x00, 0x24, 0x21, 0x12, 0xa4, 0x42,
- 0xeb, 0xc2, 0xd3, 0x6e, 0xf4, 0x71, 0x21, 0x7c,
- 0x4f, 0x3e, 0x30, 0x8e, 0x80, 0x22, 0x00, 0x0a,
- 0x65, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74,
- 0x65, 0x72, 0x00, 0x00, 0x00, 0x20, 0x00, 0x08,
- 0x00, 0x01, 0xce, 0x66, 0x5e, 0x12, 0xa4, 0x43,
- 0x80, 0x28, 0x00, 0x04, 0xb6, 0x99, 0xbb, 0x02,
- 0x01, 0x01, 0x00, 0x24, 0x21, 0x12, 0xa4, 0x42,
- ],
- want_tid: vec![
- 0xeb, 0xc2, 0xd3, 0x6e, 0xf4, 0x71, 0x21, 0x7c,
- 0x4f, 0x3e, 0x30, 0x8e,
- ],
- want_addr: IpAddr::V4(Ipv4Addr::from([127, 0, 0, 1])),
- want_port: 61300,
- },
- ResponseTestCase{
- name: "stuntman-server ipv6",
- data: vec![
- 0x01, 0x01, 0x00, 0x48, 0x21, 0x12, 0xa4, 0x42,
- 0x06, 0xf5, 0x66, 0x85, 0xd2, 0x8a, 0xf3, 0xe6,
- 0x9c, 0xe3, 0x41, 0xe2, 0x00, 0x01, 0x00, 0x14,
- 0x00, 0x02, 0x90, 0xce, 0x26, 0x02, 0x00, 0xd1,
- 0xb4, 0xcf, 0xc1, 0x00, 0x38, 0xb2, 0x31, 0xff,
- 0xfe, 0xef, 0x96, 0xf6, 0x80, 0x2b, 0x00, 0x14,
- 0x00, 0x02, 0x0d, 0x96, 0x26, 0x04, 0xa8, 0x80,
- 0x00, 0x02, 0x00, 0xd1, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0xc5, 0x70, 0x01, 0x00, 0x20, 0x00, 0x14,
- 0x00, 0x02, 0xb1, 0xdc, 0x07, 0x10, 0xa4, 0x93,
- 0xb2, 0x3a, 0xa7, 0x85, 0xea, 0x38, 0xc2, 0x19,
- 0x62, 0x0c, 0xd7, 0x14,
- ],
- want_tid: vec![
- 6, 245, 102, 133, 210, 138, 243, 230, 156, 227,
- 65, 226,
- ],
- want_addr: "2602:d1:b4cf:c100:38b2:31ff:feef:96f6".parse().unwrap(),
- want_port: 37070,
- },
- // Testing STUN attribute padding rules using STUN software attribute
- // with values of 1 & 3 length respectively before the XorMappedAddress attribute
- ResponseTestCase {
- name: "software-a",
- data: vec![
- 0x01, 0x01, 0x00, 0x14, 0x21, 0x12, 0xa4, 0x42,
- 0xeb, 0xc2, 0xd3, 0x6e, 0xf4, 0x71, 0x21, 0x7c,
- 0x4f, 0x3e, 0x30, 0x8e, 0x80, 0x22, 0x00, 0x01,
- 0x61, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x08,
- 0x00, 0x01, 0xce, 0x66, 0x5e, 0x12, 0xa4, 0x43,
- ],
- want_tid: vec![
- 0xeb, 0xc2, 0xd3, 0x6e, 0xf4, 0x71, 0x21, 0x7c,
- 0x4f, 0x3e, 0x30, 0x8e,
- ],
- want_addr: IpAddr::V4(Ipv4Addr::from([127, 0, 0, 1])),
- want_port: 61300,
- },
- ResponseTestCase {
- name: "software-abc",
- data: vec![
- 0x01, 0x01, 0x00, 0x14, 0x21, 0x12, 0xa4, 0x42,
- 0xeb, 0xc2, 0xd3, 0x6e, 0xf4, 0x71, 0x21, 0x7c,
- 0x4f, 0x3e, 0x30, 0x8e, 0x80, 0x22, 0x00, 0x03,
- 0x61, 0x62, 0x63, 0x00, 0x00, 0x20, 0x00, 0x08,
- 0x00, 0x01, 0xce, 0x66, 0x5e, 0x12, 0xa4, 0x43,
- ],
- want_tid: vec![
- 0xeb, 0xc2, 0xd3, 0x6e, 0xf4, 0x71, 0x21, 0x7c,
- 0x4f, 0x3e, 0x30, 0x8e,
- ],
- want_addr: IpAddr::V4(Ipv4Addr::from([127, 0, 0, 1])),
- want_port: 61300,
- },
- ResponseTestCase {
- name: "no-4in6",
- data: data_encoding::HEXLOWER.decode(b"010100182112a4424fd5d202dcb37d31fc773306002000140002cd3d2112a4424fd5d202dcb382ce2dc3fcc7").unwrap(),
- want_tid: vec![79, 213, 210, 2, 220, 179, 125, 49, 252, 119, 51, 6],
- want_addr: IpAddr::V4(Ipv4Addr::from([209, 180, 207, 193])),
- want_port: 60463,
- },
- ];
-
- for (i, test) in cases.into_iter().enumerate() {
- println!("Case {i}: {}", test.name);
- let (tx, addr_port) = parse_response(&test.data).unwrap();
- assert!(is(&test.data));
- assert_eq!(tx.as_bytes(), &test.want_tid[..]);
- assert_eq!(addr_port.ip(), test.want_addr);
- assert_eq!(addr_port.port(), test.want_port);
- }
- }
-
- #[test]
- fn test_parse_binding_request() {
- let tx = TransactionId::default();
- let req = request(tx);
- assert!(is(&req));
- let got_tx = parse_binding_request(&req).unwrap();
- assert_eq!(got_tx, tx);
- }
-
- #[test]
- fn test_stun_cookie() {
- assert_eq!(stun_rs::MAGIC_COOKIE, COOKIE);
- }
-
- #[test]
- fn test_response() {
- let txn = |n| TransactionId::from([n; 12]);
-
- struct Case {
- tx: TransactionId,
- addr: IpAddr,
- port: u16,
- }
- let tests = vec![
- Case {
- tx: txn(1),
- addr: "1.2.3.4".parse().unwrap(),
- port: 254,
- },
- Case {
- tx: txn(2),
- addr: "1.2.3.4".parse().unwrap(),
- port: 257,
- },
- Case {
- tx: txn(3),
- addr: "1::4".parse().unwrap(),
- port: 254,
- },
- Case {
- tx: txn(4),
- addr: "1::4".parse().unwrap(),
- port: 257,
- },
- ];
-
- for tt in tests {
- let res = response(tt.tx, SocketAddr::new(tt.addr, tt.port));
- assert!(is(&res));
- let (tx2, addr2) = parse_response(&res).unwrap();
- assert_eq!(tt.tx, tx2);
- assert_eq!(tt.addr, addr2.ip());
- assert_eq!(tt.port, addr2.port());
- }
- }
-}
diff --git a/iroh-relay/src/quic.rs b/iroh-relay/src/quic.rs
index 9f186e45dad..2f6c0d277fc 100644
--- a/iroh-relay/src/quic.rs
+++ b/iroh-relay/src/quic.rs
@@ -4,10 +4,7 @@ use std::{net::SocketAddr, sync::Arc};
use n0_future::time::Duration;
use nested_enum_utils::common_fields;
-use quinn::{
- crypto::rustls::{NoInitialCipherSuite, QuicClientConfig},
- VarInt,
-};
+use quinn::{crypto::rustls::QuicClientConfig, VarInt};
use snafu::{Backtrace, Snafu};
use tokio::sync::watch;
@@ -20,7 +17,10 @@ pub const QUIC_ADDR_DISC_CLOSE_REASON: &[u8] = b"finished";
#[cfg(feature = "server")]
pub(crate) mod server {
- use quinn::{crypto::rustls::QuicServerConfig, ApplicationClose, ConnectionError};
+ use quinn::{
+ crypto::rustls::{NoInitialCipherSuite, QuicServerConfig},
+ ApplicationClose, ConnectionError,
+ };
use snafu::ResultExt;
use tokio::task::JoinSet;
use tokio_util::{sync::CancellationToken, task::AbortOnDropHandle};
@@ -243,12 +243,10 @@ pub enum Error {
Connection { source: quinn::ConnectionError },
#[snafu(transparent)]
WatchRecv { source: watch::error::RecvError },
- #[snafu(transparent)]
- NoIntitialCipherSuite { source: NoInitialCipherSuite },
}
/// Handles the client side of QUIC address discovery.
-#[derive(Debug)]
+#[derive(Debug, Clone)]
pub struct QuicClient {
/// A QUIC Endpoint.
ep: quinn::Endpoint,
@@ -259,16 +257,14 @@ pub struct QuicClient {
impl QuicClient {
/// Create a new QuicClient to handle the client side of QUIC
/// address discovery.
- pub fn new(
- ep: quinn::Endpoint,
- mut client_config: rustls::ClientConfig,
- ) -> Result {
+ pub fn new(ep: quinn::Endpoint, mut client_config: rustls::ClientConfig) -> Self {
// add QAD alpn
client_config.alpn_protocols = vec![ALPN_QUIC_ADDR_DISC.into()];
// go from rustls client config to rustls QUIC specific client config to
// a quinn client config
- let mut client_config =
- quinn::ClientConfig::new(Arc::new(QuicClientConfig::try_from(client_config)?));
+ let mut client_config = quinn::ClientConfig::new(Arc::new(
+ QuicClientConfig::try_from(client_config).expect("known ciphersuite"),
+ ));
// enable the receive side of address discovery
let mut transport = quinn_proto::TransportConfig::default();
@@ -284,9 +280,15 @@ impl QuicClient {
// timeout (set to 30s by default).
transport.initial_rtt(Duration::from_millis(111));
transport.receive_observed_address_reports(true);
+
+ // keep it alive
+ transport.keep_alive_interval(Some(Duration::from_secs(25)));
+ transport.max_idle_timeout(Some(
+ Duration::from_secs(35).try_into().expect("known value"),
+ ));
client_config.transport_config(Arc::new(transport));
- Ok(Self { ep, client_config })
+ Self { ep, client_config }
}
/// Client side of QUIC address discovery.
@@ -295,7 +297,8 @@ impl QuicClient {
/// and estimated latency of the connection.
///
/// Consumes and gracefully closes the connection.
- pub async fn get_addr_and_latency(
+ #[cfg(test)]
+ async fn get_addr_and_latency(
&self,
server_addr: SocketAddr,
host: &str,
@@ -337,16 +340,30 @@ impl QuicClient {
conn.close(QUIC_ADDR_DISC_CLOSE_CODE, QUIC_ADDR_DISC_CLOSE_REASON);
Ok((observed_addr, latency))
}
+
+ /// Create a connection usable for qad
+ pub async fn create_conn(
+ &self,
+ server_addr: SocketAddr,
+ host: &str,
+    ) -> Result<quinn::Connection, Error> {
+ let config = self.client_config.clone();
+ let connecting = self.ep.connect_with(config, server_addr, host);
+ let conn = connecting?.await?;
+ Ok(conn)
+ }
}
#[cfg(all(test, feature = "server"))]
mod tests {
use std::net::Ipv4Addr;
- use n0_future::{task::AbortOnDropHandle, time};
+ use n0_future::{
+ task::AbortOnDropHandle,
+ time::{self, Instant},
+ };
use n0_snafu::{Error, Result, ResultExt};
use quinn::crypto::rustls::QuicServerConfig;
- use tokio::time::Instant;
use tracing::{debug, info, info_span, Instrument};
use tracing_test::traced_test;
use webpki_types::PrivatePkcs8KeyDer;
@@ -376,7 +393,7 @@ mod tests {
// create the client configuration used for the client endpoint when they
// initiate a connection with the server
let client_config = crate::client::make_dangerous_client_config();
- let quic_client = QuicClient::new(client_endpoint.clone(), client_config)?;
+ let quic_client = QuicClient::new(client_endpoint.clone(), client_config);
let (addr, _latency) = quic_client
.get_addr_and_latency(quic_server.bind_addr(), &host.to_string())
@@ -409,7 +426,7 @@ mod tests {
// create the client configuration used for the client endpoint when they
// initiate a connection with the server
let client_config = crate::client::make_dangerous_client_config();
- let quic_client = QuicClient::new(client_endpoint.clone(), client_config)?;
+ let quic_client = QuicClient::new(client_endpoint.clone(), client_config);
// Start a connection attempt with nirvana - this will fail
let task = AbortOnDropHandle::new(tokio::spawn({
@@ -509,7 +526,7 @@ mod tests {
// create the client configuration used for the client endpoint when they
// initiate a connection with the server
let client_config = crate::client::make_dangerous_client_config();
- let quic_client = QuicClient::new(client_endpoint.clone(), client_config)?;
+ let quic_client = QuicClient::new(client_endpoint.clone(), client_config);
// Now we should still connect, but it should take more than 1s.
info!("making QAD request");
diff --git a/iroh-relay/src/relay_map.rs b/iroh-relay/src/relay_map.rs
index b16da39384f..4f8740b7735 100644
--- a/iroh-relay/src/relay_map.rs
+++ b/iroh-relay/src/relay_map.rs
@@ -5,7 +5,7 @@ use std::{collections::BTreeMap, fmt, sync::Arc};
use iroh_base::RelayUrl;
use serde::{Deserialize, Serialize};
-use crate::defaults::{DEFAULT_RELAY_QUIC_PORT, DEFAULT_STUN_PORT};
+use crate::defaults::DEFAULT_RELAY_QUIC_PORT;
/// Configuration of all the relay servers that can be used.
#[derive(Debug, Clone, PartialEq, Eq)]
@@ -68,7 +68,7 @@ impl FromIterator for RelayMap {
impl From<RelayUrl> for RelayMap {
/// Creates a [`RelayMap`] from a [`RelayUrl`].
///
- /// The [`RelayNode`]s in the [`RelayMap`] will have the default STUN and QUIC address
+ /// The [`RelayNode`]s in the [`RelayMap`] will have the default QUIC address
/// discovery ports.
fn from(value: RelayUrl) -> Self {
Self {
@@ -88,7 +88,7 @@ impl From for RelayMap {
impl FromIterator<RelayUrl> for RelayMap {
/// Creates a [`RelayMap`] from an iterator of [`RelayUrl`].
///
- /// The [`RelayNode`]s in the [`RelayMap`] will have the default STUN and QUIC address
+ /// The [`RelayNode`]s in the [`RelayMap`] will have the default QUIC address
/// discovery ports.
fn from_iter<T: IntoIterator<Item = RelayUrl>>(iter: T) -> Self {
Self {
@@ -116,15 +116,6 @@ impl fmt::Display for RelayMap {
pub struct RelayNode {
/// The [`RelayUrl`] where this relay server can be dialed.
pub url: RelayUrl,
- /// Whether this relay server should only be used for STUN requests.
- ///
- /// This essentially allows you to use a normal STUN server as a relay node, no relay
- /// functionality is used.
- pub stun_only: bool,
- /// The stun port of the relay server.
- ///
- /// Setting this to `0` means the default STUN port is used.
- pub stun_port: u16,
/// Configuration to speak to the QUIC endpoint on the relay server.
///
/// When `None`, we will not attempt to do QUIC address discovery
@@ -137,8 +128,6 @@ impl From for RelayNode {
fn from(value: RelayUrl) -> Self {
Self {
url: value,
- stun_only: false,
- stun_port: DEFAULT_STUN_PORT,
quic: quic_config(),
}
}
diff --git a/iroh-relay/src/server.rs b/iroh-relay/src/server.rs
index 73bd29d712e..ed94a2c19ca 100644
--- a/iroh-relay/src/server.rs
+++ b/iroh-relay/src/server.rs
@@ -14,7 +14,6 @@
//! - HTTPS `/relay`: The main URL endpoint to which clients connect and sends traffic over.
//! - HTTPS `/ping`: Used for net_report probes.
//! - HTTPS `/generate_204`: Used for net_report probes.
-//! - STUN: UDP port for STUN requests/responses.
use std::{fmt, future::Future, net::SocketAddr, num::NonZeroU32, pin::Pin, sync::Arc};
@@ -31,16 +30,15 @@ use n0_future::{future::Boxed, StreamExt};
use nested_enum_utils::common_fields;
use snafu::{Backtrace, ResultExt, Snafu};
use tokio::{
- net::{TcpListener, UdpSocket},
+ net::TcpListener,
task::{JoinError, JoinSet},
};
use tokio_util::task::AbortOnDropHandle;
-use tracing::{debug, error, info, info_span, instrument, trace, warn, Instrument};
+use tracing::{debug, error, info, info_span, instrument, Instrument};
use crate::{
defaults::DEFAULT_KEY_CACHE_CAPACITY,
http::RELAY_PROBE_PATH,
- protos,
quic::server::{QuicServer, QuicSpawnError, ServerHandle as QuicServerHandle},
};
@@ -54,7 +52,7 @@ pub(crate) mod streams;
pub mod testing;
pub use self::{
- metrics::{Metrics, RelayMetrics, StunMetrics},
+ metrics::{Metrics, RelayMetrics},
resolver::{ReloadingResolver, DEFAULT_CERT_RELOAD_INTERVAL},
};
@@ -69,8 +67,14 @@ const INDEX: &[u8] = br#"
"#;
const TLS_HEADERS: [(&str, &str); 2] = [
- ("Strict-Transport-Security", "max-age=63072000; includeSubDomains"),
- ("Content-Security-Policy", "default-src 'none'; frame-ancestors 'none'; form-action 'none'; base-uri 'self'; block-all-mixed-content; plugin-types 'none'")
+ (
+ "Strict-Transport-Security",
+ "max-age=63072000; includeSubDomains",
+ ),
+ (
+ "Content-Security-Policy",
+ "default-src 'none'; frame-ancestors 'none'; form-action 'none'; base-uri 'self'; block-all-mixed-content; plugin-types 'none'",
+ ),
];
type BytesBody = http_body_util::Full<hyper::body::Bytes>;
@@ -82,7 +86,7 @@ fn body_empty() -> BytesBody {
http_body_util::Full::new(hyper::body::Bytes::new())
}
-/// Configuration for the full Relay & STUN server.
+/// Configuration for the full Relay.
///
/// Be aware the generic parameters are for when using the Let's Encrypt TLS configuration.
/// If not used dummy ones need to be provided, e.g. `ServerConfig::<(), ()>::default()`.
@@ -90,8 +94,6 @@ fn body_empty() -> BytesBody {
pub struct ServerConfig {
/// Configuration for the Relay server, disabled if `None`.
pub relay: Option>,
- /// Configuration for the STUN server, disabled if `None`.
- pub stun: Option,
/// Configuration for the QUIC server, disabled if `None`.
pub quic: Option,
/// Socket to serve metrics on.
@@ -158,15 +160,6 @@ pub enum Access {
Deny,
}
-/// Configuration for the STUN server.
-#[derive(Debug)]
-pub struct StunConfig {
- /// The socket address on which the STUN server should bind.
- ///
- /// Normally you'd chose port `3478`, see [`crate::defaults::DEFAULT_STUN_PORT`].
- pub bind_addr: SocketAddr,
-}
-
/// Configuration for the QUIC server.
#[derive(Debug)]
pub struct QuicConfig {
@@ -241,17 +234,15 @@ pub enum CertConfig {
Reloading,
}
-/// A running Relay + STUN server.
+/// A running Relay + QAD server.
///
-/// This is a full Relay server, including STUN, Relay and various associated HTTP services.
+/// This is a full Relay server, including QAD, Relay and various associated HTTP services.
///
/// Dropping this will stop the server.
#[derive(Debug)]
pub struct Server {
/// The address of the HTTP server, if configured.
http_addr: Option<SocketAddr>,
- /// The address of the STUN server, if configured.
- stun_addr: Option,
/// The address of the HTTPS server, if the relay server is using TLS.
///
/// If the Relay server is not using TLS then it is served from the
@@ -285,9 +276,7 @@ pub struct Server {
pub enum SpawnError {
#[snafu(display("Unable to get local address"))]
LocalAddr { source: std::io::Error },
- #[snafu(display("Failed to bind STUN listener"))]
- UdpSocketBind { source: std::io::Error },
- #[snafu(display("Failed to bind STUN listener"))]
+ #[snafu(display("Failed to bind QAD listener"))]
QuicSpawn { source: QuicSpawnError },
#[snafu(display("Failed to parse TLS header"))]
TlsHeaderParse { source: InvalidHeaderValue },
@@ -347,30 +336,6 @@ impl Server {
);
}
- // Start the STUN server.
- let stun_addr = match config.stun {
- Some(stun) => {
- debug!("Starting STUN server");
- match UdpSocket::bind(stun.bind_addr).await {
- Ok(sock) => {
- let addr = sock.local_addr().context(LocalAddrSnafu)?;
- info!("STUN server listening on {addr}");
- let stun_metrics = metrics.stun.clone();
- tasks.spawn(
- async move {
- server_stun_listener(sock, stun_metrics).await;
- Ok(())
- }
- .instrument(info_span!("stun-server", %addr)),
- );
- Some(addr)
- }
- Err(err) => return Err(err).context(UdpSocketBindSnafu),
- }
- }
- None => None,
- };
-
// Start the Relay server, but first clone the certs out.
let certificates = config.relay.as_ref().and_then(|relay| {
relay.tls.as_ref().and_then(|tls| match tls.cert {
@@ -491,7 +456,6 @@ impl Server {
Ok(Self {
http_addr: http_addr.or(relay_addr),
- stun_addr,
https_addr: http_addr.and(relay_addr),
quic_addr,
relay_handle,
@@ -540,11 +504,6 @@ impl Server {
self.quic_addr
}
- /// The socket address the STUN server is listening on.
- pub fn stun_addr(&self) -> Option<SocketAddr> {
- self.stun_addr
- }
-
/// The certificates chain if configured with manual TLS certificates.
pub fn certificates(&self) -> Option>> {
self.certificates.clone()
@@ -643,91 +602,6 @@ async fn relay_supervisor(
ret
}
-/// Runs a STUN server.
-///
-/// When the future is dropped, the server stops.
-async fn server_stun_listener(sock: UdpSocket, metrics: Arc<StunMetrics>) {
- info!(addr = ?sock.local_addr().ok(), "running STUN server");
- let sock = Arc::new(sock);
- let mut buffer = vec![0u8; 64 << 10];
- let mut tasks = JoinSet::new();
- loop {
- tokio::select! {
- biased;
-
- Some(res) = tasks.join_next() => {
- if let Err(err) = res {
- if err.is_panic() {
- panic!("task panicked: {:#?}", err);
- }
- }
- }
- res = sock.recv_from(&mut buffer) => {
- match res {
- Ok((n, src_addr)) => {
- metrics.requests.inc();
- let pkt = &buffer[..n];
- if !protos::stun::is(pkt) {
- debug!(%src_addr, "STUN: ignoring non stun packet");
- metrics.bad_requests.inc();
- continue;
- }
- let pkt = pkt.to_vec();
- tasks.spawn(handle_stun_request(src_addr, pkt, sock.clone(), metrics.clone()));
- }
- Err(err) => {
- metrics.failures.inc();
- warn!("failed to recv: {err:#}");
- }
- }
- }
- }
- }
-}
-
-/// Handles a single STUN request, doing all logging required.
-async fn handle_stun_request(
- src_addr: SocketAddr,
- pkt: Vec<u8>,
- sock: Arc<UdpSocket>,
- metrics: Arc<StunMetrics>,
-) {
- let (txid, response) = match protos::stun::parse_binding_request(&pkt) {
- Ok(txid) => {
- debug!(%src_addr, %txid, "STUN: received binding request");
- (txid, protos::stun::response(txid, src_addr))
- }
- Err(err) => {
- metrics.bad_requests.inc();
- warn!(%src_addr, "STUN: invalid binding request: {:?}", err);
- return;
- }
- };
-
- match sock.send_to(&response, src_addr).await {
- Ok(len) => {
- if len != response.len() {
- warn!(
- %src_addr,
- %txid,
- "failed to write response, {len}/{} bytes sent",
- response.len()
- );
- } else {
- match src_addr {
- SocketAddr::V4(_) => metrics.ipv4_success.inc(),
- SocketAddr::V6(_) => metrics.ipv6_success.inc(),
- };
- }
- trace!(%src_addr, %txid, "sent {len} bytes");
- }
- Err(err) => {
- metrics.failures.inc();
- warn!(%src_addr, %txid, "failed to write response: {err:#}");
- }
- }
-}
-
fn root_handler(
_r: Request,
response: ResponseBuilder,
@@ -881,19 +755,17 @@ mod tests {
use iroh_base::{NodeId, RelayUrl, SecretKey};
use n0_future::{FutureExt, SinkExt, StreamExt};
use n0_snafu::{Result, ResultExt};
- use tokio::net::UdpSocket;
use tracing::{info, instrument};
use tracing_test::traced_test;
use super::{
- Access, AccessConfig, RelayConfig, Server, ServerConfig, SpawnError, StunConfig,
+ Access, AccessConfig, RelayConfig, Server, ServerConfig, SpawnError,
NO_CONTENT_CHALLENGE_HEADER, NO_CONTENT_RESPONSE_HEADER,
};
use crate::{
client::{conn::ReceivedMessage, ClientBuilder, SendMessage},
dns::DnsResolver,
http::{Protocol, HTTP_UPGRADE_PROTOCOL},
- protos,
};
async fn spawn_local_relay() -> std::result::Result<Server, SpawnError> {
@@ -906,7 +778,6 @@ mod tests {
access: AccessConfig::Everyone,
}),
quic: None,
- stun: None,
metrics_addr: None,
})
.await
@@ -962,7 +833,6 @@ mod tests {
key_cache_capacity: Some(1024),
access: AccessConfig::Everyone,
}),
- stun: None,
quic: None,
metrics_addr: Some((Ipv4Addr::LOCALHOST, 1234).into()),
})
@@ -1196,38 +1066,6 @@ mod tests {
Ok(())
}
- #[tokio::test]
- #[traced_test]
- async fn test_stun() {
- let server = Server::spawn(ServerConfig::<(), ()> {
- relay: None,
- stun: Some(StunConfig {
- bind_addr: (Ipv4Addr::LOCALHOST, 0).into(),
- }),
- quic: None,
- metrics_addr: None,
- })
- .await
- .unwrap();
-
- let txid = protos::stun::TransactionId::default();
- let req = protos::stun::request(txid);
- let socket = UdpSocket::bind("127.0.0.1:0").await.unwrap();
- socket
- .send_to(&req, server.stun_addr().unwrap())
- .await
- .unwrap();
-
- // get response
- let mut buf = vec![0u8; 64000];
- let (len, addr) = socket.recv_from(&mut buf).await.unwrap();
- assert_eq!(addr, server.stun_addr().unwrap());
- buf.truncate(len);
- let (txid_back, response_addr) = protos::stun::parse_response(&buf).unwrap();
- assert_eq!(txid, txid_back);
- assert_eq!(response_addr, socket.local_addr().unwrap());
- }
-
#[tokio::test]
#[traced_test]
async fn test_relay_access_control() -> Result<()> {
@@ -1257,7 +1095,6 @@ mod tests {
})),
}),
quic: None,
- stun: None,
metrics_addr: None,
})
.await?;
diff --git a/iroh-relay/src/server/metrics.rs b/iroh-relay/src/server/metrics.rs
index bf9e52df125..298f6e8141b 100644
--- a/iroh-relay/src/server/metrics.rs
+++ b/iroh-relay/src/server/metrics.rs
@@ -84,28 +84,10 @@ pub struct Metrics {
// pub average_queue_duration:
}
-/// Metrics tracked for the STUN server.
-#[derive(Debug, Default, MetricsGroup)]
-#[metrics(name = "stun")]
-pub struct StunMetrics {
- /// Number of STUN requests made to the server.
- pub requests: Counter,
- /// Number of successful ipv4 STUN requests served.
- pub ipv4_success: Counter,
- /// Number of successful ipv6 STUN requests served.
- pub ipv6_success: Counter,
- /// Number of bad requests made to the STUN endpoint.
- pub bad_requests: Counter,
- /// Number of STUN requests that end in failure.
- pub failures: Counter,
-}
-
/// All metrics tracked in the relay server.
#[derive(Debug, Default, Clone, MetricsGroupSet)]
#[metrics(name = "relay")]
pub struct RelayMetrics {
- /// Metrics tracked for the STUN server.
- pub stun: Arc,
/// Metrics tracked for the relay server.
pub server: Arc,
}
diff --git a/iroh-relay/src/server/testing.rs b/iroh-relay/src/server/testing.rs
index fd2989cd81d..efaaf030ed0 100644
--- a/iroh-relay/src/server/testing.rs
+++ b/iroh-relay/src/server/testing.rs
@@ -1,18 +1,7 @@
//! Exposes functions to quickly configure a server suitable for testing.
use std::net::Ipv4Addr;
-use super::{
- AccessConfig, CertConfig, QuicConfig, RelayConfig, ServerConfig, StunConfig, TlsConfig,
-};
-
-/// Creates a [`StunConfig`] suitable for testing.
-///
-/// To ensure port availability for testing, the port is configured to be assigned by the OS.
-pub fn stun_config() -> StunConfig {
- StunConfig {
- bind_addr: (Ipv4Addr::LOCALHOST, 0).into(),
- }
-}
+use super::{AccessConfig, CertConfig, QuicConfig, RelayConfig, ServerConfig, TlsConfig};
/// Creates a [`rustls::ServerConfig`] and certificates suitable for testing.
///
@@ -88,13 +77,11 @@ pub fn quic_config() -> QuicConfig {
/// Creates a [`ServerConfig`] suitable for testing.
///
/// - Relaying is enabled using [`relay_config`]
-/// - Stun is enabled using [`stun_config`]
/// - QUIC addr discovery is disabled.
/// - Metrics are not enabled.
pub fn server_config() -> ServerConfig<()> {
ServerConfig {
relay: Some(relay_config()),
- stun: Some(stun_config()),
quic: Some(quic_config()),
#[cfg(feature = "metrics")]
metrics_addr: None,
diff --git a/iroh/Cargo.toml b/iroh/Cargo.toml
index 650e6d40229..987e070f1a6 100644
--- a/iroh/Cargo.toml
+++ b/iroh/Cargo.toml
@@ -41,7 +41,7 @@ http = "1"
iroh-base = { version = "0.35.0", default-features = false, features = ["key", "relay"], path = "../iroh-base" }
iroh-relay = { version = "0.35", path = "../iroh-relay", default-features = false }
n0-future = "0.1.2"
-n0-snafu = "0.2.0"
+n0-snafu = "0.2.1"
n0-watcher = "0.2"
nested_enum_utils = "0.2.1"
netwatch = { version = "0.6" }
@@ -109,7 +109,7 @@ parse-size = { version = "=1.0.0", optional = true, features = ['std'] } # pinne
hickory-resolver = "0.25.1"
igd-next = { version = "0.16", features = ["aio_tokio"] }
netdev = { version = "0.31.0" }
-portmapper = { version = "0.5.0", default-features = false }
+portmapper = { version = "0.6.0", default-features = false }
quinn = { package = "iroh-quinn", version = "0.14.0", default-features = false, features = ["runtime-tokio", "rustls-ring"] }
tokio = { version = "1", features = [
"io-util",
@@ -134,6 +134,7 @@ getrandom = { version = "0.3.2", features = ["wasm_js"] }
# target-common test/dev dependencies
[dev-dependencies]
+console_error_panic_hook = "0.1"
postcard = { version = "1.1.1", features = ["use-std"] }
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
diff --git a/iroh/README.md b/iroh/README.md
index 025516e5d6f..751e705d542 100644
--- a/iroh/README.md
+++ b/iroh/README.md
@@ -4,9 +4,9 @@ Iroh is a library to establish direct connectivity between peers.
It's built on peer-to-peer [QUIC](https://en.wikipedia.org/wiki/QUIC) using both relays and holepunching.
The main structure for connection is the `Endpoint` entrypoint.
-Peer to peer connectivity is established with the help of a _relay server_. The relay server provides Session Traversal Utilities for NAT [(STUN)](https://en.wikipedia.org/wiki/STUN) for the peers. If no direct connection can be established, the connection is relayed via the server.
+Peer to peer connectivity is established with the help of a _relay server_. The relay server provides [QUIC Address Discovery](https://www.ietf.org/archive/id/draft-ietf-quic-address-discovery-00.html) (QAD) and hole-punching assistance for the peers. If no direct connection can be established, the connection is relayed via the server.
-Peers must know and do verify the PeerID of each other before they can connect. When using a relay server to aid the connection establishment they will register with a home relay server using their PublicKey. Other peers which can not establish a direct connection can then establish connection via this relay server. This will try to assist establishing a direct connection using STUN and holepunching but continue relaying if not possible.
+Peers must know and do verify the PeerID of each other before they can connect. When using a relay server to aid the connection establishment they will register with a home relay server using their PublicKey. Other peers which can not establish a direct connection can then establish connection via this relay server. This will try to assist establishing a direct connection using QAD and holepunching but continue relaying if not possible.
Peers can also connect directly without using a relay server. For this, however the listening peer must be directly reachable by the connecting peer via one of it's addresses.
diff --git a/iroh/bench/src/bin/bulk.rs b/iroh/bench/src/bin/bulk.rs
index 292ea89b697..37ff37e06b0 100644
--- a/iroh/bench/src/bin/bulk.rs
+++ b/iroh/bench/src/bin/bulk.rs
@@ -95,7 +95,6 @@ pub fn run_iroh(opt: Opt) -> Result<()> {
#[cfg(feature = "local-relay")]
if let Some(relay_server) = relay_server.as_ref() {
collect_and_print("RelayServerMetrics", &*relay_server.metrics().server);
- collect_and_print("RelayStunMetrics", &*relay_server.metrics().stun);
}
}
diff --git a/iroh/docs/local_relay_node.md b/iroh/docs/local_relay_node.md
index cb6736478fa..a90bfee3783 100644
--- a/iroh/docs/local_relay_node.md
+++ b/iroh/docs/local_relay_node.md
@@ -16,8 +16,6 @@ To connect to this iroh-relay when doing your normal iroh commands, adjust the i
# iroh.config.toml:
[[relay_nodes]]
url = "http://localhost:3340"
-stun_only = false
-stun_port = 3478
```
If you want to give a specific port for the iroh-relay to bind to, you can create a iroh-relay config file and pass that file in using the `--config_path` flag. You need to retain a `secret_key`, so it is recommended to run `iroh-relay --config-path [PATH]` once to generate a secret key and save it to the config file before doing further edits to the file.
@@ -29,13 +27,10 @@ To change the port you want to listen on, change the port in the `addr` field:
secret_key = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
addr = "[::]:12345"
-stun_port = 3478
hostname = "my.relay.network"
-enable_stun = true
enable_relay = true
```
Check [the iroh-relay file's](../src/bin/iroh-relay.rs) `Config` struct for documentation on each configuration field.
If you change the local iroh-relay server's configuration, however, be sure to adjust the associated fields in your iroh config as well.
-
diff --git a/iroh/docs/relay_nodes.md b/iroh/docs/relay_nodes.md
index 12dbee03b00..c8c8d643043 100644
--- a/iroh/docs/relay_nodes.md
+++ b/iroh/docs/relay_nodes.md
@@ -14,8 +14,6 @@ RelayNode {
url: format!("https://derp.iroh.network")
.parse()
.unwrap(),
- stun_only: false,
- stun_port: 3478,
}
```
@@ -26,7 +24,5 @@ RelayNode {
url: format!("https://eu1.derp.iroh.network")
.parse()
.unwrap(),
- stun_only: false,
- stun_port: 3478,
}
```
diff --git a/iroh/examples/transfer.rs b/iroh/examples/transfer.rs
index 371f717ce61..76b8c56321f 100644
--- a/iroh/examples/transfer.rs
+++ b/iroh/examples/transfer.rs
@@ -189,6 +189,9 @@ impl EndpointArgs {
}
};
builder = builder.secret_key(secret_key);
+ if Env::Dev == self.env {
+ builder = builder.insecure_skip_relay_cert_verify(true);
+ }
let relay_mode = if self.no_relay {
RelayMode::Disabled
@@ -245,17 +248,31 @@ impl EndpointArgs {
let node_id = endpoint.node_id();
println!("Our node id:\n\t{node_id}");
+
+ let eps = endpoint.direct_addresses().initialized().await?;
println!("Our direct addresses:");
- for local_endpoint in endpoint.direct_addresses().initialized().await? {
+ for local_endpoint in eps {
println!("\t{} (type: {:?})", local_endpoint.addr, local_endpoint.typ)
}
- if !self.no_relay {
- let relay_url = endpoint
- .home_relay()
- .get()?
- .pop()
- .context("Failed to resolve our home relay")?;
- println!("Our home relay server:\n\t{relay_url}");
+
+ if self.relay_only {
+ let relay_url = endpoint.home_relay().initialized().await?;
+ println!("Our home relay server:\t{relay_url}");
+ } else if !self.no_relay {
+ let relay_url = tokio::time::timeout(Duration::from_secs(2), async {
+ endpoint
+ .home_relay()
+ .initialized()
+ .await
+ .expect("disconnected")
+ })
+ .await
+ .ok();
+ if let Some(url) = relay_url {
+ println!("Our home relay server:\t{url}");
+ } else {
+ println!("No home relay server found");
+ }
}
println!();
diff --git a/iroh/src/defaults.rs b/iroh/src/defaults.rs
index 6b0aa6eb6d0..c7d96a7db36 100644
--- a/iroh/src/defaults.rs
+++ b/iroh/src/defaults.rs
@@ -5,11 +5,6 @@
///
/// The port is "QUIC" typed on a phone keypad.
pub use iroh_relay::defaults::DEFAULT_RELAY_QUIC_PORT;
-/// The default STUN port used by the Relay server.
-///
-/// The STUN port as defined by [RFC
-/// 8489]()
-pub use iroh_relay::defaults::DEFAULT_STUN_PORT;
use url::Url;
/// The default HTTP port used by the Relay server.
@@ -51,8 +46,6 @@ pub mod prod {
.expect("default url");
RelayNode {
url: url.into(),
- stun_only: false,
- stun_port: DEFAULT_STUN_PORT,
quic: Some(RelayQuicConfig::default()),
}
}
@@ -65,8 +58,6 @@ pub mod prod {
.expect("default_url");
RelayNode {
url: url.into(),
- stun_only: false,
- stun_port: DEFAULT_STUN_PORT,
quic: Some(RelayQuicConfig::default()),
}
}
@@ -79,8 +70,6 @@ pub mod prod {
.expect("default_url");
RelayNode {
url: url.into(),
- stun_only: false,
- stun_port: DEFAULT_STUN_PORT,
quic: Some(RelayQuicConfig::default()),
}
}
@@ -114,8 +103,6 @@ pub mod staging {
.expect("default url");
RelayNode {
url: url.into(),
- stun_only: false,
- stun_port: DEFAULT_STUN_PORT,
quic: Some(RelayQuicConfig::default()),
}
}
@@ -128,8 +115,6 @@ pub mod staging {
.expect("default_url");
RelayNode {
url: url.into(),
- stun_only: false,
- stun_port: DEFAULT_STUN_PORT,
quic: Some(RelayQuicConfig::default()),
}
}
diff --git a/iroh/src/discovery/dns.rs b/iroh/src/discovery/dns.rs
index 950160af229..aa4d7d7b0f3 100644
--- a/iroh/src/discovery/dns.rs
+++ b/iroh/src/discovery/dns.rs
@@ -11,7 +11,7 @@ use crate::{
Endpoint,
};
-const DNS_STAGGERING_MS: &[u64] = &[200, 300];
+pub(crate) const DNS_STAGGERING_MS: &[u64] = &[200, 300];
/// DNS node discovery
///
diff --git a/iroh/src/endpoint.rs b/iroh/src/endpoint.rs
index 146407b8ba3..976cab46c22 100644
--- a/iroh/src/endpoint.rs
+++ b/iroh/src/endpoint.rs
@@ -1017,7 +1017,7 @@ impl Endpoint {
/// iroh nodes to establish direct connectivity, depending on the network
/// situation. The yielded lists of direct addresses contain both the locally-bound
/// addresses and the [`Endpoint`]'s publicly reachable addresses discovered through
- /// mechanisms such as [STUN] and port mapping. Hence usually only a subset of these
+ /// mechanisms such as [QAD] and port mapping. Hence usually only a subset of these
/// will be applicable to a certain remote iroh node.
///
/// The [`Endpoint`] continuously monitors the direct addresses for changes as its own
@@ -1045,7 +1045,7 @@ impl Endpoint {
/// # });
/// ```
///
- /// [STUN]: https://en.wikipedia.org/wiki/STUN
+ /// [QAD]: https://www.ietf.org/archive/id/draft-ietf-quic-address-discovery-00.html
pub fn direct_addresses(&self) -> n0_watcher::Direct>> {
self.msock.direct_addresses()
}
@@ -1081,7 +1081,7 @@ impl Endpoint {
/// # });
/// ```
#[doc(hidden)]
- pub fn net_report(&self) -> n0_watcher::Direct >> {
+ pub fn net_report(&self) -> impl Watcher<Value = Option<Report>> {
self.msock.net_report()
}
@@ -2816,8 +2816,8 @@ mod tests {
#[tokio::test]
#[traced_test]
- async fn test_direct_addresses_no_stun_relay() -> Result {
- let (relay_map, _, _guard) = run_relay_server_with(None, false).await?;
+ async fn test_direct_addresses_no_qad_relay() -> Result {
+ let (relay_map, _, _guard) = run_relay_server_with(false).await.unwrap();
let ep = Endpoint::builder()
.alpns(vec![TEST_ALPN.to_vec()])
@@ -3226,7 +3226,7 @@ mod tests {
.await?;
// can get a first report
- endpoint.net_report().initialized().await?;
+ endpoint.net_report().updated().await?;
Ok(())
}
diff --git a/iroh/src/lib.rs b/iroh/src/lib.rs
index 982c412d5f6..1d0200afb9f 100644
--- a/iroh/src/lib.rs
+++ b/iroh/src/lib.rs
@@ -105,7 +105,7 @@
//!
//! Additionally to providing reliable connectivity between iroh nodes, Relay servers
//! provide some functions to assist in [hole punching]. They have various services to help
-//! nodes understand their own network situation. This includes offering a [STUN] server,
+//! nodes understand their own network situation. This includes offering a [QAD] server,
//! but also a few HTTP extra endpoints as well as responding to ICMP echo requests.
//!
//! By default the [number 0] relay servers are used, see [`RelayMode::Default`].
@@ -232,7 +232,7 @@
//! [bi-directional streams]: crate::endpoint::Connection::open_bi
//! [hole punching]: https://en.wikipedia.org/wiki/Hole_punching_(networking)
//! [socket addresses]: https://doc.rust-lang.org/stable/std/net/enum.SocketAddr.html
-//! [STUN]: https://en.wikipedia.org/wiki/STUN
+//! [QAD]: https://www.ietf.org/archive/id/draft-ietf-quic-address-discovery-00.html
//! [ALPN]: https://en.wikipedia.org/wiki/Application-Layer_Protocol_Negotiation
//! [HTTP3]: https://en.wikipedia.org/wiki/HTTP/3
//! [`SecretKey`]: crate::SecretKey
diff --git a/iroh/src/magicsock.rs b/iroh/src/magicsock.rs
index c73cd457bca..c8a753d8e74 100644
--- a/iroh/src/magicsock.rs
+++ b/iroh/src/magicsock.rs
@@ -31,10 +31,10 @@ use std::{
use bytes::Bytes;
use data_encoding::HEXLOWER;
use iroh_base::{NodeAddr, NodeId, PublicKey, RelayUrl, SecretKey};
-use iroh_relay::{protos::stun, RelayMap};
+use iroh_relay::RelayMap;
use n0_future::{
boxed::BoxStream,
- task::{self, JoinSet},
+ task::{self, AbortOnDropHandle},
time::{self, Duration, Instant},
StreamExt,
};
@@ -44,13 +44,13 @@ use netwatch::netmon;
#[cfg(not(wasm_browser))]
use netwatch::{ip::LocalAddresses, UdpSocket};
use quinn::{AsyncUdpSocket, ServerConfig};
-use rand::{seq::SliceRandom, Rng, SeedableRng};
+use rand::Rng;
use smallvec::SmallVec;
-use snafu::{IntoError, ResultExt, Snafu};
-use tokio::sync::{self, mpsc, Mutex};
+use snafu::{ResultExt, Snafu};
+use tokio::sync::{mpsc, Mutex};
+use tokio_util::sync::CancellationToken;
use tracing::{
- debug, error, error_span, event, info, info_span, instrument, trace, trace_span, warn,
- Instrument, Level, Span,
+ debug, error, event, info, info_span, instrument, trace, trace_span, warn, Instrument, Level,
};
use transports::LocalAddrsWatch;
use url::Url;
@@ -74,7 +74,7 @@ use crate::{
discovery::{Discovery, DiscoveryItem, DiscoverySubscribers, NodeData, UserData},
key::{public_ed_box, secret_ed_box, DecryptionError, SharedSecret},
metrics::EndpointMetrics,
- net_report::{self, IpMappedAddresses, Report, ReportError},
+ net_report::{self, IfStateDetails, IpMappedAddresses, Report},
};
mod metrics;
@@ -89,7 +89,7 @@ pub use self::{
node_map::{ConnectionType, ControlMsg, DirectAddrInfo, RemoteInfo},
};
-/// How long we consider a STUN-derived endpoint valid for. UDP NAT mappings typically
+/// How long we consider a QAD-derived endpoint valid for. UDP NAT mappings typically
/// expire at 30 seconds, so this is a few seconds shy of that.
const ENDPOINTS_FRESH_ENOUGH_DURATION: Duration = Duration::from_secs(27);
@@ -162,8 +162,10 @@ type RelayContents = SmallVec<[Bytes; 1]>;
pub(crate) struct Handle {
#[deref(forward)]
msock: Arc,
- // Empty when closed
- actor_tasks: Arc>>,
+ // empty when shutdown
+ actor_task: Arc>>>,
+ /// Token to cancel the actor task.
+ actor_token: CancellationToken,
// quinn endpoint
endpoint: quinn::Endpoint,
}
@@ -178,69 +180,52 @@ pub(crate) struct Handle {
/// It is usually only necessary to use a single [`MagicSock`] instance in an application, it
/// means any QUIC endpoints on top will be sharing as much information about nodes as
/// possible.
-#[derive(derive_more::Debug)]
+#[derive(Debug)]
pub(crate) struct MagicSock {
+ /// Channel to send to the internal actor.
actor_sender: mpsc::Sender,
- /// String representation of the node_id of this node.
- me: String,
-
- /// The DNS resolver to be used in this magicsock.
- #[cfg(not(wasm_browser))]
- dns_resolver: DnsResolver,
-
- /// Key for this node.
- secret_key: SecretKey,
- /// Encryption key for this node.
- secret_encryption_key: crypto_box::SecretKey,
+ /// NodeId of this node.
+ public_key: PublicKey,
+ // - State Management
/// Close is in progress (or done)
closing: AtomicBool,
/// Close was called.
closed: AtomicBool,
+
+ // - Networking Info
+ /// Our discovered direct addresses.
+ direct_addrs: DiscoveredDirectAddrs,
+ /// Our latest net-report
+ net_report: Watchable<(Option<Report>, UpdateReason)>,
/// If the last net_report report, reports IPv6 to be available.
ipv6_reported: Arc,
-
- /// Zero nodes means relay is disabled.
- relay_map: RelayMap,
/// Tracks the networkmap node entity for each node discovery key.
node_map: NodeMap,
/// Tracks the mapped IP addresses
ip_mapped_addrs: IpMappedAddresses,
- /// NetReport client
- net_reporter: net_report::Addr,
- /// The state for an active DiscoKey.
- disco_secrets: DiscoSecrets,
+ /// Local addresses
+ local_addrs_watch: LocalAddrsWatch,
+ /// Currently bound IP addresses of all sockets
+ #[cfg(not(wasm_browser))]
+ ip_bind_addrs: Vec,
+ /// The DNS resolver to be used in this magicsock.
+ #[cfg(not(wasm_browser))]
+ dns_resolver: DnsResolver,
- /// Disco (ping) queue
- disco_sender: mpsc::Sender<(SendAddr, PublicKey, disco::Message)>,
+ /// Disco
+ disco: DiscoState,
+ // - Discovery
/// Optional discovery service
discovery: Option>,
-
/// Optional user-defined discover data.
discovery_user_data: RwLock>,
-
- /// Our discovered direct addresses.
- direct_addrs: DiscoveredDirectAddrs,
-
- /// Our latest net-report
- net_report: Watchable >>,
-
- /// List of CallMeMaybe disco messages that should be sent out after the next endpoint update
- /// completes
- pending_call_me_maybes: std::sync::Mutex>,
-
- /// Indicates the direct addr update state.
- direct_addr_update_state: DirectAddrUpdateState,
-
/// Broadcast channel for listening to discovery updates.
discovery_subscribers: DiscoverySubscribers,
+ /// Metrics
pub(crate) metrics: EndpointMetrics,
-
- local_addrs_watch: LocalAddrsWatch,
- #[cfg(not(wasm_browser))]
- ip_bind_addrs: Vec,
}
#[allow(missing_docs)]
@@ -288,10 +273,6 @@ impl MagicSock {
self.closed.load(Ordering::SeqCst)
}
- fn public_key(&self) -> PublicKey {
- self.secret_key.public()
- }
-
/// Get the cached version of addresses.
pub(crate) fn local_addr(&self) -> Vec {
self.local_addrs_watch.get().expect("disconnected")
@@ -358,8 +339,11 @@ impl MagicSock {
///
/// [`Watcher`]: n0_watcher::Watcher
/// [`Watcher::initialized`]: n0_watcher::Watcher::initialized
- pub(crate) fn net_report(&self) -> n0_watcher::Direct>> {
- self.net_report.watch()
+ pub(crate) fn net_report(&self) -> impl Watcher<Value = Option<Report>> {
+ self.net_report
+ .watch()
+ .map(|(r, _)| r)
+ .expect("disconnected")
}
/// Watch for changes to the home relay.
@@ -402,7 +386,7 @@ impl MagicSock {
}
/// Add addresses for a node to the magic socket's addresbook.
- #[instrument(skip_all, fields(me = %self.me))]
+ #[instrument(skip_all)]
pub fn add_node_addr(
&self,
mut addr: NodeAddr,
@@ -640,7 +624,7 @@ impl MagicSock {
self.metrics.magicsock.recv_gro_datagrams.inc();
}
- // Chunk through the datagrams in this GRO payload to find disco and stun
+ // Chunk through the datagrams in this GRO payload to find disco
// packets and forward them to the actor
for datagram in buf[..quinn_meta.len].chunks_mut(quinn_meta.stride) {
if datagram.len() < quinn_meta.stride {
@@ -651,19 +635,11 @@ impl MagicSock {
);
}
- // Detect DISCO and STUN datagrams and process them. Overwrite the first
+ // Detect DISCO datagrams and process them. Overwrite the first
// byte of those packets with zero to make Quinn ignore the packet. This
// relies on quinn::EndpointConfig::grease_quic_bit being set to `false`,
// which we do in Endpoint::bind.
- if source_addr.is_ip() && stun::is(datagram) {
- trace!(src = ?source_addr, len = %quinn_meta.stride, "UDP recv: stun packet");
- let packet2 = Bytes::copy_from_slice(datagram);
- self.net_reporter.receive_stun_packet(
- packet2,
- source_addr.clone().into_socket_addr().expect("checked"),
- );
- datagram[0] = 0u8;
- } else if let Some((sender, sealed_box)) = disco::source_and_box(datagram) {
+ if let Some((sender, sealed_box)) = disco::source_and_box(datagram) {
trace!(src = ?source_addr, len = %quinn_meta.stride, "UDP recv: disco packet");
self.handle_disco_message(sender, sealed_box, source_addr);
datagram[0] = 0u8;
@@ -752,7 +728,7 @@ impl MagicSock {
}
}
} else {
- // If all datagrams in this buf are DISCO or STUN, set len to zero to make
+ // If all datagrams in this buf are DISCO, set len to zero to make
// Quinn skip the buf completely.
quinn_meta.len = 0;
}
@@ -780,17 +756,17 @@ impl MagicSock {
if let transports::Addr::Relay(_, node_id) = src {
if node_id != &sender {
// TODO: return here?
- warn!("Received relay disco message from connection for {:?}, but with message from {}", node_id.fmt_short(), sender.fmt_short());
+ warn!(
+ "Received relay disco message from connection for {}, but with message from {}",
+ node_id.fmt_short(),
+ sender.fmt_short()
+ );
}
}
// We're now reasonably sure we're expecting communication from
// this node, do the heavy crypto lifting to see what they want.
- let dm = match self.disco_secrets.unseal_and_decode(
- &self.secret_encryption_key,
- sender,
- sealed_box.to_vec(),
- ) {
+ let dm = match self.disco.unseal_and_decode(sender, sealed_box) {
Ok(dm) => dm,
Err(DiscoBoxError::Open { source, .. }) => {
warn!(?source, "failed to open disco box");
@@ -898,11 +874,7 @@ impl MagicSock {
txn = ?dm.tx_id,
);
- if self
- .disco_sender
- .try_send((addr.clone(), sender, pong))
- .is_err()
- {
+ if !self.disco.try_send(addr.clone(), sender, pong) {
warn!(%addr, "failed to queue pong");
}
@@ -916,15 +888,6 @@ impl MagicSock {
}
}
- fn encode_disco_message(&self, dst_key: PublicKey, msg: &disco::Message) -> Bytes {
- self.disco_secrets.encode_and_seal(
- &self.secret_encryption_key,
- self.secret_key.public(),
- dst_key,
- msg,
- )
- }
-
fn send_ping_queued(&self, ping: SendPing) {
let SendPing {
id,
@@ -935,13 +898,9 @@ impl MagicSock {
} = ping;
let msg = disco::Message::Ping(disco::Ping {
tx_id,
- node_key: self.public_key(),
+ node_key: self.public_key,
});
- let sent = self
- .disco_sender
- .try_send((dst.clone(), dst_node, msg))
- .is_ok();
-
+ let sent = self.disco.try_send(dst.clone(), dst_node, msg);
if sent {
let msg_sender = self.actor_sender.clone();
trace!(%dst, tx = %HEXLOWER.encode(&tx_id), ?purpose, "ping sent (queued)");
@@ -952,7 +911,7 @@ impl MagicSock {
}
}
- /// Tries to send the ping actions.
+ /// Send the given ping actions out.
async fn send_ping_actions(&self, sender: &UdpSender, msgs: Vec) -> io::Result<()> {
for msg in msgs {
// Abort sending as soon as we know we are shutting down.
@@ -961,20 +920,68 @@ impl MagicSock {
}
match msg {
PingAction::SendCallMeMaybe {
- ref relay_url,
+ relay_url,
dst_node,
} => {
- self.send_or_queue_call_me_maybe(relay_url, dst_node);
+ // Sends the call-me-maybe DISCO message, queuing if addresses are too stale.
+ //
+ // To send the call-me-maybe message, we need to know our current direct addresses. If
+ // this information is too stale, the call-me-maybe is queued while a net_report run is
+ // scheduled. Once this run finishes, the call-me-maybe will be sent.
+ match self.direct_addrs.fresh_enough() {
+ Ok(()) => {
+ let msg = disco::Message::CallMeMaybe(
+ self.direct_addrs.to_call_me_maybe_message(),
+ );
+ if !self.disco.try_send(
+ SendAddr::Relay(relay_url.clone()),
+ dst_node,
+ msg.clone(),
+ ) {
+ warn!(dstkey = %dst_node.fmt_short(), %relay_url, "relay channel full, dropping call-me-maybe");
+ } else {
+ debug!(dstkey = %dst_node.fmt_short(), %relay_url, "call-me-maybe sent");
+ }
+ }
+ Err(last_refresh_ago) => {
+ debug!(
+ ?last_refresh_ago,
+ "want call-me-maybe but direct addrs stale; queuing after restun",
+ );
+ self.actor_sender
+ .try_send(ActorMessage::ScheduleDirectAddrUpdate(
+ UpdateReason::RefreshForPeering,
+ Some((dst_node, relay_url)),
+ ))
+ .ok();
+ }
+ }
}
- PingAction::SendPing(ping) => {
- self.send_ping(sender, ping).await?;
+ PingAction::SendPing(SendPing {
+ id,
+ dst,
+ dst_node,
+ tx_id,
+ purpose,
+ }) => {
+ let msg = disco::Message::Ping(disco::Ping {
+ tx_id,
+ node_key: self.public_key,
+ });
+
+ self.send_disco_message(sender, dst.clone(), dst_node, msg)
+ .await?;
+ debug!(%dst, tx = %HEXLOWER.encode(&tx_id), ?purpose, "ping sent");
+ let msg_sender = self.actor_sender.clone();
+ self.node_map
+ .notify_ping_sent(id, dst, tx_id, purpose, msg_sender);
}
}
}
Ok(())
}
- /// Send a disco message. UDP messages will be polled to send directly on the UDP socket.
+ /// Sends out a disco message.
async fn send_disco_message(
&self,
sender: &UdpSender,
@@ -994,7 +1001,8 @@ impl MagicSock {
"connection closed",
));
}
- let pkt = self.encode_disco_message(dst_key, &msg);
+
+ let pkt = self.disco.encode_and_seal(self.public_key, dst_key, &msg);
let transmit = transports::Transmit {
contents: &pkt,
@@ -1017,90 +1025,6 @@ impl MagicSock {
}
}
- async fn send_ping(&self, sender: &UdpSender, ping: SendPing) -> io::Result<()> {
- let SendPing {
- id,
- dst,
- dst_node,
- tx_id,
- purpose,
- } = ping;
- let msg = disco::Message::Ping(disco::Ping {
- tx_id,
- node_key: self.public_key(),
- });
-
- self.send_disco_message(sender, dst.clone(), dst_node, msg)
- .await?;
- debug!(%dst, tx = %HEXLOWER.encode(&tx_id), ?purpose, "ping sent");
- let msg_sender = self.actor_sender.clone();
- self.node_map
- .notify_ping_sent(id, dst.clone(), tx_id, purpose, msg_sender);
- Ok(())
- }
-
- fn send_queued_call_me_maybes(&self) {
- let msg = self.direct_addrs.to_call_me_maybe_message();
- let msg = disco::Message::CallMeMaybe(msg);
- for (public_key, url) in self
- .pending_call_me_maybes
- .lock()
- .expect("poisoned")
- .drain()
- {
- if self
- .disco_sender
- .try_send((SendAddr::Relay(url), public_key, msg.clone()))
- .is_err()
- {
- warn!(node = %public_key.fmt_short(), "relay channel full, dropping call-me-maybe");
- }
- }
- }
-
- /// Sends the call-me-maybe DISCO message, queuing if addresses are too stale.
- ///
- /// To send the call-me-maybe message, we need to know our current direct addresses. If
- /// this information is too stale, the call-me-maybe is queued while a net_report run is
- /// scheduled. Once this run finishes, the call-me-maybe will be sent.
- fn send_or_queue_call_me_maybe(&self, url: &RelayUrl, dst_node: NodeId) {
- match self.direct_addrs.fresh_enough() {
- Ok(()) => {
- let msg = self.direct_addrs.to_call_me_maybe_message();
- let msg = disco::Message::CallMeMaybe(msg);
- if self
- .disco_sender
- .try_send((SendAddr::Relay(url.clone()), dst_node, msg.clone()))
- .is_err()
- {
- warn!(dstkey = %dst_node.fmt_short(), relayurl = %url,
- "relay channel full, dropping call-me-maybe");
- } else {
- debug!(dstkey = %dst_node.fmt_short(), relayurl = %url, "call-me-maybe sent");
- }
- }
- Err(last_refresh_ago) => {
- self.pending_call_me_maybes
- .lock()
- .expect("poisoned")
- .insert(dst_node, url.clone());
- debug!(
- ?last_refresh_ago,
- "want call-me-maybe but direct addrs stale; queuing after restun",
- );
- self.re_stun("refresh-for-peering");
- }
- }
- }
-
- /// Triggers an address discovery. The provided why string is for debug logging only.
- #[instrument(skip_all)]
- fn re_stun(&self, why: &'static str) {
- debug!("re_stun: {}", why);
- self.metrics.magicsock.re_stun_calls.inc();
- self.direct_addr_update_state.schedule_run(why);
- }
-
/// Publishes our address to a discovery service, if configured.
///
/// Called whenever our addresses or home relay node changes.
@@ -1159,49 +1083,125 @@ impl From for MappedAddr {
/// and start a new one when the current one has finished
#[derive(Debug)]
struct DirectAddrUpdateState {
- /// If running, set to the reason for the currently the update.
- running: sync::watch::Sender>,
/// If set, start a new update as soon as the current one is finished.
- want_update: std::sync::Mutex >,
+ want_update: Option,
+ msock: Arc,
+ #[cfg(not(wasm_browser))]
+ port_mapper: portmapper::Client,
+ /// The prober that discovers local network conditions, including the closest relay and NAT mappings.
+ net_reporter: Arc>,
+ relay_map: RelayMap,
+ run_done: mpsc::Sender<()>,
+}
+
+#[derive(Default, Debug, PartialEq, Eq, Clone, Copy)]
+enum UpdateReason {
+ /// Initial state
+ #[default]
+ None,
+ RefreshForPeering,
+ Periodic,
+ PortmapUpdated,
+ LinkChangeMajor,
+ LinkChangeMinor,
+}
+
+impl UpdateReason {
+ fn is_major(self) -> bool {
+ matches!(self, Self::LinkChangeMajor)
+ }
}
impl DirectAddrUpdateState {
- fn new() -> Self {
- let (running, _) = sync::watch::channel(None);
+ fn new(
+ msock: Arc,
+ #[cfg(not(wasm_browser))] port_mapper: portmapper::Client,
+ net_reporter: Arc>,
+ relay_map: RelayMap,
+ run_done: mpsc::Sender<()>,
+ ) -> Self {
DirectAddrUpdateState {
- running,
want_update: Default::default(),
+ #[cfg(not(wasm_browser))]
+ port_mapper,
+ net_reporter,
+ msock,
+ relay_map,
+ run_done,
}
}
/// Schedules a new run, either starting it immediately if none is running or
/// scheduling it for later.
- fn schedule_run(&self, why: &'static str) {
- if self.is_running() {
- let _ = self.want_update.lock().expect("poisoned").insert(why);
- } else {
- self.run(why);
+ fn schedule_run(&mut self, why: UpdateReason, if_state: IfStateDetails) {
+ match self.net_reporter.clone().try_lock_owned() {
+ Ok(net_reporter) => {
+ self.run(why, if_state, net_reporter);
+ }
+ Err(_) => {
+ let _ = self.want_update.insert(why);
+ }
}
}
- /// Returns `true` if an update is currently in progress.
- fn is_running(&self) -> bool {
- self.running.borrow().is_some()
+ /// If another run is needed, triggers this run, otherwise does nothing.
+ fn try_run(&mut self, if_state: IfStateDetails) {
+ match self.net_reporter.clone().try_lock_owned() {
+ Ok(net_reporter) => {
+ if let Some(why) = self.want_update.take() {
+ self.run(why, if_state, net_reporter);
+ }
+ }
+ Err(_) => {
+ // do nothing
+ }
+ }
}
/// Trigger a new run.
- fn run(&self, why: &'static str) {
- self.running.send(Some(why)).ok();
- }
+ fn run(
+ &mut self,
+ why: UpdateReason,
+ if_state: IfStateDetails,
+ mut net_reporter: tokio::sync::OwnedMutexGuard,
+ ) {
+ debug!("starting direct addr update ({:?})", why);
+ #[cfg(not(wasm_browser))]
+ self.port_mapper.procure_mapping();
+ // Don't start a net report probe if we know
+ // we are shutting down
+ if self.msock.is_closing() || self.msock.is_closed() {
+ debug!("skipping net_report, socket is shutting down");
+ return;
+ }
+ if self.relay_map.is_empty() {
+ debug!("skipping net_report, empty RelayMap");
+ self.msock.net_report.set((None, why)).ok();
+ return;
+ }
- /// Clears the current running state.
- fn finish_run(&self) {
- self.running.send(None).ok();
- }
+ debug!("requesting net_report report");
+ let msock = self.msock.clone();
- /// Returns the next update, if one is set.
- fn next_update(&self) -> Option<&'static str> {
- self.want_update.lock().expect("poisoned").take()
+ let run_done = self.run_done.clone();
+ task::spawn(async move {
+ let fut = time::timeout(
+ NET_REPORT_TIMEOUT,
+ net_reporter.get_report(if_state, why.is_major()),
+ );
+ match fut.await {
+ Ok(report) => {
+ msock.net_report.set((Some(report), why)).ok();
+ }
+ Err(time::Elapsed { .. }) => {
+ warn!("net_report report timed out");
+ }
+ }
+
+ // mark run as finished
+ debug!("direct addr update done ({:?})", why);
+ run_done.send(()).await.ok();
+ });
}
}
@@ -1229,14 +1229,6 @@ pub enum CreateHandleError {
impl Handle {
/// Creates a magic [`MagicSock`] listening on [`Options::addr_v4`] and [`Options::addr_v6`].
async fn new(opts: Options) -> Result {
- let me = opts.secret_key.public().fmt_short();
-
- Self::with_name(me, opts)
- .instrument(error_span!("magicsock"))
- .await
- }
-
- async fn with_name(me: String, opts: Options) -> Result {
let Options {
addr_v4,
addr_v6,
@@ -1263,35 +1255,9 @@ impl Handle {
let (ip_transports, port_mapper) =
bind_ip(addr_v4, addr_v6, &metrics).context(BindSocketsSnafu)?;
- #[cfg(not(wasm_browser))]
- let v4_socket = ip_transports
- .iter()
- .find(|t| t.bind_addr().is_ipv4())
- .expect("must bind a ipv4 socket")
- .socket();
- #[cfg(not(wasm_browser))]
- let v6_socket = ip_transports.iter().find_map(|t| {
- if t.bind_addr().is_ipv6() {
- Some(t.socket())
- } else {
- None
- }
- });
-
let ip_mapped_addrs = IpMappedAddresses::default();
- let net_reporter = net_report::Client::new(
- #[cfg(not(wasm_browser))]
- Some(port_mapper.clone()),
- #[cfg(not(wasm_browser))]
- dns_resolver.clone(),
- #[cfg(not(wasm_browser))]
- Some(ip_mapped_addrs.clone()),
- metrics.net_report.clone(),
- );
-
let (actor_sender, actor_receiver) = mpsc::channel(256);
- let (disco_sender, mut disco_receiver) = mpsc::channel(256);
// load the node data
let node_map = node_map.unwrap_or_default();
@@ -1326,30 +1292,25 @@ impl Handle {
#[cfg(wasm_browser)]
let transports = Transports::new(relay_transports);
+ let (disco, disco_receiver) = DiscoState::new(secret_encryption_key);
+
let msock = Arc::new(MagicSock {
- me,
- secret_key,
- secret_encryption_key,
+ public_key: secret_key.public(),
closing: AtomicBool::new(false),
closed: AtomicBool::new(false),
+ disco,
actor_sender: actor_sender.clone(),
ipv6_reported,
- relay_map,
- net_reporter: net_reporter.addr(),
- disco_secrets: DiscoSecrets::default(),
node_map,
- ip_mapped_addrs,
- disco_sender,
+ ip_mapped_addrs: ip_mapped_addrs.clone(),
discovery,
discovery_user_data: RwLock::new(discovery_user_data),
direct_addrs: Default::default(),
- net_report: Default::default(),
- pending_call_me_maybes: Default::default(),
- direct_addr_update_state: DirectAddrUpdateState::new(),
+ net_report: Watchable::new((None, UpdateReason::None)),
#[cfg(not(wasm_browser))]
- dns_resolver,
+ dns_resolver: dns_resolver.clone(),
discovery_subscribers: DiscoverySubscribers::new(),
- metrics,
+ metrics: metrics.clone(),
local_addrs_watch: transports.local_addrs_watch(),
#[cfg(not(wasm_browser))]
ip_bind_addrs: transports.ip_bind_addrs(),
@@ -1363,8 +1324,7 @@ impl Handle {
// the packet if grease_quic_bit is set to false.
endpoint_config.grease_quic_bit(false);
- let sender1 = transports.create_sender(msock.clone());
- let sender2 = transports.create_sender(msock.clone());
+ let sender = transports.create_sender(msock.clone());
let local_addrs_watch = transports.local_addrs_watch();
let network_change_sender = transports.create_network_change_sender();
@@ -1382,23 +1342,10 @@ impl Handle {
)
.context(CreateQuinnEndpointSnafu)?;
- let mut actor_tasks = JoinSet::default();
-
- #[cfg(not(wasm_browser))]
- let _ = actor_tasks.spawn({
- let msock = msock.clone();
- async move {
- while let Some((dst, dst_key, msg)) = disco_receiver.recv().await {
- if let Err(err) = msock.send_disco_message(&sender1, dst.clone(), dst_key, msg).await {
- warn!(%dst, node = %dst_key.fmt_short(), ?err, "failed to send disco message (UDP)");
- }
- }
- }
- });
-
let network_monitor = netmon::Monitor::new()
.await
.context(CreateNetmonMonitorSnafu)?;
+
let qad_endpoint = endpoint.clone();
#[cfg(any(test, feature = "test-utils"))]
@@ -1412,46 +1359,66 @@ impl Handle {
let net_report_config = net_report::Options::default();
#[cfg(not(wasm_browser))]
- let net_report_config = net_report_config
- .stun_v4(Some(v4_socket))
- .stun_v6(v6_socket)
- .quic_config(Some(QuicConfig {
- ep: qad_endpoint,
- client_config,
- ipv4: true,
- ipv6,
- }));
+ let net_report_config = net_report_config.quic_config(Some(QuicConfig {
+ ep: qad_endpoint,
+ client_config,
+ ipv4: true,
+ ipv6,
+ }));
#[cfg(any(test, feature = "test-utils"))]
let net_report_config =
net_report_config.insecure_skip_relay_cert_verify(insecure_skip_relay_cert_verify);
+ let net_reporter = net_report::Client::new(
+ #[cfg(not(wasm_browser))]
+ dns_resolver,
+ #[cfg(not(wasm_browser))]
+ Some(ip_mapped_addrs),
+ relay_map.clone(),
+ net_report_config,
+ metrics.net_report.clone(),
+ );
+
+ let (direct_addr_done_tx, direct_addr_done_rx) = mpsc::channel(8);
+ let direct_addr_update_state = DirectAddrUpdateState::new(
+ msock.clone(),
+ #[cfg(not(wasm_browser))]
+ port_mapper,
+ Arc::new(Mutex::new(net_reporter)),
+ relay_map,
+ direct_addr_done_tx,
+ );
+
+ let netmon_watcher = network_monitor.interface_state();
let actor = Actor {
msg_receiver: actor_receiver,
- msg_sender: actor_sender,
msock: msock.clone(),
periodic_re_stun_timer: new_re_stun_timer(false),
- net_info_last: None,
- #[cfg(not(wasm_browser))]
- port_mapper,
- no_v4_send: false,
- net_reporter,
network_monitor,
- net_report_config,
+ netmon_watcher,
+ direct_addr_update_state,
network_change_sender,
+ direct_addr_done_rx,
+ pending_call_me_maybes: Default::default(),
+ disco_receiver,
};
- actor_tasks.spawn(
+
+ let actor_token = CancellationToken::new();
+ let token = actor_token.clone();
+ let actor_task = task::spawn(
actor
- .run(local_addrs_watch, sender2)
+ .run(token, local_addrs_watch, sender)
.instrument(info_span!("actor")),
);
- let actor_tasks = Arc::new(Mutex::new(actor_tasks));
+ let actor_task = Arc::new(Mutex::new(Some(AbortOnDropHandle::new(actor_task))));
Ok(Handle {
msock,
- actor_tasks,
+ actor_task,
endpoint,
+ actor_token,
})
}
@@ -1465,9 +1432,9 @@ impl Handle {
/// Only the first close does anything. Any later closes return nil.
/// Polling the socket ([`AsyncUdpSocket::poll_recv`]) will return [`Poll::Pending`]
/// indefinitely after this call.
- #[instrument(skip_all, fields(me = %self.msock.me))]
+ #[instrument(skip_all)]
pub(crate) async fn close(&self) {
- trace!("magicsock closing...");
+ trace!(me = ?self.public_key, "magicsock closing...");
// Initiate closing all connections, and refuse future connections.
self.endpoint.close(0u16.into(), b"");
@@ -1492,38 +1459,27 @@ impl Handle {
return;
}
self.msock.closing.store(true, Ordering::Relaxed);
- // If this fails, then there's no receiver listening for shutdown messages,
- // so nothing to shut down anyways.
- self.msock
- .actor_sender
- .send(ActorMessage::Shutdown)
- .await
- .ok();
- self.msock.closed.store(true, Ordering::SeqCst);
+ self.actor_token.cancel();
- let mut tasks = self.actor_tasks.lock().await;
-
- // give the tasks a moment to shutdown cleanly
- let tasks_ref = &mut tasks;
- let shutdown_done = time::timeout(Duration::from_millis(100), async move {
- while let Some(task) = tasks_ref.join_next().await {
- if let Err(err) = task {
+ if let Some(task) = self.actor_task.lock().await.take() {
+ // give the tasks a moment to shutdown cleanly
+ let shutdown_done = time::timeout(Duration::from_millis(100), async move {
+ if let Err(err) = task.await {
warn!("unexpected error in task shutdown: {:?}", err);
}
- }
- })
- .await;
- match shutdown_done {
- Ok(_) => trace!("tasks finished in time, shutdown complete"),
- Err(_elapsed) => {
- // shutdown all tasks
- warn!(
- "tasks didn't finish in time, aborting remaining {}/3 tasks",
- tasks.len()
- );
- tasks.shutdown().await;
+ })
+ .await;
+ match shutdown_done {
+ Ok(_) => trace!("tasks finished in time, shutdown complete"),
+ Err(time::Elapsed { .. }) => {
+ // Dropping the task will abort it
+ warn!("tasks didn't finish in time, aborting");
+ }
}
}
+
+ self.msock.closed.store(true, Ordering::SeqCst);
+
trace!("magicsock closed");
}
}
@@ -1541,45 +1497,69 @@ fn default_quic_client_config() -> rustls::ClientConfig {
.with_no_client_auth()
}
-#[derive(Debug, Default)]
-struct DiscoSecrets(std::sync::Mutex>);
+#[derive(Debug)]
+struct DiscoState {
+ /// Encryption key for this node.
+ secret_encryption_key: crypto_box::SecretKey,
+ /// The state for an active DiscoKey.
+ secrets: std::sync::Mutex>,
+ /// Disco (ping) queue
+ sender: mpsc::Sender<(SendAddr, PublicKey, disco::Message)>,
+}
-impl DiscoSecrets {
- fn get(&self, secret: &crypto_box::SecretKey, node_id: PublicKey, cb: F) -> T
- where
- F: FnOnce(&mut SharedSecret) -> T,
- {
- let mut inner = self.0.lock().expect("poisoned");
- let x = inner.entry(node_id).or_insert_with(|| {
- let public_key = public_ed_box(&node_id.public());
- SharedSecret::new(secret, &public_key)
- });
- cb(x)
+impl DiscoState {
+ fn new(
+ secret_encryption_key: crypto_box::SecretKey,
+ ) -> (Self, mpsc::Receiver<(SendAddr, PublicKey, disco::Message)>) {
+ let (disco_sender, disco_receiver) = mpsc::channel(256);
+
+ (
+ Self {
+ secret_encryption_key,
+ secrets: Default::default(),
+ sender: disco_sender,
+ },
+ disco_receiver,
+ )
+ }
+
+ fn try_send(&self, dst: SendAddr, node_id: PublicKey, msg: disco::Message) -> bool {
+ self.sender.try_send((dst, node_id, msg)).is_ok()
}
fn encode_and_seal(
&self,
- this_secret_key: &crypto_box::SecretKey,
this_node_id: NodeId,
other_node_id: NodeId,
msg: &disco::Message,
) -> Bytes {
let mut seal = msg.as_bytes();
- self.get(this_secret_key, other_node_id, |secret| {
- secret.seal(&mut seal)
- });
+ self.get_secret(other_node_id, |secret| secret.seal(&mut seal));
disco::encode_message(&this_node_id, seal).into()
}
+
fn unseal_and_decode(
&self,
- secret: &crypto_box::SecretKey,
node_id: PublicKey,
- mut sealed_box: Vec,
+ sealed_box: &[u8],
) -> Result {
- self.get(secret, node_id, |secret| secret.open(&mut sealed_box))
+ let mut sealed_box = sealed_box.to_vec();
+ self.get_secret(node_id, |secret| secret.open(&mut sealed_box))
.context(OpenSnafu)?;
disco::Message::from_bytes(&sealed_box).context(ParseSnafu)
}
+
+ fn get_secret(&self, node_id: PublicKey, cb: F) -> T
+ where
+ F: FnOnce(&mut SharedSecret) -> T,
+ {
+ let mut inner = self.secrets.lock().expect("poisoned");
+ let x = inner.entry(node_id).or_insert_with(|| {
+ let public_key = public_ed_box(&node_id.public());
+ SharedSecret::new(&self.secret_encryption_key, &public_key)
+ });
+ cb(x)
+ }
}
#[allow(missing_docs)]
@@ -1665,14 +1645,10 @@ impl AsyncUdpSocket for MagicUdpSocket {
#[derive(Debug)]
enum ActorMessage {
- Shutdown,
PingActions(Vec),
EndpointPingExpired(usize, stun_rs::TransactionId),
- NetReport(
- Result>, NetReportError>,
- &'static str,
- ),
NetworkChange,
+ ScheduleDirectAddrUpdate(UpdateReason, Option<(NodeId, RelayUrl)>),
#[cfg(test)]
ForceNetworkChange(bool),
}
@@ -1680,28 +1656,20 @@ enum ActorMessage {
struct Actor {
msock: Arc,
msg_receiver: mpsc::Receiver,
- msg_sender: mpsc::Sender,
/// When set, is an AfterFunc timer that will call MagicSock::do_periodic_stun.
periodic_re_stun_timer: time::Interval,
- /// The `NetInfo` provided in the last call to `net_info_func`. It's used to deduplicate calls to netInfoFunc.
- net_info_last: Option,
-
- #[cfg(not(wasm_browser))]
- port_mapper: portmapper::Client,
-
- /// Configuration for net report
- net_report_config: net_report::Options,
-
- /// Whether IPv4 UDP is known to be unable to transmit
- /// at all. This could happen if the socket is in an invalid state
- /// (as can happen on darwin after a network link status change).
- no_v4_send: bool,
-
- /// The prober that discovers local network conditions, including the closest relay relay and NAT mappings.
- net_reporter: net_report::Client,
network_monitor: netmon::Monitor,
+ netmon_watcher: n0_watcher::Direct,
network_change_sender: transports::NetworkChangeSender,
+ /// Indicates the direct addr update state.
+ direct_addr_update_state: DirectAddrUpdateState,
+ direct_addr_done_rx: mpsc::Receiver<()>,
+
+ /// List of CallMeMaybe disco messages that should be sent out after
+ /// the next endpoint update completes
+ pending_call_me_maybes: HashMap,
+ disco_receiver: mpsc::Receiver<(SendAddr, PublicKey, disco::Message)>,
}
#[cfg(not(wasm_browser))]
@@ -1754,33 +1722,28 @@ fn bind_ip(
Ok((ip, port_mapper))
}
-#[derive(Debug, Snafu)]
-#[non_exhaustive]
-enum NetReportError {
- #[snafu(display("Net report not received"))]
- NotReceived,
- #[snafu(display("Net report timed out"))]
- Timeout,
- #[snafu(display("Net report encountered an error"))]
- NetReport { source: ReportError },
-}
-
impl Actor {
async fn run(
mut self,
+ shutdown_token: CancellationToken,
mut watcher: impl Watcher> + Send + Sync,
sender: UdpSender,
) {
+ // Initialize addresses
+ #[cfg(not(wasm_browser))]
+ self.update_direct_addresses(None);
+
// Setup network monitoring
- let mut netmon_watcher = self.network_monitor.interface_state();
- let mut current_netmon_state = netmon_watcher.get().expect("missing network state");
+ let mut current_netmon_state = self.netmon_watcher.get().expect("missing network state");
#[cfg(not(wasm_browser))]
let mut direct_addr_heartbeat_timer = time::interval(HEARTBEAT_INTERVAL);
- let mut direct_addr_update_receiver =
- self.msock.direct_addr_update_state.running.subscribe();
+
#[cfg(not(wasm_browser))]
- let mut portmap_watcher = self.port_mapper.watch_external_address();
+ let mut portmap_watcher = self
+ .direct_addr_update_state
+ .port_mapper
+ .watch_external_address();
let mut discovery_events: BoxStream = Box::pin(n0_future::stream::empty());
if let Some(d) = self.msock.discovery() {
@@ -1793,6 +1756,8 @@ impl Actor {
#[cfg_attr(wasm_browser, allow(unused_mut))]
let mut portmap_watcher_closed = false;
+ let mut net_report_watcher = self.msock.net_report.watch();
+
loop {
self.msock.metrics.magicsock.actor_tick_main.inc();
#[cfg(not(wasm_browser))]
@@ -1806,6 +1771,10 @@ impl Actor {
let direct_addr_heartbeat_timer_tick = n0_future::future::pending();
tokio::select! {
+ _ = shutdown_token.cancelled() => {
+ debug!("shutting down");
+ return;
+ }
msg = self.msg_receiver.recv(), if !receiver_closed => {
let Some(msg) = msg else {
trace!("tick: magicsock receiver closed");
@@ -1817,14 +1786,12 @@ impl Actor {
trace!(?msg, "tick: msg");
self.msock.metrics.magicsock.actor_tick_msg.inc();
- if self.handle_actor_message(msg, &sender).await {
- return;
- }
+ self.handle_actor_message(msg, &sender).await;
}
tick = self.periodic_re_stun_timer.tick() => {
trace!("tick: re_stun {:?}", tick);
self.msock.metrics.magicsock.actor_tick_re_stun.inc();
- self.msock.re_stun("periodic");
+ self.re_stun(UpdateReason::Periodic);
}
new_addr = watcher.updated() => {
match new_addr {
@@ -1839,6 +1806,32 @@ impl Actor {
}
}
}
+ report = net_report_watcher.updated() => {
+ match report {
+ Ok((report, _)) => {
+ self.handle_net_report_report(report);
+ #[cfg(not(wasm_browser))]
+ {
+ self.periodic_re_stun_timer = new_re_stun_timer(true);
+ }
+ }
+ Err(_) => {
+ warn!("net report watcher stopped");
+ }
+ }
+ }
+ reason = self.direct_addr_done_rx.recv() => {
+ match reason {
+ Some(()) => {
+ // check if a new run needs to be scheduled
+ let state = self.netmon_watcher.get().expect("disconnected");
+ self.direct_addr_update_state.try_run(state.into());
+ }
+ None => {
+ warn!("direct addr watcher died");
+ }
+ }
+ }
change = portmap_watcher_changed, if !portmap_watcher_closed => {
#[cfg(not(wasm_browser))]
{
@@ -1854,7 +1847,7 @@ impl Actor {
self.msock.metrics.magicsock.actor_tick_portmap_changed.inc();
let new_external_address = *portmap_watcher.borrow();
debug!("external address updated: {new_external_address:?}");
- self.msock.re_stun("portmap_updated");
+ self.re_stun(UpdateReason::PortmapUpdated);
}
#[cfg(wasm_browser)]
let _unused_in_browsers = change;
@@ -1874,15 +1867,7 @@ impl Actor {
self.handle_ping_actions(&sender, msgs).await;
}
}
- _ = direct_addr_update_receiver.changed() => {
- let reason = *direct_addr_update_receiver.borrow();
- trace!("tick: direct addr update receiver {:?}", reason);
- self.msock.metrics.magicsock.actor_tick_direct_addr_update_receiver.inc();
- if let Some(reason) = reason {
- self.refresh_direct_addrs(reason).await;
- }
- }
- state = netmon_watcher.updated() => {
+ state = self.netmon_watcher.updated() => {
let Ok(state) = state else {
trace!("tick: link change receiver closed");
self.msock.metrics.magicsock.actor_tick_other.inc();
@@ -1892,7 +1877,7 @@ impl Actor {
current_netmon_state = state;
trace!("tick: link change {}", is_major);
self.msock.metrics.magicsock.actor_link_change.inc();
- self.handle_network_change(is_major);
+ self.handle_network_change(is_major).await;
}
// Even if `discovery_events` yields `None`, it could begin to yield
// `Some` again in the future, so we don't want to disable this branch
@@ -1912,11 +1897,16 @@ impl Actor {
// Send the discovery item to the subscribers of the discovery broadcast stream.
self.msock.discovery_subscribers.send(discovery_item);
}
+ Some((dst, dst_key, msg)) = self.disco_receiver.recv() => {
+ if let Err(err) = self.msock.send_disco_message(&sender, dst.clone(), dst_key, msg).await {
+ warn!(%dst, node = %dst_key.fmt_short(), ?err, "failed to send disco message (UDP)");
+ }
+ }
}
}
}
- fn handle_network_change(&mut self, is_major: bool) {
+ async fn handle_network_change(&mut self, is_major: bool) {
debug!("link change detected: major? {}", is_major);
if is_major {
@@ -1925,14 +1915,20 @@ impl Actor {
}
#[cfg(not(wasm_browser))]
- self.msock.dns_resolver.clear_cache();
- self.msock.re_stun("link-change-major");
+ self.msock.dns_resolver.reset().await;
+ self.re_stun(UpdateReason::LinkChangeMajor);
self.reset_endpoint_states();
} else {
- self.msock.re_stun("link-change-minor");
+ self.re_stun(UpdateReason::LinkChangeMinor);
}
}
+ fn re_stun(&mut self, why: UpdateReason) {
+ let state = self.netmon_watcher.get().expect("disconnected");
+ self.direct_addr_update_state
+ .schedule_run(why, state.into());
+ }
+
#[instrument(skip_all)]
async fn handle_ping_actions(&mut self, sender: &UdpSender, msgs: Vec) {
if let Err(err) = self.msock.send_ping_actions(sender, msgs).await {
@@ -1943,65 +1939,30 @@ impl Actor {
/// Processes an incoming actor message.
///
/// Returns `true` if it was a shutdown.
- async fn handle_actor_message(&mut self, msg: ActorMessage, sender: &UdpSender) -> bool {
+ async fn handle_actor_message(&mut self, msg: ActorMessage, sender: &UdpSender) {
match msg {
- ActorMessage::Shutdown => {
- debug!("shutting down");
-
- self.msock.node_map.notify_shutdown();
- #[cfg(not(wasm_browser))]
- self.port_mapper.deactivate();
-
- debug!("shutdown complete");
- return true;
- }
ActorMessage::EndpointPingExpired(id, txid) => {
self.msock.node_map.notify_ping_timeout(id, txid);
}
- ActorMessage::NetReport(report, why) => {
- match report {
- Ok(report) => {
- self.handle_net_report_report(report).await;
- }
- Err(err) => {
- warn!(
- "failed to generate net_report report for: {}: {:?}",
- why, err
- );
- }
- }
- self.finalize_direct_addrs_update(why);
- }
ActorMessage::NetworkChange => {
self.network_monitor.network_change().await.ok();
}
+ ActorMessage::ScheduleDirectAddrUpdate(why, data) => {
+ if let Some((node, url)) = data {
+ self.pending_call_me_maybes.insert(node, url);
+ }
+ let state = self.netmon_watcher.get().expect("disconnected");
+ self.direct_addr_update_state
+ .schedule_run(why, state.into());
+ }
#[cfg(test)]
ActorMessage::ForceNetworkChange(is_major) => {
- self.handle_network_change(is_major);
+ self.handle_network_change(is_major).await;
}
ActorMessage::PingActions(ping_actions) => {
self.handle_ping_actions(sender, ping_actions).await;
}
}
-
- false
- }
-
- /// Refreshes knowledge about our direct addresses.
- ///
- /// In other words, this triggers a net_report run.
- ///
- /// Note that invoking this is managed by the [`DirectAddrUpdateState`] and this should
- /// never be invoked directly. Some day this will be refactored to not allow this easy
- /// mistake to be made.
- #[instrument(level = "debug", skip_all)]
- async fn refresh_direct_addrs(&mut self, why: &'static str) {
- self.msock.metrics.magicsock.update_direct_addrs.inc();
-
- debug!("starting direct addr update ({})", why);
- #[cfg(not(wasm_browser))]
- self.port_mapper.procure_mapping();
- self.update_net_info(why).await;
}
/// Updates the direct addresses of this magic socket.
@@ -2013,8 +1974,11 @@ impl Actor {
/// - A net_report report.
/// - The local interfaces IP addresses.
#[cfg(not(wasm_browser))]
- fn update_direct_addresses(&mut self, net_report_report: Option>) {
- let portmap_watcher = self.port_mapper.watch_external_address();
+ fn update_direct_addresses(&mut self, net_report_report: Option<&net_report::Report>) {
+ let portmap_watcher = self
+ .direct_addr_update_state
+ .port_mapper
+ .watch_external_address();
// We only want to have one DirectAddr for each SocketAddr we have. So we store
// this as a map of SocketAddr -> DirectAddrType. At the end we will construct a
@@ -2027,15 +1991,12 @@ impl Actor {
addrs
.entry(portmap_ext)
.or_insert(DirectAddrType::Portmapped);
- self.set_net_info_have_port_map();
}
// Next add STUN addresses from the net_report report.
if let Some(net_report_report) = net_report_report {
if let Some(global_v4) = net_report_report.global_v4 {
- addrs
- .entry(global_v4.into())
- .or_insert(DirectAddrType::Stun);
+ addrs.entry(global_v4.into()).or_insert(DirectAddrType::Qad);
// If they're behind a hard NAT and are using a fixed
// port locally, assume they might've added a static
@@ -2051,21 +2012,19 @@ impl Actor {
if let Some(port) = port {
if net_report_report
- .mapping_varies_by_dest_ip
+ .mapping_varies_by_dest()
.unwrap_or_default()
{
let mut addr = global_v4;
addr.set_port(port);
addrs
.entry(addr.into())
- .or_insert(DirectAddrType::Stun4LocalPort);
+ .or_insert(DirectAddrType::Qad4LocalPort);
}
}
}
if let Some(global_v6) = net_report_report.global_v6 {
- addrs
- .entry(global_v6.into())
- .or_insert(DirectAddrType::Stun);
+ addrs.entry(global_v6.into()).or_insert(DirectAddrType::Qad);
}
}
@@ -2077,7 +2036,6 @@ impl Actor {
.zip(self.msock.ip_local_addrs())
.collect();
- let msock = self.msock.clone();
let has_ipv4_unspecified = local_addrs.iter().find_map(|(_, a)| {
if a.is_ipv4() && a.ip().is_unspecified() {
Some(a.port())
@@ -2093,248 +2051,95 @@ impl Actor {
}
});
- // The following code can be slow, we do not want to block the caller since it would
- // block the actor loop.
- task::spawn(
- async move {
- // If a socket is bound to the unspecified address, create SocketAddrs for
- // each local IP address by pairing it with the port the socket is bound on.
- if local_addrs
- .iter()
- .any(|(_, local)| local.ip().is_unspecified())
- {
- // Depending on the OS and network interfaces attached and their state
- // enumerating the local interfaces can take a long time. Especially
- // Windows is very slow.
- let LocalAddresses {
- regular: mut ips,
- loopback,
- } = tokio::task::spawn_blocking(LocalAddresses::new)
- .await
- .expect("spawn panicked");
- if ips.is_empty() && addrs.is_empty() {
- // Include loopback addresses only if there are no other interfaces
- // or public addresses, this allows testing offline.
- ips = loopback;
- }
-
- for ip in ips {
- let port_if_unspecified = match ip {
- IpAddr::V4(_) => has_ipv4_unspecified,
- IpAddr::V6(_) => has_ipv6_unspecified,
- };
- if let Some(port) = port_if_unspecified {
- let addr = SocketAddr::new(ip, port);
- addrs.entry(addr).or_insert(DirectAddrType::Local);
- }
- }
- }
-
- // If a socket is bound to a specific address, add it.
- for (bound, local) in local_addrs {
- if !bound.ip().is_unspecified() {
- addrs.entry(local).or_insert(DirectAddrType::Local);
- }
+ // If a socket is bound to the unspecified address, create SocketAddrs for
+ // each local IP address by pairing it with the port the socket is bound on.
+ if local_addrs
+ .iter()
+ .any(|(_, local)| local.ip().is_unspecified())
+ {
+ let LocalAddresses {
+ regular: mut ips,
+ loopback,
+ } = self
+ .netmon_watcher
+ .get()
+ .expect("netmon disconnected")
+ .local_addresses;
+ if ips.is_empty() && addrs.is_empty() {
+ // Include loopback addresses only if there are no other interfaces
+ // or public addresses, this allows testing offline.
+ ips = loopback;
+ }
+
+ for ip in ips {
+ let port_if_unspecified = match ip {
+ IpAddr::V4(_) => has_ipv4_unspecified,
+ IpAddr::V6(_) => has_ipv6_unspecified,
+ };
+ if let Some(port) = port_if_unspecified {
+ let addr = SocketAddr::new(ip, port);
+ addrs.entry(addr).or_insert(DirectAddrType::Local);
}
-
- // Finally create and store store all these direct addresses and send any
- // queued call-me-maybe messages.
- msock.store_direct_addresses(
- addrs
- .iter()
- .map(|(addr, typ)| DirectAddr {
- addr: *addr,
- typ: *typ,
- })
- .collect(),
- );
- msock.send_queued_call_me_maybes();
- }
- .instrument(Span::current()),
- );
- }
-
- /// Called when a direct addr update is done, no matter if it was successful or not.
- fn finalize_direct_addrs_update(&mut self, why: &'static str) {
- let new_why = self.msock.direct_addr_update_state.next_update();
- if !self.msock.is_closed() {
- if let Some(new_why) = new_why {
- self.msock.direct_addr_update_state.run(new_why);
- return;
- }
- #[cfg(not(wasm_browser))]
- {
- self.periodic_re_stun_timer = new_re_stun_timer(true);
- }
- }
-
- self.msock.direct_addr_update_state.finish_run();
- debug!("direct addr update done ({})", why);
- }
-
- /// Updates `NetInfo.HavePortMap` to true.
- #[instrument(level = "debug", skip_all)]
- fn set_net_info_have_port_map(&mut self) {
- if let Some(ref mut net_info_last) = self.net_info_last {
- if net_info_last.have_port_map {
- // No change.
- return;
}
- net_info_last.have_port_map = true;
- self.net_info_last = Some(net_info_last.clone());
}
- }
- #[instrument(level = "debug", skip_all)]
- async fn call_net_info_callback(&mut self, ni: NetInfo) {
- if let Some(ref net_info_last) = self.net_info_last {
- if ni.basically_equal(net_info_last) {
- return;
+ // If a socket is bound to a specific address, add it.
+ for (bound, local) in local_addrs {
+ if !bound.ip().is_unspecified() {
+ addrs.entry(local).or_insert(DirectAddrType::Local);
}
}
- self.net_info_last = Some(ni);
+        // Finally create and store all these direct addresses and send any
+        // queued call-me-maybe messages.
+ self.msock.store_direct_addresses(
+ addrs
+ .iter()
+ .map(|(addr, typ)| DirectAddr {
+ addr: *addr,
+ typ: *typ,
+ })
+ .collect(),
+ );
+ self.send_queued_call_me_maybes();
}
- /// Calls net_report.
- ///
- /// Note that invoking this is managed by [`DirectAddrUpdateState`] via
- /// [`Actor::refresh_direct_addrs`] and this should never be invoked directly. Some day
- /// this will be refactored to not allow this easy mistake to be made.
- #[instrument(level = "debug", skip_all)]
- async fn update_net_info(&mut self, why: &'static str) {
- // Don't start a net report probe if we know
- // we are shutting down
- if self.msock.is_closing() || self.msock.is_closed() {
- debug!("skipping net_report, socket is shutting down");
- return;
- }
- if self.msock.relay_map.is_empty() {
- debug!("skipping net_report, empty RelayMap");
- self.msg_sender
- .send(ActorMessage::NetReport(Ok(None), why))
- .await
- .ok();
- return;
- }
-
- let relay_map = self.msock.relay_map.clone();
- let opts = self.net_report_config.clone();
+ fn send_queued_call_me_maybes(&mut self) {
+ let msg = self.msock.direct_addrs.to_call_me_maybe_message();
+ let msg = disco::Message::CallMeMaybe(msg);
+ // allocate, to minimize locking duration
- debug!("requesting net_report report");
- match self.net_reporter.get_report_channel(relay_map, opts).await {
- Ok(rx) => {
- let msg_sender = self.msg_sender.clone();
- task::spawn(async move {
- let report = time::timeout(NET_REPORT_TIMEOUT, rx).await;
- let report = match report {
- Ok(Ok(Ok(report))) => Ok(Some(report)),
- Ok(Ok(Err(err))) => Err(NetReportSnafu.into_error(err)),
- Ok(Err(_)) => Err(NotReceivedSnafu.build()),
- Err(_) => Err(TimeoutSnafu.build()),
- };
- msg_sender
- .send(ActorMessage::NetReport(report, why))
- .await
- .ok();
- // The receiver of the NetReport message will call
- // .finalize_direct_addrs_update().
- });
- }
- Err(err) => {
- warn!("unable to start net_report generation: {:?}", err);
- self.finalize_direct_addrs_update(why);
+ for (public_key, url) in self.pending_call_me_maybes.drain() {
+ if !self
+ .msock
+ .disco
+ .try_send(SendAddr::Relay(url), public_key, msg.clone())
+ {
+ warn!(node = %public_key.fmt_short(), "relay channel full, dropping call-me-maybe");
}
}
}
- async fn handle_net_report_report(&mut self, report: Option>) {
- if let Some(ref report) = report {
- // only returns Err if the report hasn't changed.
- self.msock.net_report.set(Some(report.clone())).ok();
- self.msock
- .ipv6_reported
- .store(report.ipv6, Ordering::Relaxed);
- let r = &report;
- trace!(
- "setting no_v4_send {} -> {}",
- self.no_v4_send,
- !r.ipv4_can_send
- );
- self.no_v4_send = !r.ipv4_can_send;
-
- #[cfg(not(wasm_browser))]
- let have_port_map = self.port_mapper.watch_external_address().borrow().is_some();
- #[cfg(wasm_browser)]
- let have_port_map = false;
-
- let mut ni = NetInfo {
- relay_latency: Default::default(),
- mapping_varies_by_dest_ip: r.mapping_varies_by_dest_ip,
- hair_pinning: r.hair_pinning,
- #[cfg(not(wasm_browser))]
- portmap_probe: r.portmap_probe.clone(),
- have_port_map,
- working_ipv6: Some(r.ipv6),
- os_has_ipv6: Some(r.os_has_ipv6),
- working_udp: Some(r.udp),
- working_icmp_v4: r.icmpv4,
- working_icmp_v6: r.icmpv6,
- preferred_relay: r.preferred_relay.clone(),
- };
- for (rid, d) in r.relay_v4_latency.iter() {
- ni.relay_latency
- .insert(format!("{rid}-v4"), d.as_secs_f64());
- }
- for (rid, d) in r.relay_v6_latency.iter() {
- ni.relay_latency
- .insert(format!("{rid}-v6"), d.as_secs_f64());
- }
-
- if ni.preferred_relay.is_none() {
- // Perhaps UDP is blocked. Pick a deterministic but arbitrary one.
- ni.preferred_relay = self.pick_relay_fallback();
+ fn handle_net_report_report(&mut self, mut report: Option) {
+ if let Some(ref mut r) = report {
+ self.msock.ipv6_reported.store(r.udp_v6, Ordering::Relaxed);
+ if r.preferred_relay.is_none() {
+ if let Some(my_relay) = self.msock.my_relay() {
+ r.preferred_relay.replace(my_relay);
+ }
}
// Notify all transports
- self.network_change_sender.on_network_change(&ni);
-
- // TODO: set link type
- self.call_net_info_callback(ni).await;
- }
- #[cfg(not(wasm_browser))]
- self.update_direct_addresses(report);
- }
-
- /// Returns a deterministic relay node to connect to. This is only used if net_report
- /// couldn't find the nearest one, for instance, if UDP is blocked and thus STUN
- /// latency checks aren't working.
- ///
- /// If no the [`RelayMap`] is empty, returns `0`.
- fn pick_relay_fallback(&self) -> Option {
- // TODO: figure out which relay node most of our nodes are using,
- // and use that region as our fallback.
- //
- // If we already had selected something in the past and it has any
- // nodes, we want to stay on it. If there are no nodes at all,
- // stay on whatever relay we previously picked. If we need to pick
- // one and have no node info, pick a node randomly.
- //
- // We used to do the above for legacy clients, but never updated it for disco.
-
- let my_relay = self.msock.my_relay();
- if my_relay.is_some() {
- return my_relay;
+ self.network_change_sender.on_network_change(r);
}
- let ids = self.msock.relay_map.urls().collect::>();
- let mut rng = rand::rngs::StdRng::seed_from_u64(0);
- ids.choose(&mut rng).map(|c| (*c).clone())
+ #[cfg(not(wasm_browser))]
+ self.update_direct_addresses(report.as_ref());
}
/// Resets the preferred address for all nodes.
/// This is called when connectivity changes enough that we no longer trust the old routes.
- #[instrument(skip_all, fields(me = %self.msock.me))]
+ #[instrument(skip_all)]
fn reset_endpoint_states(&mut self) {
self.msock.node_map.reset_node_states()
}
@@ -2386,7 +2191,7 @@ fn bind_with_fallback(mut addr: SocketAddr) -> io::Result {
///
/// These are all the [`DirectAddr`]s that this [`MagicSock`] is aware of for itself.
/// They include all locally bound ones as well as those discovered by other mechanisms like
-/// STUN.
+/// QAD.
#[derive(derive_more::Debug, Default, Clone)]
struct DiscoveredDirectAddrs {
/// The last set of discovered direct addresses.
@@ -2576,23 +2381,23 @@ pub enum DirectAddrType {
Unknown,
/// A locally bound socket address.
Local,
- /// Public internet address discovered via STUN.
+ /// Public internet address discovered via QAD.
///
- /// When possible an iroh node will perform STUN to discover which is the address
+ /// When possible an iroh node will perform QAD to discover which is the address
/// from which it sends data on the public internet. This can be different from locally
/// bound addresses when the node is on a local network which performs NAT or similar.
- Stun,
+ Qad,
/// An address assigned by the router using port mapping.
///
/// When possible an iroh node will request a port mapping from the local router to
/// get a publicly routable direct address.
Portmapped,
- /// Hard NAT: STUN'ed IPv4 address + local fixed port.
+ /// Hard NAT: QAD'ed IPv4 address + local fixed port.
///
/// It is possible to configure iroh to bound to a specific port and independently
/// configure the router to forward this port to the iroh node. This indicates a
- /// situation like this, which still uses STUN to discover the public address.
- Stun4LocalPort,
+ /// situation like this, which still uses QAD to discover the public address.
+ Qad4LocalPort,
}
impl Display for DirectAddrType {
@@ -2600,98 +2405,19 @@ impl Display for DirectAddrType {
match self {
DirectAddrType::Unknown => write!(f, "?"),
DirectAddrType::Local => write!(f, "local"),
- DirectAddrType::Stun => write!(f, "stun"),
+ DirectAddrType::Qad => write!(f, "qad"),
DirectAddrType::Portmapped => write!(f, "portmap"),
- DirectAddrType::Stun4LocalPort => write!(f, "stun4localport"),
+ DirectAddrType::Qad4LocalPort => write!(f, "qad4localport"),
}
}
}
-/// Contains information about the host's network state.
-#[derive(Debug, Clone, PartialEq)]
-pub(crate) struct NetInfo {
- /// Says whether the host's NAT mappings vary based on the destination IP.
- mapping_varies_by_dest_ip: Option,
-
- /// If their router does hairpinning. It reports true even if there's no NAT involved.
- hair_pinning: Option,
-
- /// Whether the host has IPv6 internet connectivity.
- working_ipv6: Option,
-
- /// Whether the OS supports IPv6 at all, regardless of whether IPv6 internet connectivity is available.
- os_has_ipv6: Option,
-
- /// Whether the host has UDP internet connectivity.
- working_udp: Option,
-
- /// Whether ICMPv4 works, `None` means not checked.
- working_icmp_v4: Option,
-
- /// Whether ICMPv6 works, `None` means not checked.
- working_icmp_v6: Option,
-
- /// Whether we have an existing portmap open (UPnP, PMP, or PCP).
- have_port_map: bool,
-
- /// Probe indicating the presence of port mapping protocols on the LAN.
- #[cfg(not(wasm_browser))]
- portmap_probe: Option,
-
- /// This node's preferred relay server for incoming traffic.
- ///
- /// The node might be be temporarily connected to multiple relay servers (to send to
- /// other nodes) but this is the relay on which you can always contact this node. Also
- /// known as home relay.
- preferred_relay: Option,
-
- /// The fastest recent time to reach various relay STUN servers, in seconds.
- ///
- /// This should only be updated rarely, or when there's a
- /// material change, as any change here also gets uploaded to the control plane.
- relay_latency: BTreeMap,
-}
-
-impl NetInfo {
- /// Checks if this is probably still the same network as *other*.
- ///
- /// This tries to compare the network situation, without taking into account things
- /// expected to change a little like e.g. latency to the relay server.
- fn basically_equal(&self, other: &Self) -> bool {
- let eq_icmp_v4 = match (self.working_icmp_v4, other.working_icmp_v4) {
- (Some(slf), Some(other)) => slf == other,
- _ => true, // ignore for comparison if only one report had this info
- };
- let eq_icmp_v6 = match (self.working_icmp_v6, other.working_icmp_v6) {
- (Some(slf), Some(other)) => slf == other,
- _ => true, // ignore for comparison if only one report had this info
- };
-
- #[cfg(not(wasm_browser))]
- let probe_eq = self.portmap_probe == other.portmap_probe;
- #[cfg(wasm_browser)]
- let probe_eq = true;
-
- self.mapping_varies_by_dest_ip == other.mapping_varies_by_dest_ip
- && self.hair_pinning == other.hair_pinning
- && self.working_ipv6 == other.working_ipv6
- && self.os_has_ipv6 == other.os_has_ipv6
- && self.working_udp == other.working_udp
- && eq_icmp_v4
- && eq_icmp_v6
- && self.have_port_map == other.have_port_map
- && probe_eq
- && self.preferred_relay == other.preferred_relay
- }
-}
-
#[cfg(test)]
mod tests {
use std::{collections::BTreeSet, sync::Arc, time::Duration};
use data_encoding::HEXLOWER;
- use iroh_base::{NodeAddr, NodeId, PublicKey, SecretKey};
- use iroh_relay::RelayMap;
+ use iroh_base::{NodeAddr, NodeId, PublicKey};
use n0_future::{time, StreamExt};
use n0_snafu::{Result, ResultExt};
use n0_watcher::Watcher;
@@ -2707,7 +2433,7 @@ mod tests {
dns::DnsResolver,
endpoint::{DirectAddr, PathSelection, Source},
magicsock::{node_map, Handle, MagicSock},
- tls, Endpoint, RelayMode,
+ tls, Endpoint, RelayMap, RelayMode, SecretKey,
};
const ALPN: &[u8] = b"n0/test/1";
diff --git a/iroh/src/magicsock/metrics.rs b/iroh/src/magicsock/metrics.rs
index b6d7fe5d44c..803a829bd48 100644
--- a/iroh/src/magicsock/metrics.rs
+++ b/iroh/src/magicsock/metrics.rs
@@ -8,7 +8,6 @@ use serde::{Deserialize, Serialize};
#[non_exhaustive]
#[metrics(name = "magicsock")]
pub struct Metrics {
- pub re_stun_calls: Counter,
pub update_direct_addrs: Counter,
// Sends (data or disco)
@@ -66,7 +65,6 @@ pub struct Metrics {
pub actor_tick_re_stun: Counter,
pub actor_tick_portmap_changed: Counter,
pub actor_tick_direct_addr_heartbeat: Counter,
- pub actor_tick_direct_addr_update_receiver: Counter,
pub actor_link_change: Counter,
pub actor_tick_other: Counter,
diff --git a/iroh/src/magicsock/node_map.rs b/iroh/src/magicsock/node_map.rs
index c15aa69c008..252f5d0eb97 100644
--- a/iroh/src/magicsock/node_map.rs
+++ b/iroh/src/magicsock/node_map.rs
@@ -265,13 +265,6 @@ impl NodeMap {
Some((public_key, udp_addr, relay_url, ping_actions))
}
- pub(super) fn notify_shutdown(&self) {
- let mut inner = self.inner.lock().expect("poisoned");
- for (_, ep) in inner.node_states_mut() {
- ep.reset();
- }
- }
-
pub(super) fn reset_node_states(&self) {
let mut inner = self.inner.lock().expect("poisoned");
for (_, ep) in inner.node_states_mut() {
diff --git a/iroh/src/magicsock/node_map/best_addr.rs b/iroh/src/magicsock/node_map/best_addr.rs
index 7670bb3e9ae..48866e27813 100644
--- a/iroh/src/magicsock/node_map/best_addr.rs
+++ b/iroh/src/magicsock/node_map/best_addr.rs
@@ -57,7 +57,6 @@ pub(super) enum State<'a> {
#[derive(Debug, Clone, Copy)]
pub enum ClearReason {
- Reset,
Inactive,
PongTimeout,
MatchesOurLocalAddr,
diff --git a/iroh/src/magicsock/node_map/node_state.rs b/iroh/src/magicsock/node_map/node_state.rs
index f48e5488c3a..be1e0a58dbe 100644
--- a/iroh/src/magicsock/node_map/node_state.rs
+++ b/iroh/src/magicsock/node_map/node_state.rs
@@ -6,7 +6,6 @@ use std::{
use data_encoding::HEXLOWER;
use iroh_base::{NodeAddr, NodeId, PublicKey, RelayUrl};
-use iroh_relay::protos::stun;
use n0_future::{
task::{self, AbortOnDropHandle},
time::{self, Duration, Instant},
@@ -44,7 +43,7 @@ const PING_TIMEOUT_DURATION: Duration = Duration::from_secs(5);
const GOOD_ENOUGH_LATENCY: Duration = Duration::from_millis(5);
/// How long since the last activity we try to keep an established endpoint peering alive.
-/// It's also the idle time at which we stop doing STUN queries to keep NAT mappings alive.
+/// It's also the idle time at which we stop doing QAD queries to keep NAT mappings alive.
pub(super) const SESSION_ACTIVE_TIMEOUT: Duration = Duration::from_secs(45);
/// How often we try to upgrade to a better patheven if we have some non-relay route that works.
@@ -67,7 +66,7 @@ pub(in crate::magicsock) struct SendPing {
pub id: usize,
pub dst: SendAddr,
pub dst_node: NodeId,
- pub tx_id: stun::TransactionId,
+ pub tx_id: stun_rs::TransactionId,
pub purpose: DiscoPingPurpose,
}
@@ -114,7 +113,7 @@ pub(super) struct NodeState {
/// The fallback/bootstrap path, if non-zero (non-zero for well-behaved clients).
relay_url: Option<(RelayUrl, PathState)>,
udp_paths: NodeUdpPaths,
- sent_pings: HashMap,
+ sent_pings: HashMap,
/// Last time this node was used.
///
/// A node is marked as in use when sending datagrams to them, or when having received
@@ -285,7 +284,9 @@ impl NodeState {
) -> (Option, Option) {
#[cfg(any(test, feature = "test-utils"))]
if self.path_selection == PathSelection::RelayOnly {
- debug!("in `RelayOnly` mode, giving the relay address as the only viable address for this endpoint");
+ debug!(
+ "in `RelayOnly` mode, giving the relay address as the only viable address for this endpoint"
+ );
return (None, self.relay_url());
}
let (best_addr, relay_url) = match self.udp_paths.send_addr(*now, have_ipv6) {
@@ -429,7 +430,7 @@ impl NodeState {
/// Cleanup the expired ping for the passed in txid.
#[instrument("disco", skip_all, fields(node = %self.node_id.fmt_short()))]
- pub(super) fn ping_timeout(&mut self, txid: stun::TransactionId) {
+ pub(super) fn ping_timeout(&mut self, txid: stun_rs::TransactionId) {
if let Some(sp) = self.sent_pings.remove(&txid) {
debug!(tx = %HEXLOWER.encode(&txid), addr = %sp.to, "pong not received in timeout");
match sp.to {
@@ -487,7 +488,7 @@ impl NodeState {
return None; // Similar to `RelayOnly` mode, we don't send UDP pings for hole-punching.
}
- let tx_id = stun::TransactionId::default();
+ let tx_id = stun_rs::TransactionId::default();
trace!(tx = %HEXLOWER.encode(&tx_id), %dst, ?purpose,
dst = %self.node_id.fmt_short(), "start ping");
event!(
@@ -511,7 +512,7 @@ impl NodeState {
pub(super) fn ping_sent(
&mut self,
to: SendAddr,
- tx_id: stun::TransactionId,
+ tx_id: stun_rs::TransactionId,
purpose: DiscoPingPurpose,
sender: mpsc::Sender,
) {
@@ -708,19 +709,6 @@ impl NodeState {
debug!(new = ?new_addrs , %paths, "added new direct paths for endpoint");
}
- /// Clears all the endpoint's p2p state, reverting it to a relay-only endpoint.
- #[instrument(skip_all, fields(node = %self.node_id.fmt_short()))]
- pub(super) fn reset(&mut self) {
- self.last_full_ping = None;
- self.udp_paths
- .best_addr
- .clear(ClearReason::Reset, self.relay_url.is_some());
-
- for es in self.udp_paths.paths.values_mut() {
- es.last_ping = None;
- }
- }
-
/// Handle a received Disco Ping.
///
/// - Ensures the paths the ping was received on is a known path for this endpoint.
@@ -733,7 +721,7 @@ impl NodeState {
pub(super) fn handle_ping(
&mut self,
path: SendAddr,
- tx_id: stun::TransactionId,
+ tx_id: stun_rs::TransactionId,
) -> PingHandled {
let now = Instant::now();
diff --git a/iroh/src/magicsock/node_map/path_state.rs b/iroh/src/magicsock/node_map/path_state.rs
index 7241121722a..2d6855cab30 100644
--- a/iroh/src/magicsock/node_map/path_state.rs
+++ b/iroh/src/magicsock/node_map/path_state.rs
@@ -6,7 +6,6 @@ use std::{
};
use iroh_base::NodeId;
-use iroh_relay::protos::stun;
use n0_future::time::{Duration, Instant};
use tracing::{debug, event, Level};
@@ -39,7 +38,7 @@ pub(super) struct PathState {
/// If non-zero, means that this was an endpoint that we learned about at runtime (from an
/// incoming ping). If so, we keep the time updated and use it to discard old candidates.
// NOTE: tx_id Originally added in tailscale due to .
- last_got_ping: Option<(Instant, stun::TransactionId)>,
+ last_got_ping: Option<(Instant, stun_rs::TransactionId)>,
/// The time this endpoint was last advertised via a call-me-maybe DISCO message.
pub(super) call_me_maybe_time: Option,
@@ -107,7 +106,7 @@ impl PathState {
pub(super) fn with_ping(
node_id: NodeId,
path: SendAddr,
- tx_id: stun::TransactionId,
+ tx_id: stun_rs::TransactionId,
source: Source,
now: Instant,
) -> Self {
@@ -239,7 +238,7 @@ impl PathState {
}
}
- pub(super) fn handle_ping(&mut self, tx_id: stun::TransactionId, now: Instant) -> PingRole {
+ pub(super) fn handle_ping(&mut self, tx_id: stun_rs::TransactionId, now: Instant) -> PingRole {
if Some(&tx_id) == self.last_got_ping.as_ref().map(|(_t, tx_id)| tx_id) {
PingRole::Duplicate
} else {
diff --git a/iroh/src/magicsock/transports.rs b/iroh/src/magicsock/transports.rs
index b37f9ee3b94..20936fbde39 100644
--- a/iroh/src/magicsock/transports.rs
+++ b/iroh/src/magicsock/transports.rs
@@ -21,7 +21,8 @@ pub(crate) use self::ip::IpTransport;
#[cfg(not(wasm_browser))]
use self::ip::{IpNetworkChangeSender, IpSender};
pub(crate) use self::relay::{RelayActorConfig, RelayTransport};
-use super::{MagicSock, NetInfo};
+use super::MagicSock;
+use crate::net_report::Report;
/// Manages the different underlying data transports that the magicsock
/// can support.
@@ -262,14 +263,14 @@ pub(crate) struct NetworkChangeSender {
}
impl NetworkChangeSender {
- pub(crate) fn on_network_change(&self, info: &NetInfo) {
+ pub(crate) fn on_network_change(&self, report: &Report) {
#[cfg(not(wasm_browser))]
for ip in &self.ip {
- ip.on_network_change(info);
+ ip.on_network_change(report);
}
for relay in &self.relay {
- relay.on_network_change(info);
+ relay.on_network_change(report);
}
}
@@ -337,10 +338,6 @@ impl Addr {
matches!(self, Self::Relay(..))
}
- pub(crate) fn is_ip(&self) -> bool {
- matches!(self, Self::Ip(..))
- }
-
/// Returns `None` if not an `Ip`.
pub(crate) fn into_socket_addr(self) -> Option {
match self {
diff --git a/iroh/src/magicsock/transports/ip.rs b/iroh/src/magicsock/transports/ip.rs
index cc4b945d541..68033695305 100644
--- a/iroh/src/magicsock/transports/ip.rs
+++ b/iroh/src/magicsock/transports/ip.rs
@@ -87,10 +87,6 @@ impl IpTransport {
}
}
- pub(crate) fn socket(&self) -> Arc {
- self.socket.clone()
- }
-
pub(super) fn create_sender(&self) -> IpSender {
let sender = self.socket.clone().create_sender();
IpSender {
@@ -109,14 +105,16 @@ pub(super) struct IpNetworkChangeSender {
impl IpNetworkChangeSender {
pub(super) fn rebind(&self) -> io::Result<()> {
+ let old_addr = self.local_addr.get();
self.socket.rebind()?;
let addr = self.socket.local_addr()?;
self.local_addr.set(addr).ok();
+ trace!("rebound from {} to {}", old_addr, addr);
Ok(())
}
- pub(super) fn on_network_change(&self, _info: &crate::magicsock::NetInfo) {
+ pub(super) fn on_network_change(&self, _info: &crate::magicsock::Report) {
// Nothing to do for now
}
}
diff --git a/iroh/src/magicsock/transports/relay.rs b/iroh/src/magicsock/transports/relay.rs
index 0a9d9ef89c0..9345bed6af2 100644
--- a/iroh/src/magicsock/transports/relay.rs
+++ b/iroh/src/magicsock/transports/relay.rs
@@ -142,8 +142,10 @@ pub(super) struct RelayNetworkChangeSender {
}
impl RelayNetworkChangeSender {
- pub(super) fn on_network_change(&self, info: &crate::magicsock::NetInfo) {
- self.send_relay_actor(RelayActorMessage::NetworkChange { info: info.clone() });
+ pub(super) fn on_network_change(&self, report: &crate::magicsock::Report) {
+ self.send_relay_actor(RelayActorMessage::NetworkChange {
+ report: report.clone(),
+ });
}
pub(super) fn rebind(&self) -> io::Result<()> {
diff --git a/iroh/src/magicsock/transports/relay/actor.rs b/iroh/src/magicsock/transports/relay/actor.rs
index 37e84d00a99..cf5c684cfde 100644
--- a/iroh/src/magicsock/transports/relay/actor.rs
+++ b/iroh/src/magicsock/transports/relay/actor.rs
@@ -62,7 +62,8 @@ use url::Url;
#[cfg(not(wasm_browser))]
use crate::dns::DnsResolver;
use crate::{
- magicsock::{Metrics as MagicsockMetrics, NetInfo, RelayContents},
+ magicsock::{Metrics as MagicsockMetrics, RelayContents},
+ net_report::Report,
util::MaybeFuture,
};
@@ -844,7 +845,7 @@ impl ConnectedRelayState {
pub(super) enum RelayActorMessage {
MaybeCloseRelaysOnRebind,
- NetworkChange { info: NetInfo },
+ NetworkChange { report: Report },
}
#[derive(Debug, Clone)]
@@ -970,8 +971,8 @@ impl RelayActor {
async fn handle_msg(&mut self, msg: RelayActorMessage) {
match msg {
- RelayActorMessage::NetworkChange { info } => {
- self.on_network_change(info).await;
+ RelayActorMessage::NetworkChange { report } => {
+ self.on_network_change(report).await;
}
RelayActorMessage::MaybeCloseRelaysOnRebind => {
self.maybe_close_relays_on_rebind().await;
@@ -1007,19 +1008,19 @@ impl RelayActor {
}
}
- async fn on_network_change(&mut self, info: NetInfo) {
+ async fn on_network_change(&mut self, report: Report) {
let my_relay = self.config.my_relay.get();
- if info.preferred_relay == my_relay {
+ if report.preferred_relay == my_relay {
// No change.
return;
}
let old_relay = self
.config
.my_relay
- .set(info.preferred_relay.clone())
+ .set(report.preferred_relay.clone())
.unwrap_or_else(|e| e);
- if let Some(relay_url) = info.preferred_relay {
+ if let Some(relay_url) = report.preferred_relay {
self.config.metrics.relay_home_change.inc();
// On change, notify all currently connected relay servers and
diff --git a/iroh/src/net_report.rs b/iroh/src/net_report.rs
index 4991db9b0ff..577e06ba504 100644
--- a/iroh/src/net_report.rs
+++ b/iroh/src/net_report.rs
@@ -11,43 +11,49 @@
#![cfg_attr(wasm_browser, allow(unused))]
use std::{
- collections::{BTreeMap, HashMap},
- fmt::{self, Debug},
- net::{SocketAddr, SocketAddrV4, SocketAddrV6},
+ collections::{BTreeMap, BTreeSet},
+ fmt::Debug,
+ net::SocketAddr,
sync::Arc,
};
-use bytes::Bytes;
+use defaults::timeouts::PROBES_TIMEOUT;
use iroh_base::RelayUrl;
#[cfg(not(wasm_browser))]
use iroh_relay::dns::DnsResolver;
-use iroh_relay::{protos::stun, RelayMap};
+#[cfg(not(wasm_browser))]
+use iroh_relay::quic::QuicClient;
+#[cfg(not(wasm_browser))]
+use iroh_relay::RelayNode;
+use iroh_relay::{
+ quic::{QUIC_ADDR_DISC_CLOSE_CODE, QUIC_ADDR_DISC_CLOSE_REASON},
+ RelayMap,
+};
+#[cfg(not(wasm_browser))]
+use n0_future::task;
use n0_future::{
- task::{self, AbortOnDropHandle},
- time::{Duration, Instant},
+ task::AbortOnDropHandle,
+ time::{self, Duration, Instant},
+ StreamExt,
};
-use nested_enum_utils::common_fields;
+use n0_watcher::{Watchable, Watcher};
+use tokio::task::JoinSet;
+use tokio_util::sync::CancellationToken;
+use tracing::{debug, trace, warn};
+
#[cfg(not(wasm_browser))]
-use netwatch::UdpSocket;
-use reportgen::ActorRunError;
-use snafu::Snafu;
-use tokio::sync::{self, mpsc, oneshot};
-use tracing::{debug, error, info_span, trace, warn, Instrument};
+use self::reportgen::QadProbeReport;
+use self::reportgen::{ProbeFinished, ProbeReport};
mod defaults;
-#[cfg(not(wasm_browser))]
-mod dns;
mod ip_mapped_addrs;
mod metrics;
-#[cfg(not(wasm_browser))]
-mod ping;
+mod probes;
+mod report;
mod reportgen;
mod options;
-#[cfg(not(wasm_browser))]
-pub use stun_utils::bind_local_stun_socket;
-
/// We "vendor" what we need of the library in browsers for simplicity.
///
/// We could consider making `portmapper` compile to wasm in the future,
@@ -68,153 +74,118 @@ pub(crate) mod portmapper {
}
pub(crate) use ip_mapped_addrs::{IpMappedAddr, IpMappedAddresses};
-pub use metrics::Metrics;
-pub use options::Options;
-pub use reportgen::QuicConfig;
+
+pub(crate) use self::reportgen::IfStateDetails;
#[cfg(not(wasm_browser))]
-use reportgen::SocketState;
+use self::reportgen::SocketState;
+pub use self::{
+ metrics::Metrics,
+ options::Options,
+ probes::Probe,
+ report::{RelayLatencies, Report},
+ reportgen::QuicConfig,
+};
+use crate::util::MaybeFuture;
const FULL_REPORT_INTERVAL: Duration = Duration::from_secs(5 * 60);
/// The maximum latency of all nodes, if none are found yet.
///
/// Normally the max latency of all nodes is computed, but if we don't yet know any nodes
-/// latencies we return this as default. This is the value of the initial STUN probe
+/// latencies we return this as default. This is the value of the initial QAD probe
/// delays. It is only used as time to wait for further latencies to arrive, which *should*
/// never happen unless there already is at least one latency. Yet here we are, defining a
/// default which will never be used.
const DEFAULT_MAX_LATENCY: Duration = Duration::from_millis(100);
-/// A net_report report.
-///
-/// Can be obtained by calling [`Client::get_report`].
-#[derive(Default, Debug, PartialEq, Eq, Clone)]
-pub struct Report {
- /// A UDP STUN round trip completed.
- pub udp: bool,
- /// An IPv6 STUN round trip completed.
- pub ipv6: bool,
- /// An IPv4 STUN round trip completed.
- pub ipv4: bool,
- /// An IPv6 packet was able to be sent
- pub ipv6_can_send: bool,
- /// an IPv4 packet was able to be sent
- pub ipv4_can_send: bool,
- /// could bind a socket to ::1
- pub os_has_ipv6: bool,
- /// An ICMPv4 round trip completed, `None` if not checked.
- pub icmpv4: Option,
- /// An ICMPv6 round trip completed, `None` if not checked.
- pub icmpv6: Option,
- /// Whether STUN results depend on which STUN server you're talking to (on IPv4).
- pub mapping_varies_by_dest_ip: Option,
- /// Whether STUN results depend on which STUN server you're talking to (on IPv6).
+const ENOUGH_NODES: usize = 3;
+
+/// Client to run net_reports.
+#[derive(Debug)]
+pub(crate) struct Client {
+ #[cfg(not(wasm_browser))]
+ socket_state: SocketState,
+ metrics: Arc,
+ probes: BTreeSet,
+ relay_map: RelayMap,
+ #[cfg(not(wasm_browser))]
+ qad_conns: QadConns,
+ #[cfg(any(test, feature = "test-utils"))]
+ insecure_skip_relay_cert_verify: bool,
+
+ /// A collection of previously generated reports.
///
- /// Note that we don't really expect this to happen and are merely logging this if
- /// detecting rather than using it. For now.
- pub mapping_varies_by_dest_ipv6: Option,
- /// Whether the router supports communicating between two local devices through the NATted
- /// public IP address (on IPv4).
- pub hair_pinning: Option,
- /// Probe indicating the presence of port mapping protocols on the LAN.
- pub portmap_probe: Option,
- /// `None` for unknown
- pub preferred_relay: Option,
- /// keyed by relay Url
- pub relay_latency: RelayLatencies,
- /// keyed by relay Url
- pub relay_v4_latency: RelayLatencies,
- /// keyed by relay Url
- pub relay_v6_latency: RelayLatencies,
- /// ip:port of global IPv4
- pub global_v4: Option,
- /// `[ip]:port` of global IPv6
- pub global_v6: Option,
- /// CaptivePortal is set when we think there's a captive portal that is
- /// intercepting HTTP traffic.
- pub captive_portal: Option,
+ /// Sometimes it is useful to look at past reports to decide what to do.
+ reports: Reports,
}
-impl fmt::Display for Report {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- fmt::Debug::fmt(&self, f)
- }
+#[cfg(not(wasm_browser))]
+#[derive(Debug, Default)]
+struct QadConns {
+ v4: Option<(RelayUrl, QadConn)>,
+ v6: Option<(RelayUrl, QadConn)>,
}
-/// Latencies per relay node.
-#[derive(Debug, Default, PartialEq, Eq, Clone)]
-pub struct RelayLatencies(BTreeMap);
-
-impl RelayLatencies {
- fn new() -> Self {
- Default::default()
- }
-
- /// Updates a relay's latency, if it is faster than before.
- fn update_relay(&mut self, url: RelayUrl, latency: Duration) {
- let val = self.0.entry(url).or_insert(latency);
- if latency < *val {
- *val = latency;
+#[cfg(not(wasm_browser))]
+impl QadConns {
+ fn clear(&mut self) {
+ if let Some((_, conn)) = self.v4.take() {
+ conn.conn
+ .close(QUIC_ADDR_DISC_CLOSE_CODE, QUIC_ADDR_DISC_CLOSE_REASON);
+ }
+ if let Some((_, conn)) = self.v6.take() {
+ conn.conn
+ .close(QUIC_ADDR_DISC_CLOSE_CODE, QUIC_ADDR_DISC_CLOSE_REASON);
}
}
- /// Merges another [`RelayLatencies`] into this one.
- ///
- /// For each relay the latency is updated using [`RelayLatencies::update_relay`].
- fn merge(&mut self, other: &RelayLatencies) {
- for (url, latency) in other.iter() {
- self.update_relay(url.clone(), latency);
+ fn current(&self) -> Vec<ProbeReport> {
+ let mut reports = Vec::new();
+ if let Some((_, ref conn)) = self.v4 {
+ if let Some(mut r) = conn.observer.get() {
+ // grab latest rtt
+ r.latency = conn.conn.rtt();
+ reports.push(ProbeReport::QadIpv4(r));
+ }
}
- }
- /// Returns the maximum latency for all relays.
- ///
- /// If there are not yet any latencies this will return [`DEFAULT_MAX_LATENCY`].
- fn max_latency(&self) -> Duration {
- self.0
- .values()
- .max()
- .copied()
- .unwrap_or(DEFAULT_MAX_LATENCY)
- }
+ if let Some((_, ref conn)) = self.v6 {
+ if let Some(mut r) = conn.observer.get() {
+ // grab latest rtt
+ r.latency = conn.conn.rtt();
+ reports.push(ProbeReport::QadIpv6(r));
+ }
+ }
- /// Returns an iterator over all the relays and their latencies.
- pub fn iter(&self) -> impl Iterator- + '_ {
- self.0.iter().map(|(k, v)| (k, *v))
+ reports
}
- fn len(&self) -> usize {
- self.0.len()
- }
+ fn watch_v4(&self) -> impl n0_future::Stream
- > + Unpin {
+ let watcher = self.v4.as_ref().map(|(_url, conn)| conn.observer.watch());
- fn is_empty(&self) -> bool {
- self.0.is_empty()
+ if let Some(watcher) = watcher {
+ watcher.stream_updates_only().boxed()
+ } else {
+ n0_future::stream::empty().boxed()
+ }
}
- fn get(&self, url: &RelayUrl) -> Option
{
- self.0.get(url).copied()
+ fn watch_v6(&self) -> impl n0_future::Stream- > + Unpin {
+ let watcher = self.v6.as_ref().map(|(_url, conn)| conn.observer.watch());
+ if let Some(watcher) = watcher {
+ watcher.stream_updates_only().boxed()
+ } else {
+ n0_future::stream::empty().boxed()
+ }
}
}
-/// Client to run net_reports.
-///
-/// Creating this creates a net_report actor which runs in the background. Most of the time
-/// it is idle unless [`Client::get_report`] is called, which is the main interface.
-///
-/// The [`Client`] struct can be cloned and results multiple handles to the running actor.
-/// If all [`Client`]s are dropped the actor stops running.
-///
-/// While running the net_report actor expects to be passed all received stun packets using
-/// `Addr::receive_stun_packet`.
+#[cfg(not(wasm_browser))]
#[derive(Debug)]
-pub struct Client {
- /// Channel to send message to the [`Actor`].
- ///
- /// If all senders are dropped, in other words all clones of this struct are dropped,
- /// the actor will terminate.
- addr: Addr,
- /// Ensures the actor is terminated when the client is dropped.
- _drop_guard: Arc
>,
+struct QadConn {
+ conn: quinn::Connection,
+ observer: Watchable<Option<QadProbeReport>>,
+ _handle: AbortOnDropHandle<()>,
}
#[derive(Debug)]
@@ -222,9 +193,9 @@ struct Reports {
/// Do a full relay scan, even if last is `Some`.
next_full: bool,
/// Some previous reports.
- prev: HashMap>,
+ prev: BTreeMap<Instant, Report>,
/// Most recent report.
- last: Option>,
+ last: Option<Report>,
/// Time of last full (non-incremental) report.
last_full: Instant,
}
@@ -232,7 +203,7 @@ struct Reports {
impl Default for Reports {
fn default() -> Self {
Self {
- next_full: Default::default(),
+ next_full: true,
prev: Default::default(),
last: Default::default(),
last_full: Instant::now(),
@@ -242,363 +213,63 @@ impl Default for Reports {
impl Client {
/// Creates a new net_report client.
- ///
- /// This starts a connected actor in the background. Once the client is dropped it will
- /// stop running.
- pub fn new(
- #[cfg(not(wasm_browser))] port_mapper: Option,
+ pub(crate) fn new(
#[cfg(not(wasm_browser))] dns_resolver: DnsResolver,
#[cfg(not(wasm_browser))] ip_mapped_addrs: Option,
+ relay_map: RelayMap,
+ opts: Options,
metrics: Arc,
) -> Self {
- let mut actor = Actor::new(
- #[cfg(not(wasm_browser))]
- port_mapper,
- #[cfg(not(wasm_browser))]
- dns_resolver,
- #[cfg(not(wasm_browser))]
- ip_mapped_addrs,
- metrics,
- );
- let addr = actor.addr();
- let task = task::spawn(
- async move { actor.run().await }.instrument(info_span!("net_report.actor")),
- );
- let drop_guard = AbortOnDropHandle::new(task);
- Client {
- addr,
- _drop_guard: Arc::new(drop_guard),
- }
- }
-
- /// Returns a new address to send messages to this actor.
- ///
- /// Unlike the client itself the returned [`Addr`] does not own the actor task, it only
- /// allows sending messages to the actor.
- pub fn addr(&self) -> Addr {
- self.addr.clone()
- }
+ let probes = opts.as_protocols();
+ #[cfg(any(test, feature = "test-utils"))]
+ let insecure_skip_relay_cert_verify = opts.insecure_skip_relay_cert_verify;
- /// Runs a net_report, returning the report.
- ///
- /// It may not be called concurrently with itself, `&mut self` takes care of that.
- ///
- /// The *stun_conn4* and *stun_conn6* endpoints are bound UDP sockets to use to send out
- /// STUN packets. This function **will not read from the sockets**, as they may be
- /// receiving other traffic as well, normally they are the sockets carrying the real
- /// traffic. Thus all stun packets received on those sockets should be passed to
- /// `Addr::receive_stun_packet` in order for this function to receive the stun
- /// responses and function correctly.
- ///
- /// If these are not passed in this will bind sockets for STUN itself, though results
- /// may not be as reliable.
- ///
- /// The *quic_config* takes a [`QuicConfig`], a combination of a QUIC endpoint and
- /// a client configuration that can be use for verifying the relay server connection.
- /// When available, the report will attempt to get an observed public address
- /// using QUIC address discovery.
- ///
- /// When `None`, it will disable the QUIC address discovery probes.
- ///
- /// This will attempt to use *all* probe protocols.
- #[cfg(test)]
- pub async fn get_report_all(
- &mut self,
- relay_map: RelayMap,
- #[cfg(not(wasm_browser))] stun_sock_v4: Option>,
- #[cfg(not(wasm_browser))] stun_sock_v6: Option>,
- #[cfg(not(wasm_browser))] quic_config: Option,
- ) -> Result, ReportError> {
#[cfg(not(wasm_browser))]
- let opts = Options::default()
- .stun_v4(stun_sock_v4)
- .stun_v6(stun_sock_v6)
- .quic_config(quic_config);
- #[cfg(wasm_browser)]
- let opts = Options::default();
-
- let rx = self.get_report_channel(relay_map, opts).await?;
- match rx.await {
- Ok(res) => res,
- Err(_) => Err(ActorGoneSnafu.build()),
- }
- }
-
- /// Runs a net_report, returning the report.
- ///
- /// It may not be called concurrently with itself, `&mut self` takes care of that.
- ///
- /// Look at [`Options`] for the different configuration options.
- pub async fn get_report(
- &mut self,
- relay_map: RelayMap,
- opts: Options,
- ) -> Result, ReportError> {
- let rx = self.get_report_channel(relay_map, opts).await?;
- match rx.await {
- Ok(res) => res,
- Err(_) => Err(ActorGoneSnafu.build()),
- }
- }
+ let quic_client = opts
+ .quic_config
+ .map(|c| iroh_relay::quic::QuicClient::new(c.ep, c.client_config));
- /// Get report with channel
- ///
- /// Look at [`Options`] for the different configuration options.
- pub(crate) async fn get_report_channel(
- &mut self,
- relay_map: RelayMap,
- opts: Options,
- ) -> Result, ReportError>>, ReportError> {
- let (tx, rx) = oneshot::channel();
- self.addr
- .send(Message::RunCheck {
- relay_map,
- opts,
- response_tx: tx,
- })
- .await
- .map_err(|_| ActorGoneSnafu.build())?;
- Ok(rx)
- }
-}
-
-#[derive(Debug)]
-pub(crate) struct Inflight {
- /// The STUN transaction ID.
- txn: stun::TransactionId,
- /// The time the STUN probe was sent.
- start: Instant,
- /// Response to send STUN results: latency of STUN response and the discovered address.
- s: sync::oneshot::Sender<(Duration, SocketAddr)>,
-}
-
-/// Messages to send to the [`Actor`].
-#[derive(Debug)]
-#[allow(clippy::large_enum_variant)]
-pub(crate) enum Message {
- /// Run a net_report.
- ///
- /// Only one net_report can be run at a time, trying to run multiple concurrently will
- /// fail.
- RunCheck {
- /// The map of relays we want to probe
- relay_map: RelayMap,
- /// Options for the report
- opts: Options,
- /// Channel to receive the response.
- response_tx: oneshot::Sender, ReportError>>,
- },
- /// A report produced by the [`reportgen`] actor.
- ReportReady { report: Box },
- /// The [`reportgen`] actor failed to produce a report.
- ReportAborted { reason: ActorRunError },
- /// An incoming STUN packet to parse.
- StunPacket {
- /// The raw UDP payload.
- payload: Bytes,
- /// The address this was claimed to be received from.
- from_addr: SocketAddr,
- },
- /// A probe wants to register an in-flight STUN request.
- ///
- /// The sender is signalled once the STUN packet is registered with the actor and will
- /// correctly accept the STUN response.
- InFlightStun(Inflight, oneshot::Sender<()>),
-}
-
-/// Sender to the main service.
-///
-/// Unlike [`Client`] this is the raw channel to send messages over. Keeping this alive
-/// will not keep the actor alive, which makes this handy to pass to internal tasks.
-#[derive(Debug, Clone)]
-pub struct Addr {
- sender: mpsc::Sender,
- metrics: Arc,
-}
-
-impl Addr {
- /// Pass a received STUN packet to the net_reporter.
- ///
- /// Normally the UDP sockets to send STUN messages from are passed in so that STUN
- /// packets are sent from the sockets that carry the real traffic. However because
- /// these sockets carry real traffic they will also receive non-STUN traffic, thus the
- /// net_report actor does not read from the sockets directly. If you receive a STUN
- /// packet on the socket you should pass it to this method.
- ///
- /// It is safe to call this even when the net_report actor does not currently have any
- /// in-flight STUN probes. The actor will simply ignore any stray STUN packets.
- ///
- /// There is an implicit queue here which may drop packets if the actor does not keep up
- /// consuming them.
- pub fn receive_stun_packet(&self, payload: Bytes, src: SocketAddr) {
- if let Err(mpsc::error::TrySendError::Full(_)) = self.sender.try_send(Message::StunPacket {
- payload,
- from_addr: src,
- }) {
- self.metrics.stun_packets_dropped.inc();
- warn!("dropping stun packet from {}", src);
- }
- }
-
- async fn send(&self, msg: Message) -> Result<(), mpsc::error::SendError> {
- self.sender.send(msg).await.inspect_err(|_| {
- error!("net_report actor lost");
- })
- }
-}
-
-/// The net_report actor.
-///
-/// This actor runs for the entire duration there's a [`Client`] connected.
-#[derive(Debug)]
-struct Actor {
- // Actor plumbing.
- /// Actor messages channel.
- ///
- /// If there are no more senders the actor stops.
- receiver: mpsc::Receiver,
- /// The sender side of the messages channel.
- ///
- /// This allows creating new [`Addr`]s from the actor.
- sender: mpsc::Sender,
- /// A collection of previously generated reports.
- ///
- /// Sometimes it is useful to look at past reports to decide what to do.
- reports: Reports,
-
- // Actor configuration.
- /// The port mapper client, if those are requested.
- ///
- /// The port mapper is responsible for talking to routers via UPnP and the like to try
- /// and open ports.
- #[cfg(not(wasm_browser))]
- port_mapper: Option,
-
- // Actor state.
- /// Information about the currently in-flight STUN requests.
- ///
- /// This is used to complete the STUN probe when receiving STUN packets.
- in_flight_stun_requests: HashMap,
- /// The [`reportgen`] actor currently generating a report.
- current_report_run: Option,
-
- /// The DNS resolver to use for probes that need to perform DNS lookups
- #[cfg(not(wasm_browser))]
- dns_resolver: DnsResolver,
-
- /// The [`IpMappedAddresses`] that allows you to do QAD in iroh
- #[cfg(not(wasm_browser))]
- ip_mapped_addrs: Option,
- metrics: Arc,
-}
-
-impl Actor {
- /// Creates a new actor.
- ///
- /// This does not start the actor, see [`Actor::run`] for this. You should not
- /// normally create this directly but rather create a [`Client`].
- fn new(
- #[cfg(not(wasm_browser))] port_mapper: Option,
- #[cfg(not(wasm_browser))] dns_resolver: DnsResolver,
- #[cfg(not(wasm_browser))] ip_mapped_addrs: Option,
- metrics: Arc,
- ) -> Self {
- // TODO: consider an instrumented flume channel so we have metrics.
- let (sender, receiver) = mpsc::channel(32);
- Self {
- receiver,
- sender,
- reports: Default::default(),
- #[cfg(not(wasm_browser))]
- port_mapper,
- in_flight_stun_requests: Default::default(),
- current_report_run: None,
- #[cfg(not(wasm_browser))]
+ #[cfg(not(wasm_browser))]
+ let socket_state = SocketState {
+ quic_client,
dns_resolver,
- #[cfg(not(wasm_browser))]
ip_mapped_addrs,
- metrics,
- }
- }
-
- /// Returns the channel to send messages to the actor.
- fn addr(&self) -> Addr {
- Addr {
- sender: self.sender.clone(),
- metrics: self.metrics.clone(),
- }
- }
+ };
- /// Run the actor.
- ///
- /// It will now run and handle messages. Once the connected [`Client`] (including all
- /// its clones) is dropped this will terminate.
- async fn run(&mut self) {
- debug!("net_report actor starting");
- while let Some(msg) = self.receiver.recv().await {
- trace!(?msg, "handling message");
- match msg {
- Message::RunCheck {
- relay_map,
- opts,
- response_tx,
- } => {
- self.handle_run_check(relay_map, opts, response_tx);
- }
- Message::ReportReady { report } => {
- self.handle_report_ready(*report);
- }
- Message::ReportAborted { reason: err } => {
- self.handle_report_aborted(err);
- }
- Message::StunPacket { payload, from_addr } => {
- self.handle_stun_packet(&payload, from_addr);
- }
- Message::InFlightStun(inflight, response_tx) => {
- self.handle_in_flight_stun(inflight, response_tx);
- }
- }
+ Client {
+ #[cfg(not(wasm_browser))]
+ socket_state,
+ metrics,
+ reports: Reports::default(),
+ probes,
+ relay_map,
+ #[cfg(not(wasm_browser))]
+ qad_conns: QadConns::default(),
+ #[cfg(any(test, feature = "test-utils"))]
+ insecure_skip_relay_cert_verify,
}
}
- /// Starts a check run as requested by the [`Message::RunCheck`] message.
+ /// Generates a [`Report`].
///
- /// If *stun_sock_v4* or *stun_sock_v6* are not provided this will bind the sockets
- /// itself. This is not ideal since really you want to send STUN probes from the
- /// sockets you will be using.
- fn handle_run_check(
- &mut self,
- relay_map: RelayMap,
- opts: Options,
- response_tx: oneshot::Sender, ReportError>>,
- ) {
- let protocols = opts.to_protocols();
- #[cfg(not(wasm_browser))]
- let socket_state = SocketState {
- port_mapper: self.port_mapper.clone(),
- stun_sock4: opts.stun_sock_v4,
- stun_sock6: opts.stun_sock_v6,
- quic_config: opts.quic_config,
- dns_resolver: self.dns_resolver.clone(),
- ip_mapped_addrs: self.ip_mapped_addrs.clone(),
- };
- trace!("Attempting probes for protocols {protocols:#?}");
- if self.current_report_run.is_some() {
- response_tx.send(Err(AlreadyRunningSnafu.build())).ok();
- return;
- }
-
+ /// Look at [`Options`] for the different configuration options.
+ pub(crate) async fn get_report(&mut self, if_state: IfStateDetails, is_major: bool) -> Report {
let now = Instant::now();
- let mut do_full = self.reports.next_full
+ let mut do_full = is_major
+ || self.reports.next_full
|| now.duration_since(self.reports.last_full) > FULL_REPORT_INTERVAL;
+ debug!(%do_full, "net_report starting");
+
// If the last report had a captive portal and reported no UDP access,
// it's possible that we didn't get a useful net_report due to the
// captive portal blocking us. If so, make this report a full (non-incremental) one.
if !do_full {
if let Some(ref last) = self.reports.last {
- do_full = !last.udp && last.captive_portal.unwrap_or_default();
+ if !last.has_udp() && last.captive_portal == Some(true) {
+ do_full = true;
+ }
}
}
if do_full {
@@ -609,130 +280,345 @@ impl Actor {
}
self.metrics.reports.inc();
- let actor = reportgen::Client::new(
- self.addr(),
+ let enough_relays = std::cmp::min(self.relay_map.len(), ENOUGH_NODES);
+ #[cfg(wasm_browser)]
+ let if_state = IfStateDetails::default();
+ #[cfg(not(wasm_browser))]
+ let if_state = IfStateDetails {
+ have_v4: if_state.have_v4,
+ have_v6: if_state.have_v6,
+ };
+
+ let mut report = Report::default();
+
+ // Start the reportgen client to start any needed probes
+ let (actor, mut probe_rx) = reportgen::Client::new(
self.reports.last.clone(),
- relay_map,
- protocols,
- self.metrics.clone(),
+ self.relay_map.clone(),
+ self.probes.clone(),
+ if_state.clone(),
#[cfg(not(wasm_browser))]
- socket_state,
+ self.socket_state.clone(),
#[cfg(any(test, feature = "test-utils"))]
- opts.insecure_skip_relay_cert_verify,
+ self.insecure_skip_relay_cert_verify,
);
- self.current_report_run = Some(ReportRun {
- _reportgen: actor,
- report_tx: response_tx,
- });
- }
+ #[cfg(not(wasm_browser))]
+ let reports = self
+ .spawn_qad_probes(&if_state, enough_relays, do_full)
+ .await;
- fn handle_report_ready(&mut self, report: Report) {
- let report = self.finish_and_store_report(report);
- self.in_flight_stun_requests.clear();
- if let Some(ReportRun { report_tx, .. }) = self.current_report_run.take() {
- report_tx.send(Ok(report)).ok();
+ #[cfg(not(wasm_browser))]
+ for r in reports {
+ report.update(&r);
}
- }
- fn handle_report_aborted(&mut self, reason: ActorRunError) {
- self.in_flight_stun_requests.clear();
- if let Some(ReportRun { report_tx, .. }) = self.current_report_run.take() {
- report_tx.send(Err(AbortSnafu { reason }.build())).ok();
+ let mut timeout_fut = std::pin::pin!(MaybeFuture::default());
+
+ #[cfg(not(wasm_browser))]
+ let mut qad_v4_stream = self.qad_conns.watch_v4();
+ #[cfg(wasm_browser)]
+ let mut qad_v4_stream = n0_future::stream::empty::<Option<QadProbeReport>>();
+ #[cfg(not(wasm_browser))]
+ let mut qad_v6_stream = self.qad_conns.watch_v6();
+ #[cfg(wasm_browser)]
+ let mut qad_v6_stream = n0_future::stream::empty::<Option<QadProbeReport>>();
+
+ loop {
+ tokio::select! {
+ biased;
+
+ _ = &mut timeout_fut, if timeout_fut.is_some() => {
+ trace!("timeout done, shutting down");
+ drop(actor); // shuts down the probes
+ break;
+ }
+
+ Some(Some(r)) = qad_v4_stream.next() => {
+ #[cfg(not(wasm_browser))]
+ {
+ trace!(?r, "new report from QAD V4");
+ report.update(&ProbeReport::QadIpv4(r));
+ }
+ }
+
+ Some(Some(r)) = qad_v6_stream.next() => {
+ #[cfg(not(wasm_browser))]
+ {
+ trace!(?r, "new report from QAD V6");
+ report.update(&ProbeReport::QadIpv6(r));
+ }
+ }
+
+ maybe_probe = probe_rx.recv() => {
+ let Some(probe_res) = maybe_probe else {
+ break;
+ };
+ trace!(?probe_res, "handling probe");
+ match probe_res {
+ ProbeFinished::Regular(probe) => match probe {
+ Ok(probe) => {
+ report.update(&probe);
+ if timeout_fut.is_none() {
+ if let Some(timeout) = self.have_enough_reports(enough_relays, &report) {
+ timeout_fut.as_mut().set_future(time::sleep(timeout));
+ }
+ }
+ }
+ Err(err) => {
+ trace!("probe errored: {:?}", err);
+ }
+ },
+ #[cfg(not(wasm_browser))]
+ ProbeFinished::CaptivePortal(portal) => {
+ report.captive_portal = portal;
+ }
+ }
+ }
+ }
}
+
+ self.add_report_history_and_set_preferred_relay(&mut report);
+ debug!(
+ ?report,
+ "generated report in {:02}ms",
+ now.elapsed().as_millis()
+ );
+
+ report
}
- /// Handles [`Message::StunPacket`].
- ///
- /// If there are currently no in-flight stun requests registered this is dropped,
- /// otherwise forwarded to the probe.
- fn handle_stun_packet(&mut self, pkt: &[u8], src: SocketAddr) {
- trace!(%src, "received STUN packet");
- if self.in_flight_stun_requests.is_empty() {
- return;
+ #[cfg(not(wasm_browser))]
+ async fn spawn_qad_probes(
+ &mut self,
+ if_state: &IfStateDetails,
+ enough_relays: usize,
+ do_full: bool,
+ ) -> Vec<ProbeReport> {
+ use tracing::{info_span, Instrument};
+
+ debug!("spawning QAD probes");
+
+ let Some(ref quic_client) = self.socket_state.quic_client else {
+ return Vec::new();
+ };
+
+ if do_full {
+ // clear out existing connections if we are doing a full reset
+ self.qad_conns.clear();
}
- #[cfg(feature = "metrics")]
- match &src {
- SocketAddr::V4(_) => {
- self.metrics.stun_packets_recv_ipv4.inc();
+ if let Some((url, conn)) = &self.qad_conns.v4 {
+ // verify conn is still around
+ if let Some(reason) = conn.conn.close_reason() {
+ trace!(?url, "QAD v4 conn closed: {}", reason);
+ self.qad_conns.v4.take();
}
- SocketAddr::V6(_) => {
- self.metrics.stun_packets_recv_ipv6.inc();
+ }
+ if let Some((url, conn)) = &self.qad_conns.v6 {
+ // verify conn is still around
+ if let Some(reason) = conn.conn.close_reason() {
+ trace!(?url, "QAD v6 conn closed: {}", reason);
+ self.qad_conns.v6.take();
}
}
+ if self.qad_conns.v4.is_some() && self.qad_conns.v6.is_some() == if_state.have_v6 {
+ trace!("not spawning QAD, already have probes");
+ return self.qad_conns.current();
+ }
- match stun::parse_response(pkt) {
- Ok((txn, addr_port)) => match self.in_flight_stun_requests.remove(&txn) {
- Some(inf) => {
- debug!(%src, %txn, "received known STUN packet");
- let elapsed = inf.start.elapsed();
- inf.s.send((elapsed, addr_port)).ok();
- }
- None => {
- debug!(%src, %txn, "received unexpected STUN message response");
- }
- },
- Err(err) => {
- match stun::parse_binding_request(pkt) {
- Ok(txn) => {
- // Is this our hairpin request?
- match self.in_flight_stun_requests.remove(&txn) {
- Some(inf) => {
- debug!(%src, %txn, "received our hairpin STUN request");
- let elapsed = inf.start.elapsed();
- inf.s.send((elapsed, src)).ok();
+ // TODO: randomize choice?
+ const MAX_RELAYS: usize = 5;
+
+ let mut v4_buf = JoinSet::new();
+ let cancel_v4 = CancellationToken::new();
+ let mut v6_buf = JoinSet::new();
+ let cancel_v6 = CancellationToken::new();
+
+ for relay_node in self.relay_map.nodes().take(MAX_RELAYS) {
+ if if_state.have_v4 {
+ debug!(?relay_node.url, "v4 QAD probe");
+ let ip_mapped_addrs = self.socket_state.ip_mapped_addrs.clone();
+ let relay_node = relay_node.clone();
+ let dns_resolver = self.socket_state.dns_resolver.clone();
+ let quic_client = quic_client.clone();
+ let relay_url = relay_node.url.clone();
+ v4_buf.spawn(
+ cancel_v4
+ .child_token()
+ .run_until_cancelled_owned(time::timeout(
+ PROBES_TIMEOUT,
+ run_probe_v4(ip_mapped_addrs, relay_node, quic_client, dns_resolver),
+ ))
+ .instrument(info_span!("QAD IPv4", %relay_url)),
+ );
+ }
+
+ if if_state.have_v6 {
+ debug!(?relay_node.url, "v6 QAD probe");
+ let ip_mapped_addrs = self.socket_state.ip_mapped_addrs.clone();
+ let relay_node = relay_node.clone();
+ let dns_resolver = self.socket_state.dns_resolver.clone();
+ let quic_client = quic_client.clone();
+ let relay_url = relay_node.url.clone();
+ v6_buf.spawn(
+ cancel_v6
+ .child_token()
+ .run_until_cancelled_owned(time::timeout(
+ PROBES_TIMEOUT,
+ run_probe_v6(ip_mapped_addrs, relay_node, quic_client, dns_resolver),
+ ))
+ .instrument(info_span!("QAD IPv6", %relay_url)),
+ );
+ }
+ }
+
+ let mut reports = Vec::new();
+
+ loop {
+ if reports.len() >= enough_relays {
+ debug!("enough probes: {}", reports.len());
+ cancel_v4.cancel();
+ cancel_v6.cancel();
+ break;
+ }
+
+ tokio::select! {
+ biased;
+
+ val = v4_buf.join_next(), if !v4_buf.is_empty() => {
+ match val {
+ Some(Ok(Some(Ok(res)))) => {
+ match res {
+ Ok((r, conn)) => {
+ debug!(?r, "got v4 QAD conn");
+ let url = r.node.clone();
+ reports.push(ProbeReport::QadIpv4(r));
+ if self.qad_conns.v4.is_none() {
+ self.qad_conns.v4.replace((url, conn));
+ } else {
+ conn.conn.close(QUIC_ADDR_DISC_CLOSE_CODE, QUIC_ADDR_DISC_CLOSE_REASON);
+ }
+ }
+ Err(err) => {
+ debug!("probe v4 failed: {:?}", err);
+ }
}
- None => {
- debug!(%src, %txn, "unknown STUN request");
+ }
+ Some(Err(err)) => {
+ if err.is_panic() {
+ panic!("probe v4 panicked: {:?}", err);
}
+ warn!("probe v4 failed: {:?}", err);
}
+ Some(Ok(None)) => {
+ debug!("probe v4 canceled");
+ }
+ Some(Ok(Some(Err(time::Elapsed { .. })))) => {
+ debug!("probe v4 timed out");
+ }
+ None => {}
}
- Err(_) => {
- debug!(%src, "received invalid STUN response: {err:#}");
+ }
+ val = v6_buf.join_next(), if !v6_buf.is_empty() => {
+ match val {
+ Some(Ok(Some(Ok(res)))) => {
+ match res {
+ Ok((r, conn)) => {
+ debug!(?r, "got v6 QAD conn");
+ let url = r.node.clone();
+ reports.push(ProbeReport::QadIpv6(r));
+ if self.qad_conns.v6.is_none() {
+ self.qad_conns.v6.replace((url, conn));
+ } else {
+ conn.conn.close(QUIC_ADDR_DISC_CLOSE_CODE, QUIC_ADDR_DISC_CLOSE_REASON);
+ }
+ }
+ Err(err) => {
+ debug!("probe v6 failed: {:?}", err);
+ }
+ }
+ }
+ Some(Err(err)) => {
+ if err.is_panic() {
+ panic!("probe v6 panicked: {:?}", err);
+ }
+ warn!("probe v6 failed: {:?}", err);
+ }
+ Some(Ok(None)) => {
+ debug!("probe v6 canceled");
+ }
+ Some(Ok(Some(Err(time::Elapsed { .. })))) => {
+ debug!("probe v6 timed out");
+ }
+ None => {}
}
}
+ else => {
+ break;
+ }
}
}
- }
- /// Handles [`Message::InFlightStun`].
- ///
- /// The in-flight request is added to [`Actor::in_flight_stun_requests`] so that
- /// [`Actor::handle_stun_packet`] can forward packets correctly.
- ///
- /// *response_tx* is to signal the actor message has been handled.
- fn handle_in_flight_stun(&mut self, inflight: Inflight, response_tx: oneshot::Sender<()>) {
- self.in_flight_stun_requests.insert(inflight.txn, inflight);
- response_tx.send(()).ok();
+ reports
}
- fn finish_and_store_report(&mut self, report: Report) -> Arc {
- let report = self.add_report_history_and_set_preferred_relay(report);
- debug!("{report:?}");
- report
+ fn have_enough_reports(&self, enough_relays: usize, report: &Report) -> Option<Duration> {
+ // Once we've heard from enough relay servers (3), start a timer to give up on the other
+ // probes. The timer's duration is a function of whether this is our initial full
+ // probe or an incremental one. For incremental ones, wait for the duration of the
+ // slowest relay. For initial ones, double that.
+ let latencies: Vec<Duration> = report.relay_latency.iter().map(|(_, l)| l).collect();
+ let have_enough_latencies = latencies.len() >= enough_relays;
+
+ if have_enough_latencies {
+ let timeout = match self.reports.last.is_some() {
+ true => Duration::from_secs(0),
+ false => latencies
+ .iter()
+ .max()
+ .copied()
+ .unwrap_or(DEFAULT_MAX_LATENCY),
+ };
+ debug!(
+ reports=latencies.len(),
+ delay=?timeout,
+ "Have enough probe reports, aborting further probes soon",
+ );
+
+ Some(timeout)
+ } else {
+ None
+ }
}
/// Adds `r` to the set of recent Reports and mutates `r.preferred_relay` to contain the best recent one.
- /// `r` is stored ref counted and a reference is returned.
- fn add_report_history_and_set_preferred_relay(&mut self, mut r: Report) -> Arc {
+ fn add_report_history_and_set_preferred_relay(&mut self, r: &mut Report) {
let mut prev_relay = None;
if let Some(ref last) = self.reports.last {
prev_relay.clone_from(&last.preferred_relay);
+
+ // If we don't have new information, copy this from the last report
+ if r.mapping_varies_by_dest_ipv4.is_none() {
+ r.mapping_varies_by_dest_ipv4 = last.mapping_varies_by_dest_ipv4;
+ }
+ if r.mapping_varies_by_dest_ipv6.is_none() {
+ r.mapping_varies_by_dest_ipv6 = last.mapping_varies_by_dest_ipv6;
+ }
}
+
let now = Instant::now();
const MAX_AGE: Duration = Duration::from_secs(5 * 60);
// relay ID => its best recent latency in last MAX_AGE
- let mut best_recent = RelayLatencies::new();
+ let mut best_recent = RelayLatencies::default();
// chain the current report as we are still mutating it
let prevs_iter = self
.reports
.prev
.iter()
- .map(|(a, b)| -> (&Instant, &Report) { (a, b) })
- .chain(std::iter::once((&now, &r)));
+ .map(|(a, b)| -> (&Instant, &Report) { (a, b) });
let mut to_remove = Vec::new();
for (t, pr) in prevs_iter {
@@ -742,6 +628,8 @@ impl Actor {
}
best_recent.merge(&pr.relay_latency);
}
+ // merge in current run
+ best_recent.merge(&r.relay_latency);
for t in to_remove {
self.reports.prev.remove(&t);
@@ -776,137 +664,148 @@ impl Actor {
}
}
- let r = Arc::new(r);
self.reports.prev.insert(now, r.clone());
self.reports.last = Some(r.clone());
-
- r
}
}
-/// State the net_report actor needs for an in-progress report generation.
-#[derive(Debug)]
-struct ReportRun {
- /// The handle of the [`reportgen`] actor, cancels the actor on drop.
- _reportgen: reportgen::Client,
- /// Where to send the completed report.
- report_tx: oneshot::Sender, ReportError>>,
-}
-
-#[allow(missing_docs)]
-#[common_fields({
- backtrace: Option,
- #[snafu(implicit)]
- span_trace: n0_snafu::SpanTrace,
-})]
-#[derive(Debug, Snafu)]
-#[non_exhaustive]
-pub enum ReportError {
- #[snafu(display("Report aborted early"))]
- Abort { reason: ActorRunError },
- #[snafu(display("Report generation is already running"))]
- AlreadyRunning {},
- #[snafu(display("Internal actor is gone"))]
- ActorGone {},
-}
-
-/// Test if IPv6 works at all, or if it's been hard disabled at the OS level.
#[cfg(not(wasm_browser))]
-fn os_has_ipv6() -> bool {
- UdpSocket::bind_local_v6(0).is_ok()
-}
-
-/// Always returns false in browsers
-#[cfg(wasm_browser)]
-fn os_has_ipv6() -> bool {
- false
+async fn run_probe_v4(
+ ip_mapped_addrs: Option,
+ relay_node: Arc,
+ quic_client: QuicClient,
+ dns_resolver: DnsResolver,
+) -> n0_snafu::Result<(QadProbeReport, QadConn)> {
+ use n0_snafu::ResultExt;
+
+ let relay_addr_orig = reportgen::get_relay_addr_ipv4(&dns_resolver, &relay_node).await?;
+ let relay_addr =
+ reportgen::maybe_to_mapped_addr(ip_mapped_addrs.as_ref(), relay_addr_orig.into());
+
+ debug!(?relay_addr_orig, ?relay_addr, "relay addr v4");
+ let host = relay_node.url.host_str().context("missing host url")?;
+ let conn = quic_client.create_conn(relay_addr, host).await?;
+ let mut receiver = conn.observed_external_addr();
+
+ // wait for an addr
+ let addr = receiver
+ .wait_for(|addr| addr.is_some())
+ .await
+ .context("receiver dropped")?
+ .expect("known");
+ let report = QadProbeReport {
+ node: relay_node.url.clone(),
+ addr: SocketAddr::new(addr.ip().to_canonical(), addr.port()),
+ latency: conn.rtt(),
+ };
+
+ let observer = Watchable::new(None);
+ let ob = observer.clone();
+ let node = relay_node.url.clone();
+ let conn2 = conn.clone();
+ let handle = task::spawn(async move {
+ loop {
+ let val = *receiver.borrow();
+ // If we've sent to an IPv4 address but received an observed address
+ // that is IPv6, then the address is an [IPv4-mapped IPv6 address](https://doc.rust-lang.org/beta/std/net/struct.Ipv6Addr.html#ipv4-mapped-ipv6-addresses)
+ let val = val.map(|val| SocketAddr::new(val.ip().to_canonical(), val.port()));
+ let latency = conn2.rtt();
+ trace!(?val, ?relay_addr, ?latency, "got addr V4");
+ if ob
+ .set(val.map(|addr| QadProbeReport {
+ node: node.clone(),
+ addr,
+ latency,
+ }))
+ .is_err()
+ {
+ // cancel if the observer is gone
+ break;
+ }
+ if receiver.changed().await.is_err() {
+ break;
+ }
+ }
+ });
+ let handle = AbortOnDropHandle::new(handle);
+
+ Ok((
+ report,
+ QadConn {
+ conn,
+ observer,
+ _handle: handle,
+ },
+ ))
}
#[cfg(not(wasm_browser))]
-pub(crate) mod stun_utils {
- use netwatch::IpFamily;
- use tokio_util::sync::CancellationToken;
-
- use super::*;
-
- /// Attempts to bind a local socket to send STUN packets from.
- ///
- /// If successful this returns the bound socket and will forward STUN responses to the
- /// provided *actor_addr*. The *cancel_token* serves to stop the packet forwarding when the
- /// socket is no longer needed.
- pub fn bind_local_stun_socket(
- network: IpFamily,
- actor_addr: Addr,
- cancel_token: CancellationToken,
- ) -> Option> {
- let sock = match UdpSocket::bind(network, 0) {
- Ok(sock) => Arc::new(sock),
- Err(err) => {
- debug!("failed to bind STUN socket: {}", err);
- return None;
+async fn run_probe_v6(
+ ip_mapped_addrs: Option,
+ relay_node: Arc,
+ quic_client: QuicClient,
+ dns_resolver: DnsResolver,
+) -> n0_snafu::Result<(QadProbeReport, QadConn)> {
+ use n0_snafu::ResultExt;
+ let relay_addr_orig = reportgen::get_relay_addr_ipv6(&dns_resolver, &relay_node).await?;
+ let relay_addr =
+ reportgen::maybe_to_mapped_addr(ip_mapped_addrs.as_ref(), relay_addr_orig.into());
+
+ debug!(?relay_addr_orig, ?relay_addr, "relay addr v6");
+ let host = relay_node.url.host_str().context("missing host url")?;
+ let conn = quic_client.create_conn(relay_addr, host).await?;
+ let mut receiver = conn.observed_external_addr();
+
+ // wait for an addr
+ let addr = receiver
+ .wait_for(|addr| addr.is_some())
+ .await
+ .context("receiver dropped")?
+ .expect("known");
+ let report = QadProbeReport {
+ node: relay_node.url.clone(),
+ addr: SocketAddr::new(addr.ip().to_canonical(), addr.port()),
+ latency: conn.rtt(),
+ };
+
+ let observer = Watchable::new(None);
+ let ob = observer.clone();
+ let node = relay_node.url.clone();
+ let conn2 = conn.clone();
+ let handle = task::spawn(async move {
+ loop {
+ let val = *receiver.borrow();
+ // If we've sent to an IPv4 address but received an observed address
+ // that is IPv6, then the address is an IPv4-mapped IPv6 address
+ let val = val.map(|val| SocketAddr::new(val.ip().to_canonical(), val.port()));
+ let latency = conn2.rtt();
+ trace!(?val, ?relay_addr, ?latency, "got addr V6");
+ if ob
+ .set(val.map(|addr| QadProbeReport {
+ node: node.clone(),
+ addr,
+ latency,
+ }))
+ .is_err()
+ {
+ // cancel if the observer is gone
+ break;
+ }
+ if receiver.changed().await.is_err() {
+ break;
}
- };
- let span = info_span!(
- "stun_udp_listener",
- local_addr = sock
- .local_addr()
- .map(|a| a.to_string())
- .unwrap_or(String::from("-")),
- );
- {
- let sock = sock.clone();
- task::spawn(
- async move {
- debug!("udp stun socket listener started");
- // TODO: Can we do better for buffers here? Probably doesn't matter much.
- let mut buf = vec![0u8; 64 << 10];
- loop {
- tokio::select! {
- biased;
- _ = cancel_token.cancelled() => break,
- res = recv_stun_once(&sock, &mut buf, &actor_addr) => {
- if let Err(err) = res {
- warn!(%err, "stun recv failed");
- break;
- }
- }
- }
- }
- debug!("udp stun socket listener stopped");
- }
- .instrument(span),
- );
}
- Some(sock)
- }
-
- #[derive(Debug, Snafu)]
- enum RecvStunError {
- #[snafu(transparent)]
- Recv { source: std::io::Error },
- #[snafu(display("Internal actor is gone"))]
- ActorGone,
- }
-
- /// Receive STUN response from a UDP socket, pass it to the actor.
- async fn recv_stun_once(
- sock: &UdpSocket,
- buf: &mut [u8],
- actor_addr: &Addr,
- ) -> Result<(), RecvStunError> {
- let (count, mut from_addr) = sock.recv_from(buf).await?;
-
- let payload = &buf[..count];
- from_addr.set_ip(from_addr.ip().to_canonical());
- let msg = Message::StunPacket {
- payload: Bytes::from(payload.to_vec()),
- from_addr,
- };
- actor_addr
- .send(msg)
- .await
- .map_err(|_| ActorGoneSnafu.build())
- }
+ });
+ let handle = AbortOnDropHandle::new(handle);
+
+ Ok((
+ report,
+ QadConn {
+ conn,
+ observer,
+ _handle: handle,
+ },
+ ))
}
#[cfg(test)]
@@ -924,8 +823,6 @@ mod test_utils {
});
let node_desc = RelayNode {
url: server.https_url().expect("should work as relay"),
- stun_only: false, // the checks above and below guarantee both stun and relay
- stun_port: server.stun_addr().expect("server should serve stun").port(),
quic,
};
@@ -950,173 +847,55 @@ mod test_utils {
#[cfg(test)]
mod tests {
- use bytes::BytesMut;
+ use std::net::{Ipv4Addr, SocketAddr};
+
+ use iroh_base::RelayUrl;
+ use iroh_relay::dns::DnsResolver;
use n0_snafu::{Result, ResultExt};
- use netwatch::IpFamily;
use tokio_util::sync::CancellationToken;
- use tracing::info;
use tracing_test::traced_test;
use super::*;
- use crate::net_report::{dns, stun_utils::bind_local_stun_socket};
-
- mod stun_utils {
- //! Utils for testing that expose a simple stun server.
-
- use std::{net::IpAddr, sync::Arc};
-
- use iroh_base::RelayUrl;
- use iroh_relay::RelayNode;
- use tokio::{
- net,
- sync::{oneshot, Mutex},
- };
- use tracing::{debug, trace};
-
- use super::*;
-
- /// A drop guard to clean up test infrastructure.
- ///
- /// After dropping the test infrastructure will asynchronously shutdown and release its
- /// resources.
- // Nightly sees the sender as dead code currently, but we only rely on Drop of the
- // sender.
- #[derive(Debug)]
- pub struct CleanupDropGuard {
- _guard: oneshot::Sender<()>,
- }
-
- // (read_ipv4, read_ipv6)
- #[derive(Debug, Default, Clone)]
- pub struct StunStats(Arc>);
-
- impl StunStats {
- pub async fn total(&self) -> usize {
- let s = self.0.lock().await;
- s.0 + s.1
- }
- }
-
- pub fn relay_map_of(stun: impl Iterator- ) -> RelayMap {
- relay_map_of_opts(stun.map(|addr| (addr, true)))
- }
-
- pub fn relay_map_of_opts(stun: impl Iterator
- ) -> RelayMap {
- let nodes = stun.map(|(addr, stun_only)| {
- let host = addr.ip();
- let port = addr.port();
-
- let url: RelayUrl = format!("http://{host}:{port}").parse().unwrap();
- RelayNode {
- url,
- stun_port: port,
- stun_only,
- quic: None,
- }
- });
- RelayMap::from_iter(nodes)
- }
+ use crate::net_report::probes::Probe;
- /// Sets up a simple STUN server binding to `0.0.0.0:0`.
- ///
- /// See [`serve`] for more details.
- pub(crate) async fn serve_v4() -> std::io::Result<(SocketAddr, StunStats, CleanupDropGuard)>
- {
- serve(std::net::Ipv4Addr::UNSPECIFIED.into()).await
- }
-
- /// Sets up a simple STUN server.
- pub(crate) async fn serve(
- ip: IpAddr,
- ) -> std::io::Result<(SocketAddr, StunStats, CleanupDropGuard)> {
- let stats = StunStats::default();
-
- let pc = net::UdpSocket::bind((ip, 0)).await?;
- let mut addr = pc.local_addr()?;
- match addr.ip() {
- IpAddr::V4(ip) => {
- if ip.octets() == [0, 0, 0, 0] {
- addr.set_ip("127.0.0.1".parse().unwrap());
- }
- }
- _ => unreachable!("using ipv4"),
- }
-
- println!("STUN listening on {}", addr);
- let (_guard, r) = oneshot::channel();
- let stats_c = stats.clone();
- tokio::task::spawn(async move {
- run_stun(pc, stats_c, r).await;
- });
-
- Ok((addr, stats, CleanupDropGuard { _guard }))
- }
-
- async fn run_stun(pc: net::UdpSocket, stats: StunStats, mut done: oneshot::Receiver<()>) {
- let mut buf = vec![0u8; 64 << 10];
- loop {
- trace!("read loop");
- tokio::select! {
- _ = &mut done => {
- debug!("shutting down");
- break;
- }
- res = pc.recv_from(&mut buf) => match res {
- Ok((n, addr)) => {
- trace!("read packet {}bytes from {}", n, addr);
- let pkt = &buf[..n];
- if !stun::is(pkt) {
- debug!("received non STUN pkt");
- continue;
- }
- if let Ok(txid) = stun::parse_binding_request(pkt) {
- debug!("received binding request");
- let mut s = stats.0.lock().await;
- if addr.is_ipv4() {
- s.0 += 1;
- } else {
- s.1 += 1;
- }
- drop(s);
-
- let res = stun::response(txid, addr);
- if let Err(err) = pc.send_to(&res, addr).await {
- eprintln!("STUN server write failed: {:?}", err);
- }
- }
- }
- Err(err) => {
- eprintln!("failed to read: {:?}", err);
- }
- }
- }
- }
- }
- }
-
- #[tokio::test]
+ #[tokio::test(flavor = "multi_thread")]
#[traced_test]
- async fn test_basic() -> Result {
- let (stun_addr, stun_stats, _cleanup_guard) =
- stun_utils::serve("127.0.0.1".parse().unwrap()).await.e()?;
+ async fn test_basic() -> Result<()> {
+ let (server, relay) = test_utils::relay().await;
+ let client_config = iroh_relay::client::make_dangerous_client_config();
+ let ep = quinn::Endpoint::client(SocketAddr::new(Ipv4Addr::LOCALHOST.into(), 0)).e()?;
+ let quic_addr_disc = QuicConfig {
+ ep: ep.clone(),
+ client_config,
+ ipv4: true,
+ ipv6: true,
+ };
+ let relay_map = RelayMap::from(relay);
- let resolver = dns::tests::resolver();
- let mut client = Client::new(None, resolver.clone(), None, Default::default());
- let dm = stun_utils::relay_map_of([stun_addr].into_iter());
+ let resolver = DnsResolver::new();
+ let opts = Options::default()
+ .quic_config(Some(quic_addr_disc.clone()))
+ .insecure_skip_relay_cert_verify(true);
+ let mut client = Client::new(
+ resolver.clone(),
+ None,
+ relay_map.clone(),
+ opts.clone(),
+ Default::default(),
+ );
+ let if_state = IfStateDetails::fake();
// Note that the ProbePlan will change with each iteration.
for i in 0..5 {
let cancel = CancellationToken::new();
- let sock = bind_local_stun_socket(IpFamily::V4, client.addr(), cancel.clone());
println!("--round {}", i);
- let r = client.get_report_all(dm.clone(), sock, None, None).await?;
-
- assert!(r.udp, "want UDP");
- assert_eq!(
- r.relay_latency.len(),
- 1,
- "expected 1 key in RelayLatency; got {}",
- r.relay_latency.len()
+ let r = client.get_report(if_state.clone(), false).await;
+
+ assert!(r.has_udp(), "want UDP");
+ dbg!(&r);
+ assert!(
+ !r.relay_latency.is_empty(),
+ "expected at least 1 key in RelayLatency; got none",
);
assert!(
r.relay_latency.iter().next().is_some(),
@@ -1128,11 +907,9 @@ mod tests {
cancel.cancel();
}
- assert!(
- stun_stats.total().await >= 5,
- "expected at least 5 stun, got {}",
- stun_stats.total().await,
- );
+ drop(client);
+ ep.wait_idle().await;
+ server.shutdown().await?;
Ok(())
}
@@ -1144,23 +921,24 @@ mod tests {
}
// report returns a *Report from (relay host, Duration)+ pairs.
- fn report(a: impl IntoIterator
- ) -> Option
> {
+ fn report(a: impl IntoIterator- ) -> Option
{
let mut report = Report::default();
for (s, d) in a {
assert!(s.starts_with('d'), "invalid relay server key");
let id: u16 = s[1..].parse().unwrap();
- report
- .relay_latency
- .0
- .insert(relay_url(id), Duration::from_secs(d));
+ report.relay_latency.update_relay(
+ relay_url(id),
+ Duration::from_secs(d),
+ Probe::QadIpv4,
+ );
}
- Some(Arc::new(report))
+ Some(report)
}
struct Step {
/// Delay in seconds
after: u64,
- r: Option>,
+ r: Option,
}
struct Test {
name: &'static str,
@@ -1296,18 +1074,20 @@ mod tests {
want_relay: Some(relay_url(2)), // 2 got fast enough
},
];
- let resolver = dns::tests::resolver();
+ let resolver = DnsResolver::new();
for mut tt in tests {
println!("test: {}", tt.name);
- let mut actor = Actor::new(None, resolver.clone(), None, Default::default());
+ let relay_map = RelayMap::empty();
+ let opts = Options::default();
+ let mut client =
+ Client::new(resolver.clone(), None, relay_map, opts, Default::default());
for s in &mut tt.steps {
// trigger the timer
tokio::time::advance(Duration::from_secs(s.after)).await;
- let r = Arc::try_unwrap(s.r.take().unwrap()).unwrap();
- s.r = Some(actor.add_report_history_and_set_preferred_relay(r));
+ client.add_report_history_and_set_preferred_relay(s.r.as_mut().unwrap());
}
let last_report = tt.steps.last().unwrap().r.clone().unwrap();
- let got = actor.reports.prev.len();
+ let got = client.reports.prev.len();
let want = tt.want_prev_len;
assert_eq!(got, want, "prev length");
let got = &last_report.preferred_relay;
@@ -1317,61 +1097,4 @@ mod tests {
Ok(())
}
-
- #[tokio::test]
- async fn test_hairpin() -> Result {
- // Hairpinning is initiated after we discover our own IPv4 socket address (IP +
- // port) via STUN, so the test needs to have a STUN server and perform STUN over
- // IPv4 first. Hairpinning detection works by sending a STUN *request* to **our own
- // public socket address** (IP + port). If the router supports hairpinning the STUN
- // request is returned back to us and received on our public address. This doesn't
- // need to be a STUN request, but STUN already has a unique transaction ID which we
- // can easily use to identify the packet.
-
- // Setup STUN server and create relay_map.
- let (stun_addr, _stun_stats, _done) = stun_utils::serve_v4().await.e()?;
- let dm = stun_utils::relay_map_of([stun_addr].into_iter());
- dbg!(&dm);
-
- let resolver = dns::tests::resolver().clone();
- let mut client = Client::new(None, resolver, None, Default::default());
-
- // Set up an external socket to send STUN requests from, this will be discovered as
- // our public socket address by STUN. We send back any packets received on this
- // socket to the net_report client using Client::receive_stun_packet. Once we sent
- // the hairpin STUN request (from a different randomly bound socket) we are sending
- // it to this socket, which is forwarnding it back to our net_report client, because
- // this dumb implementation just forwards anything even if it would be garbage.
- // Thus hairpinning detection will declare hairpinning to work.
- let sock = UdpSocket::bind_local(netwatch::IpFamily::V4, 0).e()?;
- let sock = Arc::new(sock);
- info!(addr=?sock.local_addr().unwrap(), "Using local addr");
- let task = {
- let sock = sock.clone();
- let addr = client.addr.clone();
- tokio::spawn(
- async move {
- let mut buf = BytesMut::zeroed(64 << 10);
- loop {
- let (count, src) = sock.recv_from(&mut buf).await.unwrap();
- info!(
- addr=?sock.local_addr().unwrap(),
- %count,
- "Forwarding payload to net_report client",
- );
- let payload = buf.split_to(count).freeze();
- addr.receive_stun_packet(payload, src);
- }
- }
- .instrument(info_span!("pkt-fwd")),
- )
- };
-
- let r = client.get_report_all(dm, Some(sock), None, None).await?;
- dbg!(&r);
- assert_eq!(r.hair_pinning, Some(true));
-
- task.abort();
- Ok(())
- }
}
diff --git a/iroh/src/net_report/defaults.rs b/iroh/src/net_report/defaults.rs
index 66f27097114..75782044aef 100644
--- a/iroh/src/net_report/defaults.rs
+++ b/iroh/src/net_report/defaults.rs
@@ -11,15 +11,15 @@ pub(crate) mod timeouts {
/// The total time we wait for all the probes.
///
- /// This includes the STUN, ICMP and HTTPS probes, which will all
+ /// This includes the QAD and HTTPS probes, which will all
/// start at different times based on the ProbePlan.
pub(crate) const PROBES_TIMEOUT: Duration = Duration::from_secs(3);
/// How long to await for a captive-portal result.
///
- /// This delay is chosen so it starts after good-working STUN probes
+ /// This delay is chosen so it starts after good-working QAD probes
/// would have finished, but not too long so the delay is bearable if
- /// STUN is blocked.
+ /// UDP/QAD is blocked.
pub(crate) const CAPTIVE_PORTAL_DELAY: Duration = Duration::from_millis(200);
/// Timeout for captive portal checks
@@ -29,10 +29,4 @@ pub(crate) mod timeouts {
pub(crate) const CAPTIVE_PORTAL_TIMEOUT: Duration = Duration::from_secs(2);
pub(crate) const DNS_TIMEOUT: Duration = Duration::from_secs(3);
-
- /// The amount of time we wait for a hairpinned packet to come back.
- pub(crate) const HAIRPIN_CHECK_TIMEOUT: Duration = Duration::from_millis(100);
-
- /// Default Pinger timeout
- pub(crate) const DEFAULT_PINGER_TIMEOUT: Duration = Duration::from_secs(5);
}
diff --git a/iroh/src/net_report/dns.rs b/iroh/src/net_report/dns.rs
deleted file mode 100644
index 1a46b339436..00000000000
--- a/iroh/src/net_report/dns.rs
+++ /dev/null
@@ -1,12 +0,0 @@
-/// Delay used to perform staggered dns queries.
-pub(crate) const DNS_STAGGERING_MS: &[u64] = &[200, 300];
-
-#[cfg(test)]
-pub(crate) mod tests {
- use iroh_relay::dns::DnsResolver;
-
- /// Get a DNS resolver suitable for testing.
- pub fn resolver() -> DnsResolver {
- DnsResolver::new()
- }
-}
diff --git a/iroh/src/net_report/ip_mapped_addrs.rs b/iroh/src/net_report/ip_mapped_addrs.rs
index 8a16db7a921..d555d8b506d 100644
--- a/iroh/src/net_report/ip_mapped_addrs.rs
+++ b/iroh/src/net_report/ip_mapped_addrs.rs
@@ -18,7 +18,7 @@ pub struct IpMappedAddrError;
///
/// It is essentially a lookup key for an IP that iroh's magicsocket knows about.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)]
-pub struct IpMappedAddr(Ipv6Addr);
+pub(crate) struct IpMappedAddr(Ipv6Addr);
/// Counter to always generate unique addresses for [`IpMappedAddr`].
static IP_ADDR_COUNTER: AtomicU64 = AtomicU64::new(1);
@@ -38,7 +38,7 @@ impl IpMappedAddr {
///
/// This generates a new IPv6 address in the Unique Local Address range (RFC 4193)
/// which is recognised by iroh as an IP mapped address.
- pub fn generate() -> Self {
+ pub(super) fn generate() -> Self {
let mut addr = [0u8; 16];
addr[0] = Self::ADDR_PREFIXL;
addr[1..6].copy_from_slice(&Self::ADDR_GLOBAL_ID);
@@ -57,7 +57,7 @@ impl IpMappedAddr {
/// This uses a made-up, but fixed port number. The [IpMappedAddresses`] map this is
/// made for creates a unique [`IpMappedAddr`] for each IP+port and thus does not use
/// the port to map back to the original [`SocketAddr`].
- pub fn private_socket_addr(&self) -> SocketAddr {
+ pub(crate) fn private_socket_addr(&self) -> SocketAddr {
SocketAddr::new(IpAddr::from(self.0), Self::MAPPED_ADDR_PORT)
}
}
@@ -88,10 +88,10 @@ impl std::fmt::Display for IpMappedAddr {
// mechanisms for keeping track of "aliveness" and pruning address, as we do
// with the `NodeMap`
#[derive(Debug, Clone, Default)]
-pub struct IpMappedAddresses(Arc>);
+pub(crate) struct IpMappedAddresses(Arc>);
#[derive(Debug, Default)]
-pub struct Inner {
+pub(super) struct Inner {
by_mapped_addr: BTreeMap,
/// Because [`std::net::SocketAddrV6`] contains extra fields besides the IP
/// address and port (ie, flow_info and scope_id), the a [`std::net::SocketAddrV6`]
@@ -101,18 +101,13 @@ pub struct Inner {
}
impl IpMappedAddresses {
- /// Creates an empty [`IpMappedAddresses`].
- pub fn new() -> Self {
- Self(Arc::new(std::sync::Mutex::new(Inner::default())))
- }
-
/// Adds a [`SocketAddr`] to the map and returns the generated [`IpMappedAddr`].
///
/// If this [`SocketAddr`] already exists in the map, it returns its
/// associated [`IpMappedAddr`].
///
/// Otherwise a new [`IpMappedAddr`] is generated for it and returned.
- pub fn get_or_register(&self, socket_addr: SocketAddr) -> IpMappedAddr {
+ pub(super) fn get_or_register(&self, socket_addr: SocketAddr) -> IpMappedAddr {
let ip_port = (socket_addr.ip(), socket_addr.port());
let mut inner = self.0.lock().expect("poisoned");
if let Some(mapped_addr) = inner.by_ip_port.get(&ip_port) {
@@ -125,14 +120,14 @@ impl IpMappedAddresses {
}
/// Returns the [`IpMappedAddr`] for the given [`SocketAddr`].
- pub fn get_mapped_addr(&self, socket_addr: &SocketAddr) -> Option {
+ pub(crate) fn get_mapped_addr(&self, socket_addr: &SocketAddr) -> Option {
let ip_port = (socket_addr.ip(), socket_addr.port());
let inner = self.0.lock().expect("poisoned");
inner.by_ip_port.get(&ip_port).copied()
}
/// Returns the [`SocketAddr`] for the given [`IpMappedAddr`].
- pub fn get_ip_addr(&self, mapped_addr: &IpMappedAddr) -> Option {
+ pub(crate) fn get_ip_addr(&self, mapped_addr: &IpMappedAddr) -> Option {
let inner = self.0.lock().expect("poisoned");
inner.by_mapped_addr.get(mapped_addr).copied()
}
diff --git a/iroh/src/net_report/metrics.rs b/iroh/src/net_report/metrics.rs
index 1698c1d8ae7..ccfd9ba9380 100644
--- a/iroh/src/net_report/metrics.rs
+++ b/iroh/src/net_report/metrics.rs
@@ -6,16 +6,6 @@ use serde::{Deserialize, Serialize};
#[metrics(name = "net_report")]
#[non_exhaustive]
pub struct Metrics {
- /// Incoming STUN packets dropped due to a full receiving queue.
- pub stun_packets_dropped: Counter,
- /// Number of IPv4 STUN packets sent.
- pub stun_packets_sent_ipv4: Counter,
- /// Number of IPv6 STUN packets sent.
- pub stun_packets_sent_ipv6: Counter,
- /// Number of IPv4 STUN packets received.
- pub stun_packets_recv_ipv4: Counter,
- /// Number of IPv6 STUN packets received.
- pub stun_packets_recv_ipv6: Counter,
/// Number of reports executed by net_report, including full reports.
pub reports: Counter,
/// Number of full reports executed by net_report
diff --git a/iroh/src/net_report/options.rs b/iroh/src/net_report/options.rs
index 5540f56e976..6fd5e2449aa 100644
--- a/iroh/src/net_report/options.rs
+++ b/iroh/src/net_report/options.rs
@@ -4,48 +4,21 @@ pub use imp::Options;
#[cfg(not(wasm_browser))]
mod imp {
- use std::{collections::BTreeSet, sync::Arc};
-
- use netwatch::UdpSocket;
+ use std::collections::BTreeSet;
- use crate::net_report::{reportgen::ProbeProto, QuicConfig};
+ use crate::net_report::{probes::Probe, QuicConfig};
/// Options for running probes
///
- /// By default, will run icmp over IPv4, icmp over IPv6, and Https probes.
+ /// By default, will run Https probes.
///
- /// Use [`Options::stun_v4`], [`Options::stun_v6`], and [`Options::quic_config`]
- /// to enable STUN over IPv4, STUN over IPv6, and QUIC address discovery.
+ /// Use [`Options::quic_config`] to enable QUIC address discovery.
#[derive(Debug, Clone)]
pub struct Options {
- /// Socket to send IPv4 STUN probes from.
- ///
- /// Responses are never read from this socket, they must be passed in via internal
- /// messaging since, when used internally in iroh, the socket is also used to receive
- /// other packets from in the magicsocket (`MagicSock`).
- ///
- /// If not provided, STUN probes will not be sent over IPv4.
- pub(crate) stun_sock_v4: Option>,
- /// Socket to send IPv6 STUN probes from.
- ///
- /// Responses are never read from this socket, they must be passed in via internal
- /// messaging since, when used internally in iroh, the socket is also used to receive
- /// other packets from in the magicsocket (`MagicSock`).
- ///
- /// If not provided, STUN probes will not be sent over IPv6.
- pub(crate) stun_sock_v6: Option>,
/// The configuration needed to launch QUIC address discovery probes.
///
/// If not provided, will not run QUIC address discovery.
pub(crate) quic_config: Option,
- /// Enable icmp_v4 probes
- ///
- /// On by default
- pub(crate) icmp_v4: bool,
- /// Enable icmp_v6 probes
- ///
- /// On by default
- pub(crate) icmp_v6: bool,
/// Enable https probes
///
/// On by default
@@ -58,11 +31,7 @@ mod imp {
impl Default for Options {
fn default() -> Self {
Self {
- stun_sock_v4: None,
- stun_sock_v6: None,
quic_config: None,
- icmp_v4: true,
- icmp_v6: true,
https: true,
#[cfg(any(test, feature = "test-utils"))]
insecure_skip_relay_cert_verify: false,
@@ -74,47 +43,19 @@ mod imp {
/// Create an [`Options`] that disables all probes
pub fn disabled() -> Self {
Self {
- stun_sock_v4: None,
- stun_sock_v6: None,
quic_config: None,
- icmp_v4: false,
- icmp_v6: false,
https: false,
#[cfg(any(test, feature = "test-utils"))]
insecure_skip_relay_cert_verify: false,
}
}
- /// Set the ipv4 stun socket and enable ipv4 stun probes
- pub fn stun_v4(mut self, sock: Option>) -> Self {
- self.stun_sock_v4 = sock;
- self
- }
-
- /// Set the ipv6 stun socket and enable ipv6 stun probes
- pub fn stun_v6(mut self, sock: Option>) -> Self {
- self.stun_sock_v6 = sock;
- self
- }
-
/// Enable quic probes
pub fn quic_config(mut self, quic_config: Option) -> Self {
self.quic_config = quic_config;
self
}
- /// Enable or disable icmp_v4 probe
- pub fn icmp_v4(mut self, enable: bool) -> Self {
- self.icmp_v4 = enable;
- self
- }
-
- /// Enable or disable icmp_v6 probe
- pub fn icmp_v6(mut self, enable: bool) -> Self {
- self.icmp_v6 = enable;
- self
- }
-
/// Enable or disable https probe
pub fn https(mut self, enable: bool) -> Self {
self.https = enable;
@@ -129,30 +70,18 @@ mod imp {
}
/// Turn the options into set of valid protocols
- pub(crate) fn to_protocols(&self) -> BTreeSet {
+ pub fn as_protocols(&self) -> BTreeSet {
let mut protocols = BTreeSet::new();
- if self.stun_sock_v4.is_some() {
- protocols.insert(ProbeProto::StunIpv4);
- }
- if self.stun_sock_v6.is_some() {
- protocols.insert(ProbeProto::StunIpv6);
- }
if let Some(ref quic) = self.quic_config {
if quic.ipv4 {
- protocols.insert(ProbeProto::QuicIpv4);
+ protocols.insert(Probe::QadIpv4);
}
if quic.ipv6 {
- protocols.insert(ProbeProto::QuicIpv6);
+ protocols.insert(Probe::QadIpv6);
}
}
- if self.icmp_v4 {
- protocols.insert(ProbeProto::IcmpV4);
- }
- if self.icmp_v6 {
- protocols.insert(ProbeProto::IcmpV6);
- }
if self.https {
- protocols.insert(ProbeProto::Https);
+ protocols.insert(Probe::Https);
}
protocols
}
@@ -163,7 +92,7 @@ mod imp {
mod imp {
use std::collections::BTreeSet;
- use crate::net_report::reportgen::ProbeProto;
+ use crate::net_report::Probe;
/// Options for running probes (in browsers).
///
@@ -196,10 +125,10 @@ mod imp {
}
/// Turn the options into set of valid protocols
- pub(crate) fn to_protocols(&self) -> BTreeSet {
+ pub(crate) fn as_protocols(&self) -> BTreeSet {
let mut protocols = BTreeSet::new();
if self.https {
- protocols.insert(ProbeProto::Https);
+ protocols.insert(Probe::Https);
}
protocols
}
diff --git a/iroh/src/net_report/ping.rs b/iroh/src/net_report/ping.rs
deleted file mode 100644
index c887ea1e99c..00000000000
--- a/iroh/src/net_report/ping.rs
+++ /dev/null
@@ -1,182 +0,0 @@
-//! Allows sending ICMP echo requests to a host in order to determine network latency.
-
-use std::{
- fmt::Debug,
- net::IpAddr,
- sync::{Arc, Mutex},
-};
-
-use n0_future::time::Duration;
-use nested_enum_utils::common_fields;
-use snafu::{ResultExt, Snafu};
-use surge_ping::{Client, Config, IcmpPacket, PingIdentifier, PingSequence, ICMP};
-use tracing::debug;
-
-use crate::net_report::defaults::timeouts::DEFAULT_PINGER_TIMEOUT as DEFAULT_TIMEOUT;
-
-/// Whether this error was because we couldn't create a client or a send error.
-#[allow(missing_docs)]
-#[common_fields({
- backtrace: Option,
- #[snafu(implicit)]
- span_trace: n0_snafu::SpanTrace,
-})]
-#[derive(Debug, Snafu)]
-#[non_exhaustive]
-pub enum PingError {
- #[snafu(display("failed to create IPv4 ping client"))]
- CreateClientIpv4 { source: std::io::Error },
- #[snafu(display("failed to create IPv6 ping client"))]
- CreateClientIpv6 { source: std::io::Error },
- /// Could not send ping.
- #[snafu(display("failed to send ping"))]
- Ping { source: surge_ping::SurgeError },
-}
-
-/// Allows sending ICMP echo requests to a host in order to determine network latency.
-/// Will gracefully handle both IPv4 and IPv6.
-#[derive(Debug, Clone, Default)]
-pub struct Pinger(Arc);
-
-impl Debug for Inner {
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- f.debug_struct("Inner").finish()
- }
-}
-
-#[derive(Default)]
-struct Inner {
- client_v6: Mutex>,
- client_v4: Mutex >,
-}
-
-impl Pinger {
- /// Create a new [Pinger].
- pub fn new() -> Self {
- Default::default()
- }
-
- /// Lazily create the ping client.
- ///
- /// We do this because it means we do not bind a socket until we really try to send a
- /// ping. It makes it more transparent to use the pinger.
- fn get_client(&self, kind: ICMP) -> Result {
- let client = match kind {
- ICMP::V4 => {
- let mut opt_client = self.0.client_v4.lock().expect("poisoned");
- match *opt_client {
- Some(ref client) => client.clone(),
- None => {
- let cfg = Config::builder().kind(kind).build();
- let client = Client::new(&cfg).context(CreateClientIpv4Snafu)?;
- *opt_client = Some(client.clone());
- client
- }
- }
- }
- ICMP::V6 => {
- let mut opt_client = self.0.client_v6.lock().expect("poisoned");
- match *opt_client {
- Some(ref client) => client.clone(),
- None => {
- let cfg = Config::builder().kind(kind).build();
- let client = Client::new(&cfg).context(CreateClientIpv6Snafu)?;
- *opt_client = Some(client.clone());
- client
- }
- }
- }
- };
- Ok(client)
- }
-
- /// Send a ping request with associated data, returning the perceived latency.
- pub async fn send(&self, addr: IpAddr, data: &[u8]) -> Result {
- let client = match addr {
- IpAddr::V4(_) => self.get_client(ICMP::V4)?,
- IpAddr::V6(_) => self.get_client(ICMP::V6)?,
- };
- let ident = PingIdentifier(rand::random());
- debug!(%addr, %ident, "Creating pinger");
- let mut pinger = client.pinger(addr, ident).await;
- pinger.timeout(DEFAULT_TIMEOUT); // todo: timeout too large for net_report
- match pinger
- .ping(PingSequence(0), data)
- .await
- .context(PingSnafu)?
- {
- (IcmpPacket::V4(packet), dur) => {
- debug!(
- "{} bytes from {}: icmp_seq={} ttl={:?} time={:0.2?}",
- packet.get_size(),
- packet.get_source(),
- packet.get_sequence(),
- packet.get_ttl(),
- dur
- );
- Ok(dur)
- }
-
- (IcmpPacket::V6(packet), dur) => {
- debug!(
- "{} bytes from {}: icmp_seq={} hlim={} time={:0.2?}",
- packet.get_size(),
- packet.get_source(),
- packet.get_sequence(),
- packet.get_max_hop_limit(),
- dur
- );
- Ok(dur)
- }
- }
- }
-}
-
-#[cfg(test)]
-mod tests {
- use std::net::{Ipv4Addr, Ipv6Addr};
-
- use tracing::error;
- use tracing_test::traced_test;
-
- use super::*;
-
- // See net_report::reportgen::tests::test_icmp_probe_eu_relay for permissions to ping.
- #[tokio::test]
- #[traced_test]
- async fn test_ping_localhost() {
- let pinger = Pinger::new();
-
- match pinger.send(Ipv4Addr::LOCALHOST.into(), b"data").await {
- Ok(duration) => {
- assert!(!duration.is_zero());
- }
- Err(
- PingError::CreateClientIpv4 { source, .. }
- | PingError::CreateClientIpv6 { source, .. },
- ) => {
- // We don't have permission, too bad.
- error!("no ping permissions: {source:#}");
- }
- Err(PingError::Ping { source, .. }) => {
- panic!("ping failed: {source:#}");
- }
- }
-
- match pinger.send(Ipv6Addr::LOCALHOST.into(), b"data").await {
- Ok(duration) => {
- assert!(!duration.is_zero());
- }
- Err(
- PingError::CreateClientIpv4 { source, .. }
- | PingError::CreateClientIpv6 { source, .. },
- ) => {
- // We don't have permission, too bad.
- error!("no ping permissions: {source:#}");
- }
- Err(PingError::Ping { source, .. }) => {
- error!("ping failed, probably no IPv6 stack: {source:#}");
- }
- }
- }
-}
diff --git a/iroh/src/net_report/probes.rs b/iroh/src/net_report/probes.rs
new file mode 100644
index 00000000000..f25f7053a31
--- /dev/null
+++ b/iroh/src/net_report/probes.rs
@@ -0,0 +1,269 @@
+//! The relay probes.
+//!
+//! All the probes try and establish the latency to the relay servers. Preferably the QAD
+//! probes work and we also learn about our public IP addresses and ports. But fallback
+//! probes for HTTPS exist as well.
+
+use std::{collections::BTreeSet, fmt, sync::Arc};
+
+use iroh_relay::{RelayMap, RelayNode};
+use n0_future::time::Duration;
+use snafu::Snafu;
+
+use crate::net_report::Report;
+
+/// The retransmit interval used.
+const DEFAULT_INITIAL_RETRANSMIT: Duration = Duration::from_millis(100);
+
+/// The delay before starting HTTPS probes.
+const HTTPS_OFFSET: Duration = Duration::from_millis(200);
+
+/// The protocol used to time a node's latency.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, derive_more::Display)]
+#[repr(u8)]
+pub enum Probe {
+ /// HTTPS
+ Https,
+ /// QUIC Address Discovery Ipv4
+ #[cfg(not(wasm_browser))]
+ QadIpv4,
+ /// QUIC Address Discovery Ipv6
+ #[cfg(not(wasm_browser))]
+ QadIpv6,
+}
+
+/// A probe set is a sequence of similar [`Probe`]s with delays between them.
+///
+/// The probes are to the same relay server and of the same [`Probe`] but will have different
+/// delays. The delays are effectively retries, though they do not wait for the previous
+/// probe to be finished. The first successful probe will cancel all other probes in the
+/// set.
+///
+/// This is a lot of type-safety by convention. It would be so much nicer to have this
+/// compile-time checked but that introduces a giant mess of generics and traits and
+/// associated exploding types.
+///
+/// A [`ProbeSet`] implements [`IntoIterator`] similar to how [`Vec`] does.
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub(super) struct ProbeSet {
+ /// The [`Probe`] all the probes in this set have.
+ proto: Probe,
+ /// The data in the set.
+ probes: Vec<(Duration, Arc<RelayNode>)>,
+}
+
+#[derive(Debug, Snafu)]
+#[snafu(display("Mismatching probe"))]
+struct PushError;
+
+impl ProbeSet {
+ fn new(proto: Probe) -> Self {
+ Self {
+ probes: Vec::new(),
+ proto,
+ }
+ }
+
+ pub(super) fn proto(&self) -> Probe {
+ self.proto
+ }
+
+ fn push(&mut self, delay: Duration, node: Arc<RelayNode>) {
+ self.probes.push((delay, node));
+ }
+
+ fn is_empty(&self) -> bool {
+ self.probes.is_empty()
+ }
+
+ pub(super) fn params(&self) -> impl Iterator<Item = &(Duration, Arc<RelayNode>)> {
+ self.probes.iter()
+ }
+}
+
+/// A probe plan.
+///
+/// A probe plan contains a number of [`ProbeSet`]s containing probes to be executed.
+/// Generally the first probe of a set which completes aborts the remaining probes of a
+/// set. Sometimes a failing probe can also abort the remaining probes of a set.
+///
+/// The [`reportgen`] actor will also abort all the remaining [`ProbeSet`]s once it has
+/// sufficient information for a report.
+///
+/// [`reportgen`]: crate::net_report::reportgen
+#[derive(Debug, Default, PartialEq, Eq)]
+pub(super) struct ProbePlan {
+ set: BTreeSet<ProbeSet>,
+}
+
+impl ProbePlan {
+ /// Creates an initial probe plan
+ pub(super) fn initial(relay_map: &RelayMap, protocols: &BTreeSet<Probe>) -> Self {
+ let mut plan = Self::default();
+
+ for relay_node in relay_map.nodes() {
+ let mut https_probes = ProbeSet::new(Probe::Https);
+
+ for attempt in 0u32..3 {
+ let delay = HTTPS_OFFSET + DEFAULT_INITIAL_RETRANSMIT * attempt;
+ https_probes.push(delay, relay_node.clone());
+ }
+
+ plan.add_if_enabled(protocols, https_probes);
+ }
+ plan
+ }
+
+ /// Creates a follow up probe plan using a previous net_report report in browsers.
+ ///
+ /// This will only schedule HTTPS probes.
+ pub(super) fn with_last_report(
+ relay_map: &RelayMap,
+ last_report: &Report,
+ protocols: &BTreeSet<Probe>,
+ ) -> Self {
+ if last_report.relay_latency.is_empty() {
+ return Self::initial(relay_map, protocols);
+ }
+
+ // TODO: is this good?
+ Self::default()
+ }
+
+ /// Returns an iterator over the [`ProbeSet`]s in this plan.
+ pub(super) fn iter(&self) -> impl Iterator<Item = &ProbeSet> {
+ self.set.iter()
+ }
+
+ /// Adds a [`ProbeSet`] if it contains probes and the protocol indicated in
+ /// the [`ProbeSet`] matches a protocol in our set of [`Probe`]s.
+ fn add_if_enabled(&mut self, protocols: &BTreeSet<Probe>, set: ProbeSet) {
+ if !set.is_empty() && protocols.contains(&set.proto) {
+ self.set.insert(set);
+ }
+ }
+}
+
+impl fmt::Display for ProbePlan {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ writeln!(f, "ProbePlan {{")?;
+ for probe_set in self.set.iter() {
+ writeln!(f, r#" ProbeSet("{}") {{"#, probe_set.proto)?;
+ for (delay, node) in probe_set.probes.iter() {
+ writeln!(f, " {delay:?} to {node},")?;
+ }
+ writeln!(f, " }}")?;
+ }
+ writeln!(f, "}}")
+ }
+}
+
+impl FromIterator<ProbeSet> for ProbePlan {
+ fn from_iter<T: IntoIterator<Item = ProbeSet>>(iter: T) -> Self {
+ Self {
+ set: iter.into_iter().collect(),
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use pretty_assertions::assert_eq;
+
+ use super::*;
+ use crate::net_report::test_utils;
+
+ /// Shorthand which declares a new ProbeSet.
+ ///
+ /// `$kind`: The `Probe`.
+ /// `$node`: Expression which will be an `Arc<RelayNode>`.
+ /// `$delays`: A `Vec` of the delays for this probe.
+ macro_rules! probeset {
+ (proto: Probe::$kind:ident, relay: $node:expr, delays: $delays:expr,) => {
+ ProbeSet {
+ proto: Probe::$kind,
+ probes: $delays.iter().map(|delay| (*delay, $node)).collect(),
+ }
+ };
+ }
+
+ fn default_protocols() -> BTreeSet<Probe> {
+ BTreeSet::from([Probe::QadIpv4, Probe::QadIpv6, Probe::Https])
+ }
+
+ #[tokio::test]
+ async fn test_initial_probeplan() {
+ let (_servers, relay_map) = test_utils::relay_map(2).await;
+ let relay_node_1 = relay_map.nodes().next().unwrap();
+ let relay_node_2 = relay_map.nodes().nth(1).unwrap();
+ let plan = ProbePlan::initial(&relay_map, &default_protocols());
+
+ let expected_plan: ProbePlan = [
+ probeset! {
+ proto: Probe::Https,
+ relay: relay_node_1.clone(),
+ delays: [
+ Duration::from_millis(200),
+ Duration::from_millis(300),
+ Duration::from_millis(400)
+ ],
+ },
+ probeset! {
+ proto: Probe::Https,
+ relay: relay_node_2.clone(),
+ delays: [
+ Duration::from_millis(200),
+ Duration::from_millis(300),
+ Duration::from_millis(400)
+ ],
+ },
+ ]
+ .into_iter()
+ .collect();
+
+ println!("expected:");
+ println!("{expected_plan}");
+ println!("actual:");
+ println!("{plan}");
+ // The readable error:
+ assert_eq!(plan.to_string(), expected_plan.to_string());
+ // Just in case there's a bug in the Display impl:
+ assert_eq!(plan, expected_plan);
+ }
+
+ #[tokio::test]
+ async fn test_initial_probeplan_some_protocols() {
+ let (_servers, relay_map) = test_utils::relay_map(2).await;
+ let relay_node_1 = relay_map.nodes().next().unwrap();
+ let relay_node_2 = relay_map.nodes().nth(1).unwrap();
+ let plan = ProbePlan::initial(&relay_map, &BTreeSet::from([Probe::Https]));
+
+ let expected_plan: ProbePlan = [
+ probeset! {
+ proto: Probe::Https,
+ relay: relay_node_1.clone(),
+ delays: [Duration::from_millis(200),
+ Duration::from_millis(300),
+ Duration::from_millis(400)],
+ },
+ probeset! {
+ proto: Probe::Https,
+ relay: relay_node_2.clone(),
+ delays: [Duration::from_millis(200),
+ Duration::from_millis(300),
+ Duration::from_millis(400)],
+ },
+ ]
+ .into_iter()
+ .collect();
+
+ println!("expected:");
+ println!("{expected_plan}");
+ println!("actual:");
+ println!("{plan}");
+ // The readable error:
+ assert_eq!(plan.to_string(), expected_plan.to_string());
+ // Just in case there's a bug in the Display impl:
+ assert_eq!(plan, expected_plan);
+ }
+}
diff --git a/iroh/src/net_report/report.rs b/iroh/src/net_report/report.rs
new file mode 100644
index 00000000000..af6a957359e
--- /dev/null
+++ b/iroh/src/net_report/report.rs
@@ -0,0 +1,214 @@
+use std::{
+ collections::BTreeMap,
+ fmt,
+ net::{SocketAddr, SocketAddrV4, SocketAddrV6},
+ time::Duration,
+};
+
+use iroh_base::RelayUrl;
+use tracing::warn;
+
+use super::{probes::Probe, ProbeReport};
+
+/// A net_report report.
+#[derive(Default, Debug, PartialEq, Eq, Clone)]
+pub struct Report {
+ /// A QAD IPv4 round trip completed.
+ pub udp_v4: bool,
+ /// A QAD IPv6 round trip completed.
+ pub udp_v6: bool,
+ /// Whether the reported public address differs when probing different servers (on IPv4).
+ pub mapping_varies_by_dest_ipv4: Option<bool>,
+ /// Whether the reported public address differs when probing different servers (on IPv6).
+ pub mapping_varies_by_dest_ipv6: Option<bool>,
+ /// Probe indicating the presence of port mapping protocols on the LAN.
+ /// `None` for unknown
+ pub preferred_relay: Option<RelayUrl>,
+ /// keyed by relay Url
+ pub relay_latency: RelayLatencies,
+ /// ip:port of global IPv4
+ pub global_v4: Option<SocketAddrV4>,
+ /// `[ip]:port` of global IPv6
+ pub global_v6: Option<SocketAddrV6>,
+ /// CaptivePortal is set when we think there's a captive portal that is
+ /// intercepting HTTP traffic.
+ pub captive_portal: Option<bool>,
+}
+
+impl fmt::Display for Report {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fmt::Debug::fmt(&self, f)
+ }
+}
+
+impl Report {
+ /// Do we have any indication that UDP is working?
+ pub fn has_udp(&self) -> bool {
+ self.udp_v4 || self.udp_v6
+ }
+
+ /// Whether the reported public address differs when probing different servers.
+ pub fn mapping_varies_by_dest(&self) -> Option<bool> {
+ match (
+ self.mapping_varies_by_dest_ipv4,
+ self.mapping_varies_by_dest_ipv6,
+ ) {
+ (Some(v4), Some(v6)) => Some(v4 || v6),
+ (None, Some(v6)) => Some(v6),
+ (Some(v4), None) => Some(v4),
+ (None, None) => None,
+ }
+ }
+
+ /// Updates a net_report [`Report`] with a new [`ProbeReport`].
+ pub(super) fn update(&mut self, report: &ProbeReport) {
+ match report {
+ ProbeReport::Https(report) => {
+ self.relay_latency
+ .update_relay(report.node.clone(), report.latency, Probe::Https);
+ }
+ #[cfg(not(wasm_browser))]
+ ProbeReport::QadIpv4(report) => {
+ self.relay_latency.update_relay(
+ report.node.clone(),
+ report.latency,
+ Probe::QadIpv4,
+ );
+ let SocketAddr::V4(ipp) = report.addr else {
+ warn!("received IPv6 address from IPv4 QAD: {}", report.addr);
+ return;
+ };
+
+ self.udp_v4 = true;
+
+ tracing::debug!(?self.global_v4, ?self.mapping_varies_by_dest_ipv4, %ipp,"got");
+ if let Some(global) = self.global_v4 {
+ if global == ipp {
+ if self.mapping_varies_by_dest_ipv4.is_none() {
+ self.mapping_varies_by_dest_ipv4 = Some(false);
+ }
+ } else {
+ self.mapping_varies_by_dest_ipv4 = Some(true);
+ warn!("IPv4 address detected by QAD varies by destination");
+ }
+ } else {
+ self.global_v4 = Some(ipp);
+ }
+ }
+ #[cfg(not(wasm_browser))]
+ ProbeReport::QadIpv6(report) => {
+ self.relay_latency.update_relay(
+ report.node.clone(),
+ report.latency,
+ Probe::QadIpv6,
+ );
+ let SocketAddr::V6(ipp) = report.addr else {
+ warn!("received IPv4 address from IPv6 QAD: {}", report.addr);
+ return;
+ };
+
+ self.udp_v6 = true;
+ tracing::debug!(?self.global_v6, ?self.mapping_varies_by_dest_ipv6, %ipp,"got");
+ if let Some(global) = self.global_v6 {
+ if global == ipp {
+ if self.mapping_varies_by_dest_ipv6.is_none() {
+ self.mapping_varies_by_dest_ipv6 = Some(false);
+ }
+ } else {
+ self.mapping_varies_by_dest_ipv6 = Some(true);
+ warn!("IPv6 address detected by QAD varies by destination");
+ }
+ } else {
+ self.global_v6 = Some(ipp);
+ }
+ }
+ }
+ }
+}
+
+/// Latencies per relay node.
+#[derive(Debug, Default, PartialEq, Eq, Clone)]
+pub struct RelayLatencies {
+ #[cfg(not(wasm_browser))]
+ ipv4: BTreeMap<RelayUrl, Duration>,
+ #[cfg(not(wasm_browser))]
+ ipv6: BTreeMap<RelayUrl, Duration>,
+ https: BTreeMap<RelayUrl, Duration>,
+}
+
+impl RelayLatencies {
+ /// Updates a relay's latency, if it is faster than before.
+ pub(super) fn update_relay(&mut self, url: RelayUrl, latency: Duration, probe: Probe) {
+ let list = match probe {
+ Probe::Https => &mut self.https,
+ #[cfg(not(wasm_browser))]
+ Probe::QadIpv4 => &mut self.ipv4,
+ #[cfg(not(wasm_browser))]
+ Probe::QadIpv6 => &mut self.ipv6,
+ };
+ let old_latency = list.entry(url).or_insert(latency);
+ if latency < *old_latency {
+ *old_latency = latency;
+ }
+ }
+
+ /// Merges another [`RelayLatencies`] into this one.
+ ///
+ /// For each relay the latency is updated using [`RelayLatencies::update_relay`].
+ pub(super) fn merge(&mut self, other: &RelayLatencies) {
+ for (url, latency) in other.https.iter() {
+ self.update_relay(url.clone(), *latency, Probe::Https);
+ }
+ #[cfg(not(wasm_browser))]
+ for (url, latency) in other.ipv4.iter() {
+ self.update_relay(url.clone(), *latency, Probe::QadIpv4);
+ }
+ #[cfg(not(wasm_browser))]
+ for (url, latency) in other.ipv6.iter() {
+ self.update_relay(url.clone(), *latency, Probe::QadIpv6);
+ }
+ }
+
+ /// Returns an iterator over all the relays and their latencies.
+ #[cfg(not(wasm_browser))]
+ pub fn iter(&self) -> impl Iterator<Item = (&RelayUrl, Duration)> + '_ {
+ self.https
+ .iter()
+ .chain(self.ipv4.iter())
+ .chain(self.ipv6.iter())
+ .map(|(k, v)| (k, *v))
+ }
+
+ /// Returns an iterator over all the relays and their latencies.
+ #[cfg(wasm_browser)]
+ pub fn iter(&self) -> impl Iterator<Item = (&RelayUrl, Duration)> + '_ {
+ self.https.iter().map(|(k, v)| (k, *v))
+ }
+
+ #[cfg(not(wasm_browser))]
+ pub(super) fn is_empty(&self) -> bool {
+ self.https.is_empty() && self.ipv4.is_empty() && self.ipv6.is_empty()
+ }
+
+ #[cfg(wasm_browser)]
+ pub(super) fn is_empty(&self) -> bool {
+ self.https.is_empty()
+ }
+
+ /// Returns the lowest latency across records.
+ pub(super) fn get(&self, url: &RelayUrl) -> Option<Duration> {
+ let mut list = Vec::with_capacity(3);
+ if let Some(val) = self.https.get(url) {
+ list.push(*val);
+ }
+ #[cfg(not(wasm_browser))]
+ if let Some(val) = self.ipv4.get(url) {
+ list.push(*val);
+ }
+ #[cfg(not(wasm_browser))]
+ if let Some(val) = self.ipv6.get(url) {
+ list.push(*val);
+ }
+ list.into_iter().min()
+ }
+}
diff --git a/iroh/src/net_report/reportgen.rs b/iroh/src/net_report/reportgen.rs
index 4484f20047d..aa6d95094f5 100644
--- a/iroh/src/net_report/reportgen.rs
+++ b/iroh/src/net_report/reportgen.rs
@@ -6,7 +6,6 @@
//! messages from the client. It follows roughly these steps:
//!
//! - Determines host IPv6 support.
-//! - Creates hairpin actor.
//! - Creates portmapper future.
//! - Creates captive portal detection future.
//! - Creates Probe Set futures.
@@ -16,24 +15,21 @@
//! - Stop if there are no outstanding tasks/futures, or on timeout.
//! - Sends the completed report to the net_report actor.
+#[cfg(not(wasm_browser))]
+use std::net::{SocketAddrV4, SocketAddrV6};
use std::{
collections::BTreeSet,
- future::Future,
net::{IpAddr, SocketAddr},
- pin::Pin,
sync::Arc,
- task::{Context, Poll},
};
use http::StatusCode;
use iroh_base::RelayUrl;
+use iroh_relay::{defaults::DEFAULT_RELAY_QUIC_PORT, http::RELAY_PROBE_PATH, RelayMap, RelayNode};
#[cfg(not(wasm_browser))]
-use iroh_relay::dns::{DnsError, DnsResolver, StaggeredError};
use iroh_relay::{
- defaults::{DEFAULT_RELAY_QUIC_PORT, DEFAULT_STUN_PORT},
- http::RELAY_PROBE_PATH,
- protos::stun,
- RelayMap, RelayNode,
+ dns::{DnsError, DnsResolver, StaggeredError},
+ quic::QuicClient,
};
#[cfg(wasm_browser)]
use n0_future::future::Pending;
@@ -42,61 +38,71 @@ use n0_future::{
time::{self, Duration, Instant},
StreamExt as _,
};
-#[cfg(not(wasm_browser))]
-use netwatch::{interfaces, UdpSocket};
use rand::seq::IteratorRandom;
-use snafu::{IntoError, ResultExt, Snafu};
-use tokio::sync::{mpsc, oneshot};
-use tracing::{debug, debug_span, error, info_span, trace, warn, Instrument, Span};
+use snafu::{IntoError, OptionExt, ResultExt, Snafu};
+use tokio::sync::mpsc;
+use tokio_util::sync::CancellationToken;
+use tracing::{debug, debug_span, error, info_span, trace, warn, Instrument};
use url::Host;
#[cfg(wasm_browser)]
-use crate::net_report::portmapper; // We stub the library
-use crate::net_report::{self, Metrics, Report};
+use super::portmapper; // We stub the library
#[cfg(not(wasm_browser))]
-use crate::net_report::{
- defaults::timeouts::DNS_TIMEOUT,
- dns::DNS_STAGGERING_MS,
- ip_mapped_addrs::IpMappedAddresses,
- ping::{PingError, Pinger},
+use super::{defaults::timeouts::DNS_TIMEOUT, ip_mapped_addrs::IpMappedAddresses};
+use super::{
+ probes::{Probe, ProbePlan},
+ Report,
};
-
#[cfg(not(wasm_browser))]
-mod hairpin;
-mod probes;
-
-pub use probes::ProbeProto;
-use probes::{Probe, ProbePlan};
-
+use crate::discovery::dns::DNS_STAGGERING_MS;
use crate::net_report::defaults::timeouts::{
CAPTIVE_PORTAL_DELAY, CAPTIVE_PORTAL_TIMEOUT, OVERALL_REPORT_TIMEOUT, PROBES_TIMEOUT,
};
-const ENOUGH_NODES: usize = 3;
-
-/// Holds the state for a single invocation of [`net_report::Client::get_report`].
+/// Holds the state for a single report generation.
///
/// Dropping this will cancel the actor and stop the report generation.
#[derive(Debug)]
pub(super) struct Client {
- // Addr is currently only used by child actors, so not yet exposed here.
_drop_guard: AbortOnDropHandle<()>,
}
+/// Some details required from the interface state of the device.
+#[derive(Debug, Clone, Default)]
+pub(crate) struct IfStateDetails {
+ /// Do we have IPv4 capabilities
+ pub have_v4: bool,
+ /// Do we have IPv6 capabilities
+ pub have_v6: bool,
+}
+
+impl IfStateDetails {
+ #[cfg(test)]
+ pub(super) fn fake() -> Self {
+ IfStateDetails {
+ have_v4: true,
+ have_v6: true,
+ }
+ }
+}
+
+impl From<netwatch::netmon::State> for IfStateDetails {
+ fn from(value: netwatch::netmon::State) -> Self {
+ IfStateDetails {
+ have_v4: value.have_v4,
+ have_v6: value.have_v6,
+ }
+ }
+}
+
/// Any state that depends on sockets being available in the current environment.
///
/// Factored out so it can be disabled easily in browsers.
#[cfg(not(wasm_browser))]
#[derive(Debug, Clone)]
pub(crate) struct SocketState {
- /// The portmapper client, if there is one.
- pub(crate) port_mapper: Option,
- /// Socket to send IPv4 STUN requests from.
- pub(crate) stun_sock4: Option>,
- /// Socket so send IPv6 STUN requests from.
- pub(crate) stun_sock6: Option>,
- /// QUIC configuration to do QUIC address Discovery
- pub(crate) quic_config: Option,
+ /// QUIC client to do QUIC address Discovery
+ pub(crate) quic_client: Option<QuicClient>,
/// The DNS resolver to use for probes that need to resolve DNS records.
pub(crate) dns_resolver: DnsResolver,
/// Optional [`IpMappedAddresses`] used to enable QAD in iroh
@@ -109,158 +115,89 @@ impl Client {
/// The actor starts running immediately and only generates a single report, after which
/// it shuts down. Dropping this handle will abort the actor.
pub(super) fn new(
- net_report: net_report::Addr,
- last_report: Option>,
+ last_report: Option<Report>,
relay_map: RelayMap,
- protocols: BTreeSet,
- metrics: Arc,
+ protocols: BTreeSet<Probe>,
+ if_state: IfStateDetails,
#[cfg(not(wasm_browser))] socket_state: SocketState,
#[cfg(any(test, feature = "test-utils"))] insecure_skip_relay_cert_verify: bool,
- ) -> Self {
+ ) -> (Self, mpsc::Receiver<ProbeFinished>) {
let (msg_tx, msg_rx) = mpsc::channel(32);
- let addr = Addr {
- sender: msg_tx.clone(),
- };
- let mut actor = Actor {
+ let actor = Actor {
msg_tx,
- msg_rx,
- net_report: net_report.clone(),
last_report,
relay_map,
- report: Report::default(),
- outstanding_tasks: OutstandingTasks::default(),
protocols,
#[cfg(not(wasm_browser))]
socket_state,
- #[cfg(not(wasm_browser))]
- hairpin_actor: hairpin::Client::new(net_report, addr),
- metrics,
#[cfg(any(test, feature = "test-utils"))]
insecure_skip_relay_cert_verify,
+ if_state,
};
- let task =
- task::spawn(async move { actor.run().await }.instrument(info_span!("reportgen.actor")));
- Self {
- _drop_guard: AbortOnDropHandle::new(task),
- }
- }
-}
-
-/// The address of the reportstate [`Actor`].
-///
-/// Unlike the [`Client`] struct itself this is the raw channel to send message over.
-/// Keeping this alive will not keep the actor alive, which makes this handy to pass to
-/// internal tasks.
-#[derive(Debug, Clone)]
-pub(super) struct Addr {
- sender: mpsc::Sender,
-}
-
-impl Addr {
- /// Blocking send to the actor, to be used from a non-actor future.
- async fn send(&self, msg: Message) -> Result<(), mpsc::error::SendError> {
- trace!(
- "sending {:?} to channel with cap {}",
- msg,
- self.sender.capacity()
- );
- self.sender.send(msg).await
+ let task = task::spawn(actor.run().instrument(info_span!("reportgen.actor")));
+ (
+ Self {
+ _drop_guard: AbortOnDropHandle::new(task),
+ },
+ msg_rx,
+ )
}
}
-/// Messages to send to the reportstate [`Actor`].
-#[derive(Debug)]
-enum Message {
- /// Set the hairpinning availability in the report.
- HairpinResult(bool),
- /// Check whether executing a probe would still help.
- // TODO: Ideally we remove the need for this message and the logic is inverted: once we
- // get a probe result we cancel all probes that are no longer needed. But for now it's
- // this way around to ease conversion.
- ProbeWouldHelp(Probe, Arc, oneshot::Sender),
- /// Abort all remaining probes.
- AbortProbes,
-}
-
/// The reportstate actor.
///
/// This actor starts, generates a single report and exits.
#[derive(Debug)]
struct Actor {
- /// The sender of the message channel, so we can give out [`Addr`].
- msg_tx: mpsc::Sender,
- /// The receiver of the message channel.
- msg_rx: mpsc::Receiver,
- /// The address of the net_report actor.
- net_report: super::Addr,
+ msg_tx: mpsc::Sender<ProbeFinished>,
// Provided state
/// The previous report, if it exists.
- last_report: Option>,
+ last_report: Option<Report>,
/// The relay configuration.
relay_map: RelayMap,
// Internal state.
- /// The report being built.
- report: Report,
- /// Which tasks the [`Actor`] is still waiting on.
- ///
- /// This is essentially the summary of all the work the [`Actor`] is doing.
- outstanding_tasks: OutstandingTasks,
/// Protocols we should attempt to create probes for, if we have the correct
/// configuration for that protocol.
- protocols: BTreeSet,
+ protocols: BTreeSet<Probe>,
/// Any socket-related state that doesn't exist/work in browsers
#[cfg(not(wasm_browser))]
socket_state: SocketState,
- /// The hairpin actor.
- #[cfg(not(wasm_browser))]
- hairpin_actor: hairpin::Client,
- metrics: Arc,
#[cfg(any(test, feature = "test-utils"))]
insecure_skip_relay_cert_verify: bool,
+ if_state: IfStateDetails,
}
#[allow(missing_docs)]
#[derive(Debug, Snafu)]
#[non_exhaustive]
-pub enum ActorRunError {
- #[snafu(display("Report generation timed out"))]
- Timeout,
- #[snafu(display("Client that requested the report is gone"))]
- ClientGone,
- #[snafu(display("Internal NetReport actor is gone"))]
- ActorGone,
- #[snafu(transparent)]
- Probes { source: ProbesError },
-}
-
-#[allow(missing_docs)]
-#[derive(Debug, Snafu)]
-#[non_exhaustive]
-pub enum ProbesError {
+#[snafu(module)]
+pub(super) enum ProbesError {
#[snafu(display("Probe failed"))]
ProbeFailure { source: ProbeError },
#[snafu(display("All probes failed"))]
AllProbesFailed,
+ #[snafu(display("Probe cancelled"))]
+ Cancelled,
+ #[snafu(display("Probe timed out"))]
+ Timeout,
}
-impl Actor {
- fn addr(&self) -> Addr {
- Addr {
- sender: self.msg_tx.clone(),
- }
- }
+#[derive(Debug)]
+pub(super) enum ProbeFinished {
+ Regular(Result<ProbeReport, ProbesError>),
+ #[cfg(not(wasm_browser))]
+ CaptivePortal(Option<bool>),
+}
- async fn run(&mut self) {
- match self.run_inner().await {
- Ok(_) => debug!("reportgen actor finished"),
- Err(err) => {
- self.net_report
- .send(net_report::Message::ReportAborted { reason: err })
- .await
- .ok();
+impl Actor {
+ async fn run(self) {
+ match time::timeout(OVERALL_REPORT_TIMEOUT, self.run_inner()).await {
+ Ok(()) => debug!("reportgen actor finished"),
+ Err(time::Elapsed { .. }) => {
+ warn!("reportgen timed out");
}
}
}
@@ -269,7 +206,6 @@ impl Actor {
///
/// This actor runs by:
///
- /// - Creates a hairpin actor.
/// - Creates a captive portal future.
/// - Creates ProbeSet futures in a group of futures.
/// - Runs a main loop:
@@ -277,269 +213,58 @@ impl Actor {
/// - Receives actor messages (sent by those futures).
/// - Updates the report, cancels unneeded futures.
/// - Sends the report to the net_report actor.
- async fn run_inner(&mut self) -> Result<(), ActorRunError> {
- #[cfg(not(wasm_browser))]
- let port_mapper = self.socket_state.port_mapper.is_some();
- #[cfg(wasm_browser)]
- let port_mapper = false;
- debug!(%port_mapper, "reportstate actor starting");
-
- self.report.os_has_ipv6 = super::os_has_ipv6();
-
- let mut port_mapping = self.prepare_portmapper_task();
- let mut captive_task = self.prepare_captive_portal_task();
- let mut probes = self.spawn_probes_task().await;
-
- let total_timer = time::sleep(OVERALL_REPORT_TIMEOUT);
- tokio::pin!(total_timer);
- let probe_timer = time::sleep(PROBES_TIMEOUT);
- tokio::pin!(probe_timer);
-
- loop {
- trace!(awaiting = ?self.outstanding_tasks, "tick; awaiting tasks");
- if self.outstanding_tasks.all_done() {
- debug!("all tasks done");
- break;
- }
- tokio::select! {
- biased;
- _ = &mut total_timer => {
- trace!("tick: total_timer expired");
- return Err(TimeoutSnafu.build());
- }
+ async fn run_inner(self) {
+ debug!("reportstate actor starting");
- _ = &mut probe_timer => {
- warn!("tick: probes timed out");
- // Set new timeout to not go into this branch multiple times. We need
- // the abort to finish all probes normally. PROBES_TIMEOUT is
- // sufficiently far in the future.
- probe_timer.as_mut().reset(Instant::now() + PROBES_TIMEOUT);
- probes.abort_all();
- self.handle_abort_probes();
- }
+ let mut probes = JoinSet::default();
- // Drive the portmapper.
- pm = &mut port_mapping, if self.outstanding_tasks.port_mapper => {
- debug!(report=?pm, "tick: portmapper probe report");
- self.report.portmap_probe = pm;
- port_mapping.inner = None;
- self.outstanding_tasks.port_mapper = false;
- }
+ let _probes_token = self.spawn_probes_task(self.if_state.clone(), &mut probes);
+ let mut num_probes = probes.len();
- // Check for probes finishing.
- set_result = probes.join_next(), if self.outstanding_tasks.probes => {
- trace!("tick: probes done: {:?}", set_result);
- match set_result {
- Some(Ok(Ok(report))) => self.handle_probe_report(report),
- Some(Ok(Err(_))) => (),
- Some(Err(e)) => {
- warn!("probes task error: {:?}", e);
- }
- None => {
- self.handle_abort_probes();
- }
- }
- trace!("tick: probes handled");
- }
+ let captive_token = self.prepare_captive_portal_task(&mut probes);
- // Drive the captive task.
- found = &mut captive_task, if self.outstanding_tasks.captive_task => {
- trace!("tick: captive portal task done");
- self.report.captive_portal = found;
- captive_task.inner = None;
- self.outstanding_tasks.captive_task = false;
- }
+ // any reports of working UDP/QUIC?
+ let mut have_udp = false;
- // Handle actor messages.
- msg = self.msg_rx.recv() => {
- trace!("tick: msg recv: {:?}", msg);
- match msg {
- Some(msg) => self.handle_message(msg),
- None => {
- return Err(ClientGoneSnafu.build());
+ // Check for probes finishing.
+ while let Some(probe_result) = probes.join_next().await {
+ trace!(?probe_result, num_probes, "processing finished probe");
+ match probe_result {
+ Ok(report) => {
+ #[cfg_attr(wasm_browser, allow(irrefutable_let_patterns))]
+ if let ProbeFinished::Regular(report) = &report {
+ have_udp |= report.as_ref().map(|r| r.is_udp()).unwrap_or_default();
+ num_probes -= 1;
+
+ // If all probes are done & we have_udp cancel captive
+ if num_probes == 0 {
+ debug!("all regular probes done");
+ debug_assert!(probes.len() <= 1, "{} probes", probes.len());
+
+ if have_udp {
+ captive_token.cancel();
+ }
}
}
+ self.msg_tx.send(report).await.ok();
}
- }
- }
-
- if !probes.is_empty() {
- debug!(
- "aborting {} probe sets, already have enough reports",
- probes.len()
- );
- drop(probes);
- }
-
- debug!("Sending report to net_report actor");
- self.net_report
- .send(net_report::Message::ReportReady {
- report: Box::new(self.report.clone()),
- })
- .await
- .map_err(|_| ActorGoneSnafu.build())?;
-
- Ok(())
- }
-
- /// Handles an actor message.
- ///
- /// Returns `true` if all the probes need to be aborted.
- fn handle_message(&mut self, msg: Message) {
- trace!(?msg, "handling message");
- match msg {
- Message::HairpinResult(works) => {
- self.report.hair_pinning = Some(works);
- self.outstanding_tasks.hairpin = false;
- }
- Message::ProbeWouldHelp(probe, relay_node, response_tx) => {
- let res = self.probe_would_help(probe, relay_node);
- if response_tx.send(res).is_err() {
- debug!("probe dropped before ProbeWouldHelp response sent");
- }
- }
- Message::AbortProbes => {
- self.handle_abort_probes();
- }
- }
- }
-
- fn handle_probe_report(&mut self, probe_report: ProbeReport) {
- debug!(?probe_report, "finished probe");
- update_report(&mut self.report, probe_report);
-
- // When we discover the first IPv4 address we want to start the hairpin actor.
- #[cfg(not(wasm_browser))]
- if let Some(ref addr) = self.report.global_v4 {
- if !self.hairpin_actor.has_started() {
- self.hairpin_actor.start_check(*addr);
- self.outstanding_tasks.hairpin = true;
- }
- }
-
- // Once we've heard from enough relay servers (3), start a timer to give up on the other
- // probes. The timer's duration is a function of whether this is our initial full
- // probe or an incremental one. For incremental ones, wait for the duration of the
- // slowest relay. For initial ones, double that.
- let enough_relays = std::cmp::min(self.relay_map.len(), ENOUGH_NODES);
- if self.report.relay_latency.len() == enough_relays {
- let timeout = self.report.relay_latency.max_latency();
- let timeout = match self.last_report.is_some() {
- true => timeout,
- false => timeout * 2,
- };
- let reportcheck = self.addr();
- debug!(
- reports=self.report.relay_latency.len(),
- delay=?timeout,
- "Have enough probe reports, aborting further probes soon",
- );
- task::spawn(
- async move {
- time::sleep(timeout).await;
- // Because we do this after a timeout it is entirely normal that the
- // actor is no longer there by the time we send this message.
- reportcheck
- .send(Message::AbortProbes)
- .await
- .map_err(|err| trace!("Failed to abort all probes: {err:#}"))
- .ok();
- }
- .instrument(Span::current()),
- );
- }
- }
-
- /// Whether running this probe would still improve our report.
- fn probe_would_help(&mut self, probe: Probe, relay_node: Arc) -> bool {
- // If the probe is for a relay we don't yet know about, that would help.
- if self.report.relay_latency.get(&relay_node.url).is_none() {
- return true;
- }
-
- // If the probe is for IPv6 and we don't yet have an IPv6 report, that would help.
- #[cfg(not(wasm_browser))]
- if probe.proto() == ProbeProto::StunIpv6 && self.report.relay_v6_latency.is_empty() {
- return true;
- }
-
- // For IPv4, we need at least two IPv4 results overall to
- // determine whether we're behind a NAT that shows us as
- // different source IPs and/or ports depending on who we're
- // talking to. If we don't yet have two results yet
- // (`mapping_varies_by_dest_ip` is blank), then another IPv4 probe
- // would be good.
- #[cfg(not(wasm_browser))]
- if probe.proto() == ProbeProto::StunIpv4 && self.report.mapping_varies_by_dest_ip.is_none()
- {
- return true;
- }
-
- // Otherwise not interesting.
- false
- }
-
- /// Stops further probes.
- ///
- /// This makes sure that no further probes are run and also cancels the captive portal
- /// and portmapper tasks if there were successful probes. Be sure to only handle this
- /// after all the required [`ProbeReport`]s have been processed.
- fn handle_abort_probes(&mut self) {
- trace!("handle abort probes");
- self.outstanding_tasks.probes = false;
- if self.report.udp {
- self.outstanding_tasks.port_mapper = false;
- self.outstanding_tasks.captive_task = false;
- }
- }
-
- /// Creates the future which will perform the portmapper task.
- ///
- /// The returned future will run the portmapper, if enabled, resolving to it's result.
- fn prepare_portmapper_task(
- &mut self,
- ) -> MaybeFuture>>>> {
- // In the browser, the compiler struggles to infer the type of future inside, because it's never set.
- #[cfg(wasm_browser)]
- let port_mapping: MaybeFuture>>>> =
- MaybeFuture::default();
-
- #[cfg(not(wasm_browser))]
- let mut port_mapping = MaybeFuture::default();
-
- #[cfg(not(wasm_browser))]
- if let Some(port_mapper) = self.socket_state.port_mapper.clone() {
- port_mapping.inner = Some(Box::pin(async move {
- match port_mapper.probe().await {
- Ok(Ok(res)) => Some(res),
- Ok(Err(err)) => {
- debug!("skipping port mapping: {err:?}");
- None
- }
- Err(recv_err) => {
- warn!("skipping port mapping: {recv_err:?}");
- None
+ Err(e) => {
+ if e.is_panic() {
+ error!("Task panicked {:?}", e);
+ break;
}
+ warn!("probes task join error: {:?}", e);
}
- }));
- self.outstanding_tasks.port_mapper = true;
+ }
}
- port_mapping
}
/// Creates the future which will perform the captive portal check.
- fn prepare_captive_portal_task(
- &mut self,
- ) -> MaybeFuture>>>> {
- // In the browser case the compiler cannot infer the type of the future, because it's never set:
- #[cfg(wasm_browser)]
- let captive_task: MaybeFuture>>>> = MaybeFuture::default();
-
- #[cfg(not(wasm_browser))]
- let mut captive_task = MaybeFuture::default();
+ fn prepare_captive_portal_task(&self, tasks: &mut JoinSet) -> CancellationToken {
+ let token = CancellationToken::new();
// If we're doing a full probe, also check for a captive portal. We
- // delay by a bit to wait for UDP STUN to finish, to avoid the probe if
+ // delay by a bit to wait for UDP QAD to finish, to avoid the probe if
// it's unnecessary.
#[cfg(not(wasm_browser))]
if self.last_report.is_none() {
@@ -552,39 +277,49 @@ impl Actor {
let dns_resolver = self.socket_state.dns_resolver.clone();
let dm = self.relay_map.clone();
- self.outstanding_tasks.captive_task = true;
- captive_task.inner = Some(Box::pin(async move {
- time::sleep(CAPTIVE_PORTAL_DELAY).await;
- debug!("Captive portal check started after {CAPTIVE_PORTAL_DELAY:?}");
- let captive_portal_check = time::timeout(
- CAPTIVE_PORTAL_TIMEOUT,
- check_captive_portal(&dns_resolver, &dm, preferred_relay)
- .instrument(debug_span!("captive-portal")),
- );
- match captive_portal_check.await {
- Ok(Ok(found)) => Some(found),
- Ok(Err(err)) => {
- match err {
- CaptivePortalError::CreateReqwestClient { ref source }
- | CaptivePortalError::HttpRequest { ref source } => {
- if source.is_connect() {
- debug!("check_captive_portal failed: {err:#}");
+ let token = token.clone();
+ tasks.spawn(
+ async move {
+ let res = token
+ .run_until_cancelled_owned(async move {
+ time::sleep(CAPTIVE_PORTAL_DELAY).await;
+ trace!("check started after {CAPTIVE_PORTAL_DELAY:?}");
+ time::timeout(
+ CAPTIVE_PORTAL_TIMEOUT,
+ check_captive_portal(&dns_resolver, &dm, preferred_relay),
+ )
+ .await
+ })
+ .await;
+ let res = match res {
+ Some(Ok(Ok(found))) => Some(found),
+ Some(Ok(Err(err))) => {
+ match err {
+ CaptivePortalError::CreateReqwestClient { source }
+ | CaptivePortalError::HttpRequest { source }
+ if source.is_connect() =>
+ {
+ debug!("check_captive_portal failed: {source:#}");
}
+ err => warn!("check_captive_portal error: {err:#}"),
}
- _ => warn!("check_captive_portal error: {err:#}"),
+ None
}
- None
- }
- Err(_) => {
- warn!("check_captive_portal timed out");
- None
- }
+ Some(Err(time::Elapsed { .. })) => {
+ warn!("probe timed out");
+ None
+ }
+ None => {
+ trace!("probe cancelled");
+ None
+ }
+ };
+ ProbeFinished::CaptivePortal(res)
}
- }));
+ .instrument(debug_span!("captive-portal")),
+ );
}
-
- self.outstanding_tasks.captive_task = false;
- captive_task
+ token
}
/// Prepares the future which will run all the probes as per generated ProbePlan.
@@ -605,220 +340,132 @@ impl Actor {
/// failure permanent. Probes in a probe set are essentially retries.
/// - Once there are [`ProbeReport`]s from enough nodes, all remaining probes are
/// aborted. That is, the main actor loop stops polling them.
- async fn spawn_probes_task(&mut self) -> JoinSet> {
- #[cfg(not(wasm_browser))]
- let if_state = interfaces::State::new().await;
- #[cfg(not(wasm_browser))]
- debug!(%if_state, "Local interfaces");
+ fn spawn_probes_task(
+ &self,
+ if_state: IfStateDetails,
+ probes: &mut JoinSet,
+ ) -> CancellationToken {
+ debug!(?if_state, "local interface details");
let plan = match self.last_report {
- Some(ref report) => ProbePlan::with_last_report(
- &self.relay_map,
- report,
- &self.protocols,
- #[cfg(not(wasm_browser))]
- &if_state,
- ),
- None => ProbePlan::initial(
- &self.relay_map,
- &self.protocols,
- #[cfg(not(wasm_browser))]
- &if_state,
- ),
+ Some(ref report) => {
+ ProbePlan::with_last_report(&self.relay_map, report, &self.protocols)
+ }
+ None => ProbePlan::initial(&self.relay_map, &self.protocols),
};
trace!(%plan, "probe plan");
- // The pinger is created here so that any sockets that might be bound for it are
- // shared between the probes that use it. It binds sockets lazily, so we can always
- // create it.
- #[cfg(not(wasm_browser))]
- let pinger = Pinger::new();
+ let token = CancellationToken::new();
- // A collection of futures running probe sets.
- let mut probes = JoinSet::default();
for probe_set in plan.iter() {
- let mut set = JoinSet::default();
- for probe in probe_set {
- let reportstate = self.addr();
- let relay_node = probe.node().clone();
- let probe = probe.clone();
- let net_report = self.net_report.clone();
-
- #[cfg(not(wasm_browser))]
- let pinger = pinger.clone();
- #[cfg(not(wasm_browser))]
- let socket_state = self.socket_state.clone();
-
- let metrics = self.metrics.clone();
- set.spawn(
- run_probe(
- reportstate,
- relay_node,
- probe.clone(),
- net_report,
- metrics,
- #[cfg(not(wasm_browser))]
- pinger,
+ let set_token = token.child_token();
+ let proto = probe_set.proto();
+ for (delay, relay_node) in probe_set.params() {
+ let probe_token = set_token.child_token();
+
+ let fut = probe_token.run_until_cancelled_owned(time::timeout(
+ PROBES_TIMEOUT,
+ proto.run(
+ *delay,
+ relay_node.clone(),
#[cfg(not(wasm_browser))]
- socket_state,
+ self.socket_state.clone(),
#[cfg(any(test, feature = "test-utils"))]
self.insecure_skip_relay_cert_verify,
- )
- .instrument(debug_span!("run_probe", %probe)),
- );
- }
-
- // Add the probe set to all futures of probe sets. Handle aborting a probe set
- // if needed, only normal errors means the set continues.
- probes.spawn(
- async move {
- // Hack because ProbeSet is not it's own type yet.
- let mut probe_proto = None;
- while let Some(res) = set.join_next().await {
- match res {
- Ok(Ok(report)) => return Ok(report),
- Ok(Err(ProbeErrorWithProbe::Error(err, probe))) => {
- probe_proto = Some(probe.proto());
- warn!(?probe, "probe failed: {:#}", err);
- continue;
- }
- Ok(Err(ProbeErrorWithProbe::AbortSet(err, probe))) => {
- debug!(?probe, "probe set aborted: {:#}", err);
- set.abort_all();
- return Err(ProbeFailureSnafu.into_error(err));
+ ),
+ ));
+ probes.spawn(
+ async move {
+ let res = fut.await;
+ let res = match res {
+ Some(Ok(Ok(report))) => Ok(report),
+ Some(Ok(Err(err))) => {
+ warn!("probe failed: {:#}", err);
+ Err(probes_error::ProbeFailureSnafu {}.into_error(err))
}
- Err(err) => {
- warn!("fatal probe set error, aborting: {:#}", err);
- continue;
+ Some(Err(time::Elapsed { .. })) => {
+ Err(probes_error::TimeoutSnafu.build())
}
- }
+ None => Err(probes_error::CancelledSnafu.build()),
+ };
+ ProbeFinished::Regular(res)
}
- warn!(?probe_proto, "no successful probes in ProbeSet");
- Err(AllProbesFailedSnafu.build())
- }
- .instrument(info_span!("probe")),
- );
+ .instrument(debug_span!(
+ "run-probe",
+ ?proto,
+ ?delay,
+ ?relay_node
+ )),
+ );
+ }
}
- self.outstanding_tasks.probes = true;
- probes
+ token
}
}
-/// Tasks on which the reportgen [`Actor`] is still waiting.
-///
-/// There is no particular progression, e.g. hairpin starts `false`, moves to `true` when a
-/// check is started and then becomes `false` again once it is finished.
-#[derive(Debug, Default)]
-struct OutstandingTasks {
- probes: bool,
- port_mapper: bool,
- captive_task: bool,
- hairpin: bool,
+/// The result of running a probe.
+#[derive(Debug, Clone)]
+pub(super) enum ProbeReport {
+ #[cfg(not(wasm_browser))]
+ QadIpv4(QadProbeReport),
+ #[cfg(not(wasm_browser))]
+ QadIpv6(QadProbeReport),
+ Https(HttpsProbeReport),
}
-impl OutstandingTasks {
- fn all_done(&self) -> bool {
- !(self.probes || self.port_mapper || self.captive_task || self.hairpin)
+impl ProbeReport {
+ #[cfg(not(wasm_browser))]
+ pub(super) fn is_udp(&self) -> bool {
+ matches!(self, Self::QadIpv4(_) | Self::QadIpv6(_))
+ }
+
+ #[cfg(wasm_browser)]
+ pub(super) fn is_udp(&self) -> bool {
+ false
}
}
-/// The success result of [`run_probe`].
-#[derive(Debug, Clone)]
-struct ProbeReport {
- /// Whether we can send IPv4 UDP packets.
- ipv4_can_send: bool,
- /// Whether we can send IPv6 UDP packets.
- ipv6_can_send: bool,
- /// Whether we can send ICMPv4 packets, `None` if not checked.
- icmpv4: Option,
- /// Whether we can send ICMPv6 packets, `None` if not checked.
- icmpv6: Option,
+#[cfg(not(wasm_browser))]
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub(super) struct QadProbeReport {
+ /// The relay node that was probed
+ pub(super) node: RelayUrl,
/// The latency to the relay node.
- latency: Option,
- /// The probe that generated this report.
- probe: Probe,
+ pub(super) latency: Duration,
/// The discovered public address.
- addr: Option,
+ pub(super) addr: SocketAddr,
}
-impl ProbeReport {
- fn new(probe: Probe) -> Self {
- ProbeReport {
- probe,
- ipv4_can_send: false,
- ipv6_can_send: false,
- icmpv4: None,
- icmpv6: None,
- latency: None,
- addr: None,
- }
- }
-}
-
-/// Errors for [`run_probe`].
-///
-/// The main purpose is to signal whether other probes in this probe set should still be
-/// run. Recall that a probe set is normally a set of identical probes with delays,
-/// effectively creating retries, and the first successful probe of a probe set will cancel
-/// the others in the set. So this allows an unsuccessful probe to cancel the remainder of
-/// the set or not.
-#[derive(Debug)]
-enum ProbeErrorWithProbe {
- /// Abort the current set.
- AbortSet(ProbeError, Probe),
- /// Continue the other probes in the set.
- Error(ProbeError, Probe),
+#[derive(Debug, Clone)]
+pub(super) struct HttpsProbeReport {
+ /// The relay node that was probed
+ pub(super) node: RelayUrl,
+ /// The latency to the relay node.
+ pub(super) latency: Duration,
}
#[allow(missing_docs)]
#[derive(Debug, Snafu)]
#[snafu(module)]
#[non_exhaustive]
-pub enum ProbeError {
+pub(super) enum ProbeError {
#[snafu(display("Client is gone"))]
ClientGone,
#[snafu(display("Probe is no longer useful"))]
NotUseful,
- #[cfg(not(wasm_browser))]
- #[snafu(display("Failed to retrieve the relay address"))]
- GetRelayAddr { source: GetRelayAddrError },
- #[snafu(display("Failed to run stun probe"))]
- Stun { source: StunError },
- #[snafu(display("Failed to run QUIC probe"))]
- Quic { source: QuicError },
- #[cfg(not(wasm_browser))]
- #[snafu(display("Failed to run ICMP probe"))]
- Icmp { source: PingError },
+ #[snafu(display("Failed to run HTTPS probe"))]
+ Https { source: MeasureHttpsLatencyError },
}
#[allow(missing_docs)]
#[derive(Debug, Snafu)]
#[snafu(module)]
#[non_exhaustive]
-pub enum StunError {
- #[snafu(display("No UDP socket available"))]
- NoSocket,
- #[snafu(display("Stun channel is gone"))]
- StunChannelGone,
- #[snafu(display("Failed to send full STUN request"))]
- SendFull,
- #[snafu(display("Failed to send STUN request"))]
- Send { source: std::io::Error },
-}
-
-#[allow(missing_docs)]
-#[derive(Debug, Snafu)]
-#[snafu(module)]
-#[non_exhaustive]
-pub enum QuicError {
+pub(super) enum QuicError {
#[snafu(display("No QUIC endpoint available"))]
NoEndpoint,
#[snafu(display("URL must have 'host' to use QUIC address discovery probes"))]
InvalidUrl,
- #[snafu(display("Failed to create QUIC endpoint"))]
- CreateClient { source: iroh_relay::quic::Error },
- #[snafu(display("Failed to get address and latency"))]
- GetAddr { source: iroh_relay::quic::Error },
}
/// Pieces needed to do QUIC address discovery.
@@ -835,304 +482,53 @@ pub struct QuicConfig {
pub ipv6: bool,
}
-/// Executes a particular [`Probe`], including using a delayed start if needed.
-///
-/// If *stun_sock4* and *stun_sock6* are `None` the STUN probes are disabled.
-#[allow(clippy::too_many_arguments)]
-async fn run_probe(
- reportstate: Addr,
- relay_node: Arc,
- probe: Probe,
- net_report: net_report::Addr,
- metrics: Arc,
- #[cfg(not(wasm_browser))] pinger: Pinger,
- #[cfg(not(wasm_browser))] socket_state: SocketState,
- #[cfg(any(test, feature = "test-utils"))] insecure_skip_relay_cert_verify: bool,
-) -> Result {
- if !probe.delay().is_zero() {
- trace!("delaying probe");
- time::sleep(probe.delay()).await;
- }
- debug!("starting probe");
-
- let (would_help_tx, would_help_rx) = oneshot::channel();
- if let Err(err) = reportstate
- .send(Message::ProbeWouldHelp(
- probe.clone(),
- relay_node.clone(),
- would_help_tx,
- ))
- .await
- {
- // this happens on shutdown or if the report is already finished
- debug!("Failed to check if probe would help: {err:#}");
- return Err(ProbeErrorWithProbe::AbortSet(
- probe_error::ClientGoneSnafu.build(),
- probe.clone(),
- ));
- }
-
- if !would_help_rx.await.map_err(|_| {
- ProbeErrorWithProbe::AbortSet(probe_error::ClientGoneSnafu.build(), probe.clone())
- })? {
- return Err(ProbeErrorWithProbe::AbortSet(
- probe_error::NotUsefulSnafu.build(),
- probe,
- ));
- }
-
- #[cfg(not(wasm_browser))]
- let relay_addr = get_relay_addr(&socket_state.dns_resolver, &relay_node, probe.proto())
- .await
- .map_err(|e| {
- ProbeErrorWithProbe::AbortSet(
- probe_error::GetRelayAddrSnafu.into_error(e),
- probe.clone(),
- )
- })?;
-
- let mut result = ProbeReport::new(probe.clone());
- match probe {
- #[cfg(not(wasm_browser))]
- Probe::StunIpv4 { .. } | Probe::StunIpv6 { .. } => {
- let maybe_sock = if matches!(probe, Probe::StunIpv4 { .. }) {
- socket_state.stun_sock4.as_ref()
- } else {
- socket_state.stun_sock6.as_ref()
- };
- match maybe_sock {
- Some(sock) => {
- result = run_stun_probe(sock, relay_addr, net_report, probe, &metrics).await?;
- }
- None => {
- return Err(ProbeErrorWithProbe::AbortSet(
- probe_error::StunSnafu.into_error(stun_error::NoSocketSnafu.build()),
- probe.clone(),
- ));
- }
- }
- }
- #[cfg(not(wasm_browser))]
- Probe::IcmpV4 { .. } | Probe::IcmpV6 { .. } => {
- result = run_icmp_probe(probe, relay_addr, pinger).await?
- }
- Probe::Https { ref node, .. } => {
- debug!("sending probe HTTPS");
- match measure_https_latency(
- #[cfg(not(wasm_browser))]
- &socket_state.dns_resolver,
- node,
- #[cfg(any(test, feature = "test-utils"))]
- insecure_skip_relay_cert_verify,
- )
- .await
- {
- Ok((latency, ip)) => {
- debug!(?latency, "latency");
- result.latency = Some(latency);
- // We set these IPv4 and IPv6 but they're not really used
- // and we don't necessarily set them both. If UDP is blocked
- // and both IPv4 and IPv6 are available over TCP, it's basically
- // random which fields end up getting set here.
- // Since they're not needed, that's fine for now.
- match ip {
- IpAddr::V4(_) => result.ipv4_can_send = true,
- IpAddr::V6(_) => result.ipv6_can_send = true,
- }
- }
- Err(err) => {
- warn!("https latency measurement failed: {:?}", err);
- }
- }
- }
-
- #[cfg(not(wasm_browser))]
- Probe::QuicIpv4 { ref node, .. } | Probe::QuicIpv6 { ref node, .. } => {
- debug!("sending QUIC address discovery probe");
- let url = node.url.clone();
- match socket_state.quic_config {
- Some(quic_config) => {
- result = run_quic_probe(
- quic_config,
- url,
- relay_addr,
- probe,
- socket_state.ip_mapped_addrs,
- )
- .await?;
- }
- None => {
- return Err(ProbeErrorWithProbe::AbortSet(
- probe_error::QuicSnafu.into_error(quic_error::NoEndpointSnafu.build()),
- probe.clone(),
- ));
- }
- }
- }
- }
-
- trace!("probe successful");
- Ok(result)
-}
-
-/// Run a STUN IPv4 or IPv6 probe.
-#[cfg(not(wasm_browser))]
-async fn run_stun_probe(
- sock: &Arc,
- relay_addr: SocketAddr,
- net_report: net_report::Addr,
- probe: Probe,
- metrics: &Metrics,
-) -> Result {
- match probe.proto() {
- ProbeProto::StunIpv4 => debug_assert!(relay_addr.is_ipv4()),
- ProbeProto::StunIpv6 => debug_assert!(relay_addr.is_ipv6()),
- _ => debug_assert!(false, "wrong probe"),
- }
- let txid = stun::TransactionId::default();
- let req = stun::request(txid);
-
- // Setup net_report to give us back the incoming STUN response.
- let (stun_tx, stun_rx) = oneshot::channel();
- let (inflight_ready_tx, inflight_ready_rx) = oneshot::channel();
- net_report
- .send(net_report::Message::InFlightStun(
- net_report::Inflight {
- txn: txid,
- start: Instant::now(),
- s: stun_tx,
- },
- inflight_ready_tx,
- ))
- .await
- .map_err(|_| {
- ProbeErrorWithProbe::Error(probe_error::ClientGoneSnafu.build(), probe.clone())
- })?;
- inflight_ready_rx.await.map_err(|_| {
- ProbeErrorWithProbe::Error(probe_error::ClientGoneSnafu.build(), probe.clone())
- })?;
-
- // Send the probe.
- match sock.send_to(&req, relay_addr).await {
- Ok(n) if n == req.len() => {
- debug!(%relay_addr, %txid, "sending {} probe", probe.proto());
- let mut result = ProbeReport::new(probe.clone());
-
- if matches!(probe, Probe::StunIpv4 { .. }) {
- result.ipv4_can_send = true;
- metrics.stun_packets_sent_ipv4.inc();
- } else {
- result.ipv6_can_send = true;
- metrics.stun_packets_sent_ipv6.inc();
- }
- let (delay, addr) = stun_rx.await.map_err(|_| {
- ProbeErrorWithProbe::Error(
- probe_error::StunSnafu.into_error(stun_error::StunChannelGoneSnafu.build()),
- probe.clone(),
+impl Probe {
+ /// Executes this particular [`Probe`], including using a delayed start if needed.
+ async fn run(
+ self,
+ delay: Duration,
+ relay_node: Arc,
+ #[cfg(not(wasm_browser))] socket_state: SocketState,
+ #[cfg(any(test, feature = "test-utils"))] insecure_skip_relay_cert_verify: bool,
+ ) -> Result {
+ if !delay.is_zero() {
+ trace!("delaying probe");
+ time::sleep(delay).await;
+ }
+ debug!("starting probe");
+
+ match self {
+ Probe::Https => {
+ match run_https_probe(
+ #[cfg(not(wasm_browser))]
+ &socket_state.dns_resolver,
+ relay_node.url.clone(),
+ #[cfg(any(test, feature = "test-utils"))]
+ insecure_skip_relay_cert_verify,
)
- })?;
- result.latency = Some(delay);
- result.addr = Some(addr);
- Ok(result)
- }
- Ok(n) => {
- let err = stun_error::SendFullSnafu.build();
- error!(%relay_addr, sent_len=n, req_len=req.len(), "{err:#}");
- Err(ProbeErrorWithProbe::Error(
- probe_error::StunSnafu.into_error(err),
- probe.clone(),
- ))
- }
- Err(err) => {
- let kind = err.kind();
- let err = stun_error::SendSnafu.into_error(err);
-
- // It is entirely normal that we are on a dual-stack machine with no
- // routed IPv6 network. So silence that case.
- // NetworkUnreachable and HostUnreachable are still experimental (io_error_more
- // #86442) but it is already emitted. So hack around this.
- match format!("{kind:?}").as_str() {
- "NetworkUnreachable" | "HostUnreachable" => {
- debug!(%relay_addr, "{err:#}");
- Err(ProbeErrorWithProbe::AbortSet(
- probe_error::StunSnafu.into_error(err),
- probe.clone(),
- ))
- }
- _ => {
- // No need to log this, our caller does already log this.
- Err(ProbeErrorWithProbe::Error(
- probe_error::StunSnafu.into_error(err),
- probe.clone(),
- ))
+ .await
+ {
+ Ok(report) => Ok(ProbeReport::Https(report)),
+ Err(err) => Err(probe_error::HttpsSnafu.into_error(err)),
}
}
+ #[cfg(not(wasm_browser))]
+ Probe::QadIpv4 | Probe::QadIpv6 => unreachable!("must not be used"),
}
}
}
#[cfg(not(wasm_browser))]
-fn maybe_to_mapped_addr(
- ip_mapped_addrs: Option,
+pub(super) fn maybe_to_mapped_addr(
+ ip_mapped_addrs: Option<&IpMappedAddresses>,
addr: SocketAddr,
) -> SocketAddr {
- if let Some(ip_mapped_addrs) = ip_mapped_addrs.as_ref() {
+ if let Some(ip_mapped_addrs) = ip_mapped_addrs {
return ip_mapped_addrs.get_or_register(addr).private_socket_addr();
}
addr
}
-/// Run a QUIC address discovery probe.
-#[cfg(not(wasm_browser))]
-async fn run_quic_probe(
- quic_config: QuicConfig,
- url: RelayUrl,
- relay_addr: SocketAddr,
- probe: Probe,
- ip_mapped_addrs: Option,
-) -> Result