From 953f23f674f1cca64f7a77fdffd93119e219f423 Mon Sep 17 00:00:00 2001 From: Theo Date: Mon, 21 Oct 2024 14:41:48 +0700 Subject: [PATCH 1/2] Add retry mechanism to connect to PostgreSQL before starting maplibre (#1539) --- Cargo.lock | 12 ++++++++++++ martin/Cargo.toml | 1 + martin/src/pg/config.rs | 17 +++++++++++++++-- 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4c796d477..7964eb2e3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2544,6 +2544,7 @@ dependencies = [ "tilejson", "tokio", "tokio-postgres-rustls", + "tokio-retry", "url", ] @@ -4948,6 +4949,17 @@ dependencies = [ "x509-cert", ] +[[package]] +name = "tokio-retry" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f57eb36ecbe0fc510036adff84824dd3c24bb781e21bfa67b69d556aa85214f" +dependencies = [ + "pin-project", + "rand", + "tokio", +] + [[package]] name = "tokio-rustls" version = "0.26.0" diff --git a/martin/Cargo.toml b/martin/Cargo.toml index 28dc1c4d8..169c3a30a 100644 --- a/martin/Cargo.toml +++ b/martin/Cargo.toml @@ -113,6 +113,7 @@ tilejson.workspace = true tokio = { workspace = true, features = ["io-std"] } tokio-postgres-rustls = { workspace = true, optional = true } url.workspace = true +tokio-retry = "0.3.0" [build-dependencies] static-files = { workspace = true, optional = true } diff --git a/martin/src/pg/config.rs b/martin/src/pg/config.rs index 39ad64d9c..ef2d2eeda 100644 --- a/martin/src/pg/config.rs +++ b/martin/src/pg/config.rs @@ -15,7 +15,9 @@ use crate::pg::utils::on_slow; use crate::pg::PgResult; use crate::source::TileInfoSources; use crate::utils::{IdResolver, OptBoolObj, OptOneMany}; -use crate::MartinResult; +use crate::{MartinError, MartinResult}; +use tokio_retry::strategy::{jitter, FixedInterval}; +use tokio_retry::Retry; pub trait PgInfo { fn format_id(&self) -> String; @@ -114,7 +116,18 @@ impl PgConfig { } pub async fn resolve(&mut self, id_resolver: IdResolver) -> MartinResult { - let pg = PgBuilder::new(self, id_resolver).await?; + // Retry strategy: Fixed 5 seconds interval backoff with jitter (random variation) + let retry_strategy = FixedInterval::from_millis(5000) + .map(jitter) // Add random jitter to avoid "thundering herd" problem + .take(3); // Retry up to 3 times + + // Create PgBuilder using retry_strategy + let pg = Retry::spawn(retry_strategy, || async { + PgBuilder::new(self, id_resolver.clone()).await + }) + .await + .map_err(MartinError::PostgresError)?; + let inst_tables = on_slow( pg.instantiate_tables(), // warn only if default bounds timeout has already passed From 9460050932e054af7af12c94e2aff1ce18e1142a Mon Sep 17 00:00:00 2001 From: Frank Elsinga Date: Fri, 21 Feb 2025 01:00:14 +0100 Subject: [PATCH 2/2] make sure that jitter is not all or nothing and move to tokio-retry2 --- Cargo.lock | 24 ++++++++++++------------ Cargo.toml | 1 + martin/Cargo.toml | 2 +- martin/src/pg/config.rs | 22 +++++++++++----------- 4 files changed, 25 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5082d5f82..76cbd659e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1425,7 +1425,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -2409,7 +2409,7 @@ checksum = "e19b23d53f35ce9f56aebc7d1bb4e6ac1e9c0db7ac85c8d1760c04379edced37" dependencies = [ "hermit-abi 0.4.0", "libc", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -2608,7 +2608,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" dependencies = [ "cfg-if", - "windows-targets 0.48.5", + "windows-targets 0.52.6", ] [[package]] @@ -2766,7 +2766,7 @@ dependencies = [ "tilejson", "tokio", "tokio-postgres-rustls", - "tokio-retry", + "tokio-retry2", "url", ] @@ -3726,7 +3726,7 @@ dependencies = [ "once_cell", "socket2", "tracing", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -4111,7 +4111,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -5020,7 +5020,7 @@ dependencies = [ "getrandom 0.2.15", "once_cell", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -5318,13 +5318,13 @@ dependencies = [ ] [[package]] -name = "tokio-retry" -version = "0.3.0" +name = "tokio-retry2" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f57eb36ecbe0fc510036adff84824dd3c24bb781e21bfa67b69d556aa85214f" +checksum = "1264d076dd34560544a2799e40e457bd07c43d30f4a845686b031bcd8455c84f" dependencies = [ "pin-project", - "rand", + "rand 0.9.0", "tokio", ] @@ -5924,7 +5924,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 0acf6e357..0860ef397 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -95,6 +95,7 @@ tokio = { version = "1", features = ["macros"] } tokio-postgres-rustls = "0.13" url = "2.5" xxhash-rust = { version = "0.8", features = ["xxh3"] } +tokio-retry2 = { version = "0.5.7", features = ["jitter"] } [profile.dev.package] # See https://github.com/launchbadge/sqlx#compile-time-verification diff --git a/martin/Cargo.toml b/martin/Cargo.toml index 462deb2ce..22fc81ed9 100644 --- a/martin/Cargo.toml +++ b/martin/Cargo.toml @@ -116,7 +116,7 @@ tilejson.workspace = true tokio = { workspace = true, features = ["io-std"] } tokio-postgres-rustls = { workspace = true, optional = true } url.workspace = true -tokio-retry = "0.3.0" +tokio-retry2.workspace = true [build-dependencies] static-files = { workspace = true, optional = true } diff --git a/martin/src/pg/config.rs b/martin/src/pg/config.rs index 19c1975de..31b81c654 100644 --- a/martin/src/pg/config.rs +++ b/martin/src/pg/config.rs @@ -16,9 +16,9 @@ use crate::pg::PgError; use crate::pg::PgResult; use crate::source::TileInfoSources; use crate::utils::{IdResolver, OptBoolObj, OptOneMany}; -use crate::{MartinError, MartinResult}; -use tokio_retry::strategy::{jitter, FixedInterval}; -use tokio_retry::Retry; +use crate::MartinResult; +use tokio_retry2::strategy::{jitter_range, FixedInterval}; +use tokio_retry2::{Retry, RetryError}; pub trait PgInfo { fn format_id(&self) -> String; @@ -135,17 +135,17 @@ impl PgConfig { } pub async fn resolve(&mut self, id_resolver: IdResolver) -> MartinResult { - // Retry strategy: Fixed 5 seconds interval backoff with jitter (random variation) - let retry_strategy = FixedInterval::from_millis(5000) - .map(jitter) // Add random jitter to avoid "thundering herd" problem - .take(3); // Retry up to 3 times + // waits a maximum of 20s..25s before failing permanently + let retry_strategy = FixedInterval::from_millis(500) + .map(jitter_range(0.8, 1.0)) // jitter 400ms..500ms => no thundering herd + .take(50); - // Create PgBuilder using retry_strategy let pg = Retry::spawn(retry_strategy, || async { - PgBuilder::new(self, id_resolver.clone()).await + PgBuilder::new(self, id_resolver.clone()) + .await + .map_err(RetryError::transient) }) - .await - .map_err(MartinError::PostgresError)?; + .await?; let inst_tables = on_slow( pg.instantiate_tables(),