Skip to content

Commit ce86238

Browse files
authored
Block high page numbers unconditionally (rust-lang#12181)
Remove conditional blocking based on user-agent and IP address blocklists. Now all requests exceeding `max_allowed_page_offset` are blocked when the endpoint uses `.limit_page_numbers()`, regardless of the requesting client's identity. This simplifies the pagination logic and removes the need for maintaining blocklists while still protecting against performance issues from deep pagination.
1 parent 25960a2 commit ce86238

File tree

4 files changed

+4
-127
lines changed

4 files changed

+4
-127
lines changed

src/config/server.rs

Lines changed: 1 addition & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
use anyhow::{Context, anyhow};
2-
use ipnetwork::IpNetwork;
31
use oauth2::{ClientId, ClientSecret};
42
use url::Url;
53

@@ -53,8 +51,6 @@ pub struct Server {
5351
pub blocked_traffic: Vec<(String, Vec<String>)>,
5452
pub blocked_ips: HashSet<IpAddr>,
5553
pub max_allowed_page_offset: u32,
56-
pub page_offset_ua_blocklist: Vec<String>,
57-
pub page_offset_cidr_blocklist: Vec<IpNetwork>,
5854
pub excluded_crate_names: Vec<String>,
5955
pub domain_name: String,
6056
pub allowed_origins: AllowedOrigins,
@@ -123,12 +119,6 @@ impl Server {
123119
/// querying metrics will be completely disabled.
124120
/// - `WEB_MAX_ALLOWED_PAGE_OFFSET`: Page offsets larger than this value are rejected. Defaults
125121
/// to 200.
126-
/// - `WEB_PAGE_OFFSET_UA_BLOCKLIST`: A comma separated list of user-agent substrings that will
127-
/// be blocked if `WEB_MAX_ALLOWED_PAGE_OFFSET` is exceeded. Including an empty string in the
128-
/// list will block *all* user-agents exceeding the offset. If not set or empty, no blocking
129-
/// will occur.
130-
/// - `WEB_PAGE_OFFSET_CIDR_BLOCKLIST`: A comma separated list of CIDR blocks that will be used
131-
/// to block IP addresses, e.g. `192.168.1.0/24`. If not set or empty, no blocking will occur.
132122
/// - `INSTANCE_METRICS_LOG_EVERY_SECONDS`: How frequently should instance metrics be logged.
133123
/// If the environment variable is not present instance metrics are not logged.
134124
/// - `FORCE_UNCONDITIONAL_REDIRECTS`: Whether to force unconditional redirects in the download
@@ -156,9 +146,6 @@ impl Server {
156146
let blocked_ips = HashSet::from_iter(list_parsed("BLOCKED_IPS", IpAddr::from_str)?);
157147

158148
let allowed_origins = AllowedOrigins::from_default_env()?;
159-
let page_offset_ua_blocklist = list("WEB_PAGE_OFFSET_UA_BLOCKLIST")?;
160-
let page_offset_cidr_blocklist =
161-
list_parsed("WEB_PAGE_OFFSET_CIDR_BLOCKLIST", parse_cidr_block)?;
162149

163150
let base = Base::from_environment()?;
164151
let excluded_crate_names = list("EXCLUDED_CRATE_NAMES")?;
@@ -229,8 +216,6 @@ impl Server {
229216
blocked_traffic: blocked_traffic(),
230217
blocked_ips,
231218
max_allowed_page_offset: var_parsed("WEB_MAX_ALLOWED_PAGE_OFFSET")?.unwrap_or(200),
232-
page_offset_ua_blocklist,
233-
page_offset_cidr_blocklist,
234219
excluded_crate_names,
235220
domain_name,
236221
allowed_origins,
@@ -268,34 +253,6 @@ impl Server {
268253
}
269254
}
270255

271-
/// Parses a CIDR block string to a valid `IpNetwork` struct.
272-
///
273-
/// The purpose is to be able to block IP ranges that overload the API that uses pagination.
274-
///
275-
/// The minimum number of bits for a host prefix must be
276-
///
277-
/// * at least 16 for IPv4 based CIDRs.
278-
/// * at least 64 for IPv6 based CIDRs
279-
///
280-
fn parse_cidr_block(block: &str) -> anyhow::Result<IpNetwork> {
281-
let cidr = block
282-
.parse()
283-
.context("WEB_PAGE_OFFSET_CIDR_BLOCKLIST must contain IPv4 or IPv6 CIDR blocks.")?;
284-
285-
let host_prefix = match cidr {
286-
IpNetwork::V4(_) => 16,
287-
IpNetwork::V6(_) => 64,
288-
};
289-
290-
if cidr.prefix() < host_prefix {
291-
return Err(anyhow!(
292-
"WEB_PAGE_OFFSET_CIDR_BLOCKLIST only allows CIDR blocks with a host prefix of at least 16 bits (IPv4) or 64 bits (IPv6)."
293-
));
294-
}
295-
296-
Ok(cidr)
297-
}
298-
299256
fn blocked_traffic() -> Vec<(String, Vec<String>)> {
300257
let pattern_list = dotenvy::var("BLOCKED_TRAFFIC").unwrap_or_default();
301258
parse_traffic_patterns(&pattern_list)
@@ -346,7 +303,7 @@ impl FromStr for AllowedOrigins {
346303
#[cfg(test)]
347304
mod tests {
348305
use super::*;
349-
use claims::{assert_err, assert_none, assert_ok_eq};
306+
use claims::assert_none;
350307

351308
#[test]
352309
fn parse_traffic_patterns_splits_on_comma_and_looks_for_equal_sign() {
@@ -362,44 +319,4 @@ mod tests {
362319

363320
assert_none!(parse_traffic_patterns(pattern_string_3).next());
364321
}
365-
366-
#[test]
367-
fn parse_cidr_block_list_successfully() {
368-
assert_ok_eq!(
369-
parse_cidr_block("127.0.0.1/24"),
370-
"127.0.0.1/24".parse::<IpNetwork>().unwrap()
371-
);
372-
assert_ok_eq!(
373-
parse_cidr_block("192.168.0.1/31"),
374-
"192.168.0.1/31".parse::<IpNetwork>().unwrap()
375-
);
376-
}
377-
378-
#[test]
379-
fn parse_cidr_blocks_panics_when_host_ipv4_prefix_is_too_low() {
380-
assert_err!(parse_cidr_block("127.0.0.1/8"));
381-
}
382-
383-
#[test]
384-
fn parse_cidr_blocks_panics_when_host_ipv6_prefix_is_too_low() {
385-
assert_err!(parse_cidr_block(
386-
"2001:0db8:0123:4567:89ab:cdef:1234:5678/56"
387-
));
388-
}
389-
390-
#[test]
391-
fn parse_ipv6_based_cidr_blocks() {
392-
assert_ok_eq!(
393-
parse_cidr_block("2002::1234:abcd:ffff:c0a8:101/64"),
394-
"2002::1234:abcd:ffff:c0a8:101/64"
395-
.parse::<IpNetwork>()
396-
.unwrap()
397-
);
398-
assert_ok_eq!(
399-
parse_cidr_block("2001:0db8:0123:4567:89ab:cdef:1234:5678/92"),
400-
"2001:0db8:0123:4567:89ab:cdef:1234:5678/92"
401-
.parse::<IpNetwork>()
402-
.unwrap()
403-
);
404-
}
405322
}

src/controllers/helpers/pagination.rs

Lines changed: 2 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
1-
use crate::config::Server;
21
use crate::middleware::app::RequestApp;
32
use crate::middleware::log_request::RequestLogExt;
4-
use crate::middleware::real_ip::RealIp;
53
use crate::models::helpers::with_count::*;
6-
use crate::util::HeaderMapExt;
74
use crate::util::errors::{AppResult, bad_request};
85
use std::num::NonZeroU32;
96

@@ -16,7 +13,6 @@ use diesel::sql_types::BigInt;
1613
use diesel_async::AsyncPgConnection;
1714
use futures_util::future::BoxFuture;
1815
use futures_util::{FutureExt, TryStreamExt};
19-
use http::header;
2016
use http::request::Parts;
2117
use indexmap::IndexMap;
2218
use serde::{Deserialize, Serialize};
@@ -125,12 +121,10 @@ impl PaginationOptionsBuilder {
125121
parts.request_log().add("bot", "suspected");
126122
}
127123

128-
// Block large offsets for known violators of the crawler policy
124+
// Block large offsets for performance reasons
129125
if self.limit_page_numbers {
130126
let config = &parts.app().config;
131-
if numeric_page > config.max_allowed_page_offset
132-
&& is_useragent_or_ip_blocked(config, parts)
133-
{
127+
if numeric_page > config.max_allowed_page_offset {
134128
parts.request_log().add("cause", "large page offset");
135129

136130
let error = format!(
@@ -329,36 +323,6 @@ impl RawSeekPayload {
329323
}
330324
}
331325

332-
/// Function to check if the request is blocked.
333-
///
334-
/// A request can be blocked if either the User Agent is on the User Agent block list or if the client
335-
/// IP is on the CIDR block list.
336-
fn is_useragent_or_ip_blocked(config: &Server, req: &Parts) -> bool {
337-
let user_agent = req.headers.get_str_or_default(header::USER_AGENT);
338-
let client_ip = req.extensions.get::<RealIp>();
339-
340-
// check if user agent is blocked
341-
if config
342-
.page_offset_ua_blocklist
343-
.iter()
344-
.any(|blocked| user_agent.contains(blocked))
345-
{
346-
return true;
347-
}
348-
349-
// check if client ip is blocked, needs to be an IPv4 address
350-
if let Some(client_ip) = client_ip
351-
&& config
352-
.page_offset_cidr_blocklist
353-
.iter()
354-
.any(|blocked| blocked.contains(**client_ip))
355-
{
356-
return true;
357-
}
358-
359-
false
360-
}
361-
362326
/// Encode a payload to be used as a seek key.
363327
///
364328
/// The payload is base64-encoded to hint that it shouldn't be manually constructed. There is no

src/tests/pagination.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
11
use crate::builders::CrateBuilder;
22
use crate::util::{RequestHelper, TestApp};
33
use insta::assert_snapshot;
4-
use ipnetwork::IpNetwork;
54

65
#[tokio::test(flavor = "multi_thread")]
7-
async fn pagination_blocks_ip_from_cidr_block_list() {
6+
async fn pagination_blocks_high_page_numbers() {
87
let (app, anon, user) = TestApp::init()
98
.with_config(|config| {
109
config.max_allowed_page_offset = 1;
11-
config.page_offset_cidr_blocklist = vec!["127.0.0.1/24".parse::<IpNetwork>().unwrap()];
1210
})
1311
.with_user()
1412
.await;

src/tests/util/test_app.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -497,8 +497,6 @@ fn simple_config() -> config::Server {
497497
blocked_traffic: Default::default(),
498498
blocked_ips: Default::default(),
499499
max_allowed_page_offset: 200,
500-
page_offset_ua_blocklist: vec![],
501-
page_offset_cidr_blocklist: vec![],
502500
excluded_crate_names: vec![],
503501
domain_name: "crates.io".into(),
504502
allowed_origins: Default::default(),

0 commit comments

Comments
 (0)