From 700ec7797a1f2ec1d9e2ab37d391d13d066e9f7c Mon Sep 17 00:00:00 2001 From: AhmarZaidi Date: Tue, 14 Jan 2025 12:51:44 +0530 Subject: [PATCH 1/8] Optimize URL encoding logic in get_response_header --- .../class-od-link-collection.php | 30 +++++++++++++++++-- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/plugins/optimization-detective/class-od-link-collection.php b/plugins/optimization-detective/class-od-link-collection.php index 4e41a7af74..a2cf2f4b6d 100644 --- a/plugins/optimization-detective/class-od-link-collection.php +++ b/plugins/optimization-detective/class-od-link-collection.php @@ -248,9 +248,33 @@ public function get_response_header(): ?string { $link_headers = array(); foreach ( $this->get_prepared_links() as $link ) { - // The about:blank is present since a Link without a reference-uri is invalid so any imagesrcset would otherwise not get downloaded. - $link['href'] = isset( $link['href'] ) ? esc_url_raw( $link['href'] ) : 'about:blank'; - $link_header = '<' . $link['href'] . '>'; + // Encode only the filename part of the URL to ensure it contains only ASCII characters. + if ( isset( $link['href'] ) ) { + $parsed_url = wp_parse_url( $link['href'] ); + if ( isset( $parsed_url['path'] ) ) { + $path_segments = explode( '/', $parsed_url['path'] ); + $last_segment = array_pop( $path_segments ); + $encoded_last_segment = rawurlencode( $last_segment ); + + $encoded_path = implode( '/', $path_segments ) . '/' . $encoded_last_segment; + + $scheme = isset( $parsed_url['scheme'] ) ? $parsed_url['scheme'] : ''; + $host = isset( $parsed_url['host'] ) ? $parsed_url['host'] : ''; + + $link['href'] = esc_url_raw( $scheme . '://' . $host . $encoded_path ); + + // Append query and fragment if they exist. + $link['href'] .= isset( $parsed_url['query'] ) ? '?' . $parsed_url['query'] : ''; + $link['href'] .= isset( $parsed_url['fragment'] ) ? '#' . 
$parsed_url['fragment'] : ''; + } else { + $link['href'] = esc_url_raw( $link['href'] ); + } + } else { + // The about:blank is present since a Link without a reference-uri is invalid so any imagesrcset would otherwise not get downloaded. + $link['href'] = 'about:blank'; + } + + $link_header = '<' . $link['href'] . '>'; unset( $link['href'] ); foreach ( $link as $name => $value ) { /* From d1c5afddb1151e112a062f2d66f4f6a04b84f2eb Mon Sep 17 00:00:00 2001 From: AhmarZaidi Date: Thu, 30 Jan 2025 13:06:21 +0530 Subject: [PATCH 2/8] Update encoding logic to encode whole path --- .../class-od-link-collection.php | 21 ++++--------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/plugins/optimization-detective/class-od-link-collection.php b/plugins/optimization-detective/class-od-link-collection.php index 42eafa7513..46e4f3ec61 100644 --- a/plugins/optimization-detective/class-od-link-collection.php +++ b/plugins/optimization-detective/class-od-link-collection.php @@ -255,24 +255,11 @@ public function get_response_header(): ?string { $link_headers = array(); foreach ( $this->get_prepared_links() as $link ) { - // Encode only the filename part of the URL to ensure it contains only ASCII characters. + // Check if the href contains any non-ASCII characters. if ( isset( $link['href'] ) ) { - $parsed_url = wp_parse_url( $link['href'] ); - if ( isset( $parsed_url['path'] ) ) { - $path_segments = explode( '/', $parsed_url['path'] ); - $last_segment = array_pop( $path_segments ); - $encoded_last_segment = rawurlencode( $last_segment ); - - $encoded_path = implode( '/', $path_segments ) . '/' . $encoded_last_segment; - - $scheme = isset( $parsed_url['scheme'] ) ? $parsed_url['scheme'] : ''; - $host = isset( $parsed_url['host'] ) ? $parsed_url['host'] : ''; - - $link['href'] = esc_url_raw( $scheme . '://' . $host . $encoded_path ); - - // Append query and fragment if they exist. - $link['href'] .= isset( $parsed_url['query'] ) ? '?' . 
$parsed_url['query'] : ''; - $link['href'] .= isset( $parsed_url['fragment'] ) ? '#' . $parsed_url['fragment'] : ''; + if ( 1 === preg_match( '/[^\x00-\x7F]/', $link['href'] ) ) { + // Decode and then encode the entire URL to handle non-ASCII characters. + $link['href'] = esc_url_raw( rawurlencode( urldecode( $link['href'] ) ) ); } else { $link['href'] = esc_url_raw( $link['href'] ); } From 505c5bbf770a57cfb2d932c30a4e8fbbe3a9da45 Mon Sep 17 00:00:00 2001 From: AhmarZaidi Date: Fri, 31 Jan 2025 18:53:08 +0530 Subject: [PATCH 3/8] Add tests and update url encode scheme separately --- .../class-od-link-collection.php | 7 ++- .../tests/test-class-od-link-collection.php | 51 +++++++++++++++++++ 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/plugins/optimization-detective/class-od-link-collection.php b/plugins/optimization-detective/class-od-link-collection.php index 46e4f3ec61..fec31486a7 100644 --- a/plugins/optimization-detective/class-od-link-collection.php +++ b/plugins/optimization-detective/class-od-link-collection.php @@ -258,8 +258,11 @@ public function get_response_header(): ?string { // Check if the href contains any non-ASCII characters. if ( isset( $link['href'] ) ) { if ( 1 === preg_match( '/[^\x00-\x7F]/', $link['href'] ) ) { - // Decode and then encode the entire URL to handle non-ASCII characters. - $link['href'] = esc_url_raw( rawurlencode( urldecode( $link['href'] ) ) ); + $parsed_url = wp_parse_url( $link['href'] ); + $scheme = isset( $parsed_url['scheme'] ) ? $parsed_url['scheme'] . '://' : ''; + $rest_of_url = substr( $link['href'], strlen( $scheme ) ); + + $link['href'] = esc_url_raw( $scheme . 
rawurlencode( urldecode( $rest_of_url ) ) ); } else { $link['href'] = esc_url_raw( $link['href'] ); } diff --git a/plugins/optimization-detective/tests/test-class-od-link-collection.php b/plugins/optimization-detective/tests/test-class-od-link-collection.php index da274c1763..cbe2e13acd 100644 --- a/plugins/optimization-detective/tests/test-class-od-link-collection.php +++ b/plugins/optimization-detective/tests/test-class-od-link-collection.php @@ -335,6 +335,57 @@ public function data_provider_to_test_add_link(): array { 'expected_count' => 0, 'error' => 'Maximum width must be greater than zero and greater than the minimum width.', ), + 'international_domain_name' => array( + 'links_args' => array( + array( + array( + 'rel' => 'preload', + 'href' => 'https://xn--fsq.com/תמונה.jpg', + 'as' => 'image', + ), + ), + ), + 'expected_html' => ' + + ', + 'expected_header' => 'Link: ; rel="preload"; as="image"', + 'expected_count' => 1, + 'error' => '', + ), + 'non_ascii_path' => array( + 'links_args' => array( + array( + array( + 'rel' => 'preload', + 'href' => 'https://example.com/חנות/תמונה.jpg', + 'as' => 'image', + ), + ), + ), + 'expected_html' => ' + + ', + 'expected_header' => 'Link: ; rel="preload"; as="image"', + 'expected_count' => 1, + 'error' => '', + ), + 'multisite_subdirectory_non_ascii' => array( + 'links_args' => array( + array( + array( + 'rel' => 'preload', + 'href' => 'https://example.com/חנות/wp-content/uploads/2025/01/example.jpg', + 'as' => 'image', + ), + ), + ), + 'expected_html' => ' + + ', + 'expected_header' => 'Link: ; rel="preload"; as="image"', + 'expected_count' => 1, + 'error' => '', + ), ); } From 50591ef924f6251c7cd558dbe779f448ae6787e6 Mon Sep 17 00:00:00 2001 From: AhmarZaidi Date: Sat, 1 Feb 2025 01:21:29 +0530 Subject: [PATCH 4/8] Encode only non ascii characters --- .../class-od-link-collection.php | 19 ++++++++++--------- .../tests/test-class-od-link-collection.php | 6 +++--- 2 files changed, 13 insertions(+), 12 deletions(-) diff 
--git a/plugins/optimization-detective/class-od-link-collection.php b/plugins/optimization-detective/class-od-link-collection.php index fec31486a7..d06588c9ec 100644 --- a/plugins/optimization-detective/class-od-link-collection.php +++ b/plugins/optimization-detective/class-od-link-collection.php @@ -255,17 +255,18 @@ public function get_response_header(): ?string { $link_headers = array(); foreach ( $this->get_prepared_links() as $link ) { - // Check if the href contains any non-ASCII characters. if ( isset( $link['href'] ) ) { - if ( 1 === preg_match( '/[^\x00-\x7F]/', $link['href'] ) ) { - $parsed_url = wp_parse_url( $link['href'] ); - $scheme = isset( $parsed_url['scheme'] ) ? $parsed_url['scheme'] . '://' : ''; - $rest_of_url = substr( $link['href'], strlen( $scheme ) ); + $decoded_url = urldecode( $link['href'] ); - $link['href'] = esc_url_raw( $scheme . rawurlencode( urldecode( $rest_of_url ) ) ); - } else { - $link['href'] = esc_url_raw( $link['href'] ); - } + // Encode only non-ASCII characters. + $encoded_url = preg_replace_callback( + '/[^\x00-\x7F]/', + static function ( $matches ) { + return rawurlencode( $matches[0] ); + }, + $decoded_url + ); + $link['href'] = esc_url_raw( $encoded_url ?? '' ); } else { // The about:blank is present since a Link without a reference-uri is invalid so any imagesrcset would otherwise not get downloaded. 
$link['href'] = 'about:blank'; diff --git a/plugins/optimization-detective/tests/test-class-od-link-collection.php b/plugins/optimization-detective/tests/test-class-od-link-collection.php index cbe2e13acd..23f4caa8ad 100644 --- a/plugins/optimization-detective/tests/test-class-od-link-collection.php +++ b/plugins/optimization-detective/tests/test-class-od-link-collection.php @@ -348,7 +348,7 @@ public function data_provider_to_test_add_link(): array { 'expected_html' => ' ', - 'expected_header' => 'Link: ; rel="preload"; as="image"', + 'expected_header' => 'Link: ; rel="preload"; as="image"', 'expected_count' => 1, 'error' => '', ), @@ -365,7 +365,7 @@ public function data_provider_to_test_add_link(): array { 'expected_html' => ' ', - 'expected_header' => 'Link: ; rel="preload"; as="image"', + 'expected_header' => 'Link: ; rel="preload"; as="image"', 'expected_count' => 1, 'error' => '', ), @@ -382,7 +382,7 @@ public function data_provider_to_test_add_link(): array { 'expected_html' => ' ', - 'expected_header' => 'Link: ; rel="preload"; as="image"', + 'expected_header' => 'Link: ; rel="preload"; as="image"', 'expected_count' => 1, 'error' => '', ), From 8389a40c04cfcc2ab071ca67c14c0788851d85e2 Mon Sep 17 00:00:00 2001 From: AhmarZaidi Date: Mon, 3 Feb 2025 12:30:21 +0530 Subject: [PATCH 5/8] Update non-ascii matching logic & tests --- .../class-od-link-collection.php | 2 +- .../tests/test-class-od-link-collection.php | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/plugins/optimization-detective/class-od-link-collection.php b/plugins/optimization-detective/class-od-link-collection.php index d06588c9ec..43ce51012e 100644 --- a/plugins/optimization-detective/class-od-link-collection.php +++ b/plugins/optimization-detective/class-od-link-collection.php @@ -260,7 +260,7 @@ public function get_response_header(): ?string { // Encode only non-ASCII characters. 
$encoded_url = preg_replace_callback( - '/[^\x00-\x7F]/', + '/[^A-Za-z0-9\-._~:\/?#\[\]@!$&\'()*+,;=%]/', static function ( $matches ) { return rawurlencode( $matches[0] ); }, diff --git a/plugins/optimization-detective/tests/test-class-od-link-collection.php b/plugins/optimization-detective/tests/test-class-od-link-collection.php index 23f4caa8ad..cdfb71000b 100644 --- a/plugins/optimization-detective/tests/test-class-od-link-collection.php +++ b/plugins/optimization-detective/tests/test-class-od-link-collection.php @@ -340,15 +340,15 @@ public function data_provider_to_test_add_link(): array { array( array( 'rel' => 'preload', - 'href' => 'https://xn--fsq.com/תמונה.jpg', + 'href' => 'https://例.example.com/תמונה.jpg', 'as' => 'image', ), ), ), 'expected_html' => ' - + ', - 'expected_header' => 'Link: ; rel="preload"; as="image"', + 'expected_header' => 'Link: ; rel="preload"; as="image"', 'expected_count' => 1, 'error' => '', ), @@ -374,15 +374,15 @@ public function data_provider_to_test_add_link(): array { array( array( 'rel' => 'preload', - 'href' => 'https://example.com/חנות/wp-content/uploads/2025/01/example.jpg', + 'href' => 'https://example.com/חנות/wp-content/uploads/2025/01/example.jpg?ver=1+2', 'as' => 'image', ), ), ), 'expected_html' => ' - + ', - 'expected_header' => 'Link: ; rel="preload"; as="image"', + 'expected_header' => 'Link: ; rel="preload"; as="image"', 'expected_count' => 1, 'error' => '', ), From ace40cbcf6157af088c0c017296de17cb3b596e9 Mon Sep 17 00:00:00 2001 From: Weston Ruter Date: Mon, 3 Feb 2025 12:43:43 -0800 Subject: [PATCH 6/8] Add test case for a responsive preload link without an href --- .../class-od-link-collection.php | 2 +- .../tests/test-class-od-link-collection.php | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/plugins/optimization-detective/class-od-link-collection.php b/plugins/optimization-detective/class-od-link-collection.php index 43ce51012e..0fb6485ece 100644 --- 
a/plugins/optimization-detective/class-od-link-collection.php +++ b/plugins/optimization-detective/class-od-link-collection.php @@ -258,7 +258,7 @@ public function get_response_header(): ?string { if ( isset( $link['href'] ) ) { $decoded_url = urldecode( $link['href'] ); - // Encode only non-ASCII characters. + // Encode characters not allowed in a URL per RFC 3986 (anything that is not among the reserved and unreserved characters). $encoded_url = preg_replace_callback( '/[^A-Za-z0-9\-._~:\/?#\[\]@!$&\'()*+,;=%]/', static function ( $matches ) { diff --git a/plugins/optimization-detective/tests/test-class-od-link-collection.php b/plugins/optimization-detective/tests/test-class-od-link-collection.php index cdfb71000b..9c08a0b056 100644 --- a/plugins/optimization-detective/tests/test-class-od-link-collection.php +++ b/plugins/optimization-detective/tests/test-class-od-link-collection.php @@ -41,6 +41,25 @@ public function data_provider_to_test_add_link(): array { 'expected_count' => 1, 'error' => '', ), + 'preload_imagesrcset_without_href' => array( + 'links_args' => array( + array( + array( + 'rel' => 'preload', + 'imagesrcset' => 'https://example.com/foo-400.jpg 400w, https://example.com/foo-800.jpg 800w', + 'imagesizes' => '(max-width: 600px) 480px, 800px', + 'as' => 'image', + 'media' => 'screen', + ), + ), + ), + 'expected_html' => ' + + ', + 'expected_header' => 'Link: ; rel="preload"; imagesrcset="https://example.com/foo-400.jpg 400w, https://example.com/foo-800.jpg 800w"; imagesizes="(max-width: 600px) 480px, 800px"; as="image"; media="screen"', + 'expected_count' => 1, + 'error' => '', + ), 'preload_with_min0_max_viewport_widths' => array( 'links_args' => array( array( From b1ed4c1cc77c7f7f3d9c3e75bdc8010e6fea8ce7 Mon Sep 17 00:00:00 2001 From: Weston Ruter Date: Mon, 3 Feb 2025 12:56:07 -0800 Subject: [PATCH 7/8] Add failing test case for bare percent appearing in URL --- .../tests/test-class-od-link-collection.php | 17 +++++++++++++++++ 1 file changed, 17 
insertions(+) diff --git a/plugins/optimization-detective/tests/test-class-od-link-collection.php b/plugins/optimization-detective/tests/test-class-od-link-collection.php index 9c08a0b056..fd0904a2ca 100644 --- a/plugins/optimization-detective/tests/test-class-od-link-collection.php +++ b/plugins/optimization-detective/tests/test-class-od-link-collection.php @@ -388,6 +388,23 @@ public function data_provider_to_test_add_link(): array { 'expected_count' => 1, 'error' => '', ), + 'percent-in-path' => array( + 'links_args' => array( + array( + array( + 'rel' => 'preload', + 'href' => 'https://example.com/100%25-one-hundred-percent.png?a[1]=2', + 'as' => 'image', + ), + ), + ), + 'expected_html' => ' + + ', + 'expected_header' => 'Link: ; rel="preload"; as="image"', + 'expected_count' => 1, + 'error' => '', + ), 'multisite_subdirectory_non_ascii' => array( 'links_args' => array( array( From 7e8867fc3356c065c1bda14e6b4bf3f3ba92e3ac Mon Sep 17 00:00:00 2001 From: AhmarZaidi Date: Tue, 4 Feb 2025 14:15:58 +0530 Subject: [PATCH 8/8] Remove percentage from regex --- plugins/optimization-detective/class-od-link-collection.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/optimization-detective/class-od-link-collection.php b/plugins/optimization-detective/class-od-link-collection.php index 0fb6485ece..db371cd112 100644 --- a/plugins/optimization-detective/class-od-link-collection.php +++ b/plugins/optimization-detective/class-od-link-collection.php @@ -260,7 +260,7 @@ public function get_response_header(): ?string { // Encode characters not allowed in a URL per RFC 3986 (anything that is not among the reserved and unreserved characters). $encoded_url = preg_replace_callback( - '/[^A-Za-z0-9\-._~:\/?#\[\]@!$&\'()*+,;=%]/', + '/[^A-Za-z0-9\-._~:\/?#\[\]@!$&\'()*+,;=]/', static function ( $matches ) { return rawurlencode( $matches[0] ); },