@@ -43,11 +43,18 @@ use crate::tsjs;
4343use lol_html:: { element, html_content:: ContentType , text, HtmlRewriter , Settings as HtmlSettings } ;
4444
4545// Helper: normalize to absolute URL if http/https or protocol-relative. Otherwise None.
46- pub ( super ) fn to_abs ( u : & str ) -> Option < String > {
46+ // Also returns None when the URL matches the rewrite exclusion list in settings, so excluded domains/patterns are left unproxied.
47+ pub ( super ) fn to_abs ( u : & str , settings : & Settings ) -> Option < String > {
4748 let t = u. trim ( ) ;
4849 if t. is_empty ( ) {
4950 return None ;
5051 }
52+
53+ // Skip if excluded from rewrites in settings
54+ if settings. rewrite . is_excluded ( t) {
55+ return None ;
56+ }
57+
5158 // Skip non-network schemes commonly found in creatives
5259 let lower = t. to_ascii_lowercase ( ) ;
5360 if lower. starts_with ( "data:" )
@@ -59,6 +66,7 @@ pub(super) fn to_abs(u: &str) -> Option<String> {
5966 {
6067 return None ;
6168 }
69+
6270 if t. starts_with ( "//" ) {
6371 Some ( format ! ( "https:{}" , t) )
6472 } else if lower. starts_with ( "http://" ) || lower. starts_with ( "https://" ) {
@@ -106,7 +114,7 @@ pub(super) fn rewrite_style_urls(style: &str, settings: &Settings) -> String {
106114 ( s, e)
107115 } ;
108116 let url_val = & style[ qs..qe] ;
109- let new_val = if let Some ( abs) = to_abs ( url_val) {
117+ let new_val = if let Some ( abs) = to_abs ( url_val, settings ) {
110118 build_proxy_url ( settings, & abs)
111119 } else {
112120 url_val. to_string ( )
@@ -196,7 +204,7 @@ pub(super) fn build_click_url(settings: &Settings, clear_url: &str) -> String {
196204
197205#[ inline]
198206pub ( super ) fn proxy_if_abs ( settings : & Settings , val : & str ) -> Option < String > {
199- to_abs ( val) . map ( |abs| build_proxy_url ( settings, & abs) )
207+ to_abs ( val, settings ) . map ( |abs| build_proxy_url ( settings, & abs) )
200208}
201209
202210/// Split a srcset/imagesrcset attribute into candidate strings.
@@ -259,7 +267,7 @@ pub(super) fn rewrite_srcset(srcset: &str, settings: &Settings) -> String {
259267 let mut parts = it. split_whitespace ( ) ;
260268 let url = parts. next ( ) . unwrap_or ( "" ) ;
261269 let descriptor = parts. collect :: < Vec < _ > > ( ) . join ( " " ) ;
262- let rewritten = if let Some ( abs) = to_abs ( url) {
270+ let rewritten = if let Some ( abs) = to_abs ( url, settings ) {
263271 build_proxy_url ( settings, & abs)
264272 } else {
265273 url. to_string ( )
@@ -404,7 +412,7 @@ pub fn rewrite_creative_html(markup: &str, settings: &Settings) -> String {
404412 // Click-through links
405413 element!( "a[href], area[href]" , |el| {
406414 if let Some ( href) = el. get_attribute( "href" ) {
407- if let Some ( abs) = to_abs( & href) {
415+ if let Some ( abs) = to_abs( & href, settings ) {
408416 let click = build_click_url( settings, & abs) ;
409417 let _ = el. set_attribute( "href" , & click) ;
410418 let _ = el. set_attribute( "data-tsclick" , & click) ;
@@ -508,26 +516,27 @@ mod tests {
508516
// Baseline `to_abs` behavior with the settings from `create_test_settings()`:
// protocol-relative and http(s) inputs yield `Some`, everything else `None`.
#[test]
fn to_abs_conversions() {
    let settings = crate::test_support::tests::create_test_settings();

    // (input, expected). Surrounding whitespace is trimmed, `//` gains an
    // `https:` prefix, and an explicit scheme keeps its original casing.
    let accepted = [
        ("//cdn.example/x", "https://cdn.example/x"),
        ("HTTPS://cdn.example/x", "HTTPS://cdn.example/x"),
        ("http://cdn.example/x", "http://cdn.example/x"),
        (" //cdn.example/y ", "https://cdn.example/y"),
    ];
    for (input, expected) in accepted {
        assert_eq!(to_abs(input, &settings), Some(expected.to_string()));
    }

    // Relative paths and non-network schemes are never made absolute.
    let rejected = [
        "/local/x",
        "data:image/png;base64,abcd",
        "javascript:alert(1)",
        // NOTE(review): this address looks like an email-obfuscation
        // placeholder left by a scraper -- confirm the intended literal.
        "mailto:[email protected] ",
    ];
    for input in rejected {
        assert_eq!(to_abs(input, &settings), None);
    }
}
532541
533542 #[ test]
@@ -981,13 +990,14 @@ mod tests {
981990
// Extra `to_abs` cases: a whitespace-padded https URL and more non-network schemes.
#[test]
fn to_abs_additional_cases() {
    let settings = crate::test_support::tests::create_test_settings();

    // Padding around an otherwise valid https URL is stripped.
    let padded = to_abs(" https://cdn.example/a ", &settings);
    assert_eq!(padded.as_deref(), Some("https://cdn.example/a"));

    // None of these schemes should produce an absolute URL.
    for non_network in ["blob:xyz", "tel:+123", "about:blank"] {
        assert!(to_abs(non_network, &settings).is_none());
    }
}
9921002
9931003 #[ test]
@@ -1003,4 +1013,134 @@ mod tests {
10031013 // relative candidate remains
10041014 assert ! ( out. contains( "/local/img.png 1x" ) ) ;
10051015 }
1016+
// An exact entry in `exclude_domains` makes `to_abs` return `None` for that
// host while other hosts are still returned unchanged.
#[test]
fn to_abs_respects_exclude_domains() {
    let mut settings = crate::test_support::tests::create_test_settings();
    settings.rewrite.exclude_domains = vec![String::from("trusted-cdn.example.com")];

    // Host on the exclusion list: reported as "nothing to proxy".
    let excluded = to_abs("https://trusted-cdn.example.com/lib.js", &settings);
    assert!(excluded.is_none());

    // Host not on the list: returned as-is.
    let allowed = "https://other-cdn.example.com/lib.js";
    assert_eq!(to_abs(allowed, &settings).as_deref(), Some(allowed));
}
1034+
// A `*.domain` entry in `exclude_domains` is expected to cover both the bare
// domain and its subdomains, but not a different domain that merely ends with
// the same characters.
#[test]
fn to_abs_respects_wildcard_domains() {
    let mut settings = crate::test_support::tests::create_test_settings();
    settings.rewrite.exclude_domains = vec![String::from("*.cloudflare.com")];

    // Base domain and subdomain are both excluded.
    let excluded = [
        "https://cloudflare.com/cdn.js",
        "https://cdnjs.cloudflare.com/lib.js",
    ];
    for url in excluded {
        assert!(to_abs(url, &settings).is_none());
    }

    // A look-alike domain must not be caught by the wildcard.
    let unrelated = "https://notcloudflare.com/lib.js";
    assert_eq!(to_abs(unrelated, &settings).as_deref(), Some(unrelated));
}
1055+
// End-to-end check on `<img src>`: an excluded host keeps its original URL,
// a non-excluded host is routed through the first-party proxy.
#[test]
fn rewrite_html_excludes_blacklisted_domains() {
    let mut settings = crate::test_support::tests::create_test_settings();
    settings.rewrite.exclude_domains = vec![String::from("trusted-cdn.example.com")];

    let markup = r#"
        <img src="https://trusted-cdn.example.com/logo.png">
        <img src="https://other-cdn.example.com/banner.jpg">
    "#;
    let rewritten = rewrite_creative_html(markup, &settings);

    // The excluded image still carries its original src.
    let untouched_src = r#"src="https://trusted-cdn.example.com/logo.png"#;
    assert!(rewritten.contains(untouched_src));

    // The other image was proxied; its host still appears in the output.
    for fragment in ["/first-party/proxy?tsurl=", "other-cdn.example.com"] {
        assert!(rewritten.contains(fragment));
    }
}
1075+
// End-to-end check on `srcset`: exclusion is applied per candidate, so one
// attribute can mix an untouched URL with a proxied one.
#[test]
fn rewrite_srcset_excludes_blacklisted_domains() {
    let mut settings = crate::test_support::tests::create_test_settings();
    settings.rewrite.exclude_domains = vec![String::from("trusted.example.com")];

    let markup = r#"
        <img srcset="https://trusted.example.com/img-1x.png 1x, https://cdn.example.com/img-2x.png 2x">
    "#;
    let rewritten = rewrite_creative_html(markup, &settings);

    // The excluded candidate is emitted verbatim together with its descriptor.
    assert!(rewritten.contains("https://trusted.example.com/img-1x.png 1x"));

    // The remaining candidate goes through the proxy.
    for fragment in ["/first-party/proxy?tsurl=", "cdn.example.com"] {
        assert!(rewritten.contains(fragment));
    }
}
1094+
// End-to-end check on CSS `url(...)` inside a `<style>` element: the excluded
// host is left alone, the non-excluded host is proxied.
#[test]
fn rewrite_style_urls_excludes_blacklisted_domains() {
    let mut settings = crate::test_support::tests::create_test_settings();
    settings.rewrite.exclude_domains = vec![String::from("fonts.googleapis.com")];

    let markup = r#"
        <style>
            @font-face {
                font-family: 'Test';
                src: url(https://fonts.googleapis.com/font.woff2);
            }
            body {
                background: url(https://cdn.example.com/bg.png);
            }
        </style>
    "#;
    let rewritten = rewrite_creative_html(markup, &settings);

    // The font URL on the excluded host is unchanged.
    let untouched_url = "url(https://fonts.googleapis.com/font.woff2)";
    assert!(rewritten.contains(untouched_url));

    // The background image on the other host is proxied.
    for fragment in ["/first-party/proxy?tsurl=", "cdn.example.com"] {
        assert!(rewritten.contains(fragment));
    }
}
1121+
// End-to-end check on click-through links: an excluded host keeps its original
// `href` and gains no `data-tsclick`, while a non-excluded host is rewritten to
// the first-party click endpoint and tagged with `data-tsclick`.
#[test]
fn rewrite_click_urls_excludes_blacklisted_domains() {
    let mut settings = crate::test_support::tests::create_test_settings();
    settings.rewrite.exclude_domains = vec!["trusted-landing.example.com".to_string()];

    let html = r#"
        <a href="https://trusted-landing.example.com/page">Trusted Link</a>
        <a href="https://advertiser.example.com/landing">Ad Link</a>
    "#;

    let out = rewrite_creative_html(html, &settings);

    // Excluded domain should NOT be rewritten to first-party click
    assert!(out.contains(r#"href="https://trusted-landing.example.com/page"#));
    // The excluded link should NOT have data-tsclick since it wasn't rewritten
    assert!(
        !out.contains(r#"<a href="https://trusted-landing.example.com/page" data-tsclick="#)
    );

    // Non-excluded should be rewritten and SHOULD have data-tsclick
    assert!(out.contains("/first-party/click?tsurl="));
    assert!(out.contains("advertiser.example.com"));
    // The rewriter writes the same click URL into both `href` and
    // `data-tsclick`, so the attribute value starts right after the quote;
    // a space there would make this assertion impossible to satisfy.
    assert!(out.contains(r#"data-tsclick="/first-party/click"#));
}
10061146}
0 commit comments