@@ -255,7 +255,6 @@ public function detectIfURLsShouldBeHarvested() : void {
255255 public function processLink ( DOMElement $ element ) : void {
256256 $ this ->normalizeURL ( $ element , 'href ' );
257257 $ this ->forceHTTPS ( $ element , 'href ' );
258- $ this ->removeQueryStringFromInternalLink ( $ element );
259258 $ this ->addDiscoveredURL ( $ element ->getAttribute ( 'href ' ) );
260259 $ this ->rewriteWPPaths ( $ element );
261260 $ this ->rewriteBaseURL ( $ element );
@@ -375,10 +374,6 @@ public function processImageSrcSet( DOMElement $element ) : void {
375374 // normalize urls
376375 if ( $ this ->isInternalLink ( $ url ) ) {
377376 $ url = $ this ->page_url ->resolve ( $ url );
378-
379- // TODO: preserve query string when reforming
380- // rm query string
381- $ url = strtok ( $ url , '? ' );
382377 $ this ->addDiscoveredURL ( (string ) $ url );
383378 $ url = $ this ->rewriteWPPathsSrcSetURL ( (string ) $ url );
384379 $ url = $ this ->rewriteBaseURLSrcSetURL ( $ url );
@@ -404,7 +399,6 @@ public function processImageSrcSet( DOMElement $element ) : void {
404399 public function processImage ( DOMElement $ element ) : void {
405400 $ this ->normalizeURL ( $ element , 'src ' );
406401 $ this ->forceHTTPS ( $ element , 'src ' );
407- $ this ->removeQueryStringFromInternalLink ( $ element );
408402 $ this ->addDiscoveredURL ( $ element ->getAttribute ( 'src ' ) );
409403 $ this ->rewriteWPPaths ( $ element );
410404 $ this ->rewriteBaseURL ( $ element );
@@ -413,7 +407,6 @@ public function processImage( DOMElement $element ) : void {
413407 public function processGenericSrc ( DOMElement $ element ) : void {
414408 $ this ->normalizeURL ( $ element , 'src ' );
415409 $ this ->forceHTTPS ( $ element , 'src ' );
416- $ this ->removeQueryStringFromInternalLink ( $ element );
417410 $ this ->addDiscoveredURL ( $ element ->getAttribute ( 'src ' ) );
418411 $ this ->rewriteWPPaths ( $ element );
419412 $ this ->rewriteBaseURL ( $ element );
@@ -422,7 +415,6 @@ public function processGenericSrc( DOMElement $element ) : void {
422415 public function processGenericHref ( DOMElement $ element ) : void {
423416 $ this ->normalizeURL ( $ element , 'href ' );
424417 $ this ->forceHTTPS ( $ element , 'href ' );
425- $ this ->removeQueryStringFromInternalLink ( $ element );
426418 $ this ->addDiscoveredURL ( $ element ->getAttribute ( 'href ' ) );
427419 $ this ->rewriteWPPaths ( $ element );
428420 $ this ->rewriteBaseURL ( $ element );
@@ -504,7 +496,6 @@ public function processHead( DOMElement $element ) : void {
504496 public function processScript ( DOMElement $ element ) : void {
505497 $ this ->normalizeURL ( $ element , 'src ' );
506498 $ this ->forceHTTPS ( $ element , 'src ' );
507- $ this ->removeQueryStringFromInternalLink ( $ element );
508499 $ this ->addDiscoveredURL ( $ element ->getAttribute ( 'src ' ) );
509500 $ this ->rewriteWPPaths ( $ element );
510501 $ this ->rewriteBaseURL ( $ element );
@@ -530,7 +521,6 @@ public function processAnchor( DOMElement $element ) : void {
530521 }
531522
532523 $ this ->normalizeURL ( $ element , 'href ' );
533- $ this ->removeQueryStringFromInternalLink ( $ element );
534524 $ this ->addDiscoveredURL ( $ url );
535525 $ this ->rewriteWPPaths ( $ element );
536526 $ this ->rewriteBaseURL ( $ element );
@@ -571,7 +561,6 @@ public function processMeta( DOMElement $element ) : void {
571561 $ url = $ element ->getAttribute ( 'content ' );
572562 $ this ->normalizeURL ( $ element , 'content ' );
573563 $ this ->forceHTTPS ( $ element , 'content ' );
574- $ this ->removeQueryStringFromInternalLink ( $ element );
575564 $ this ->addDiscoveredURL ( $ url );
576565 $ this ->rewriteWPPaths ( $ element );
577566 $ this ->rewriteBaseURL ( $ element );
@@ -678,32 +667,6 @@ public function isInternalLink( string $link ) : bool {
678667 return false ;
679668 }
680669
/**
 * Strip the query string from an element's URL attribute when the URL is internal.
 *
 * The first attribute present among `href`, `src` and `content` (checked in
 * that order) is treated as the URL. If isInternalLink() accepts its value,
 * everything from the first `?` onwards is removed and the attribute is
 * rewritten in place. Note that this also drops any `#fragment` that follows
 * the query string. Elements carrying none of these attributes, URLs that are
 * not internal, and URLs without a `?` are left untouched.
 *
 * @param DOMElement $element Element whose URL attribute may be rewritten.
 * @return void
 */
public function removeQueryStringFromInternalLink( DOMElement $element ) : void {
    $attribute_to_change = '';

    // Order matters: an element with several of these attributes is handled
    // via the first match only.
    foreach ( [ 'href', 'src', 'content' ] as $candidate ) {
        if ( $element->hasAttribute( $candidate ) ) {
            $attribute_to_change = $candidate;
            break;
        }
    }

    if ( $attribute_to_change === '' ) {
        return;
    }

    $url_to_change = $element->getAttribute( $attribute_to_change );

    if ( ! $this->isInternalLink( $url_to_change ) ) {
        return;
    }

    // Deliberately not strtok(): it skips leading delimiters, so a URL such
    // as "?page=2" would come back as "page=2" instead of "", and it
    // overwrites PHP's global tokenizer state, which can break a strtok()
    // loop running in a caller.
    $query_start = strpos( $url_to_change, '?' );

    if ( $query_start === false ) {
        // No query string, nothing to rewrite.
        return;
    }

    $element->setAttribute(
        $attribute_to_change,
        (string) substr( $url_to_change, 0, $query_start )
    );
}
706-
707670 public function detectEscapedSiteURLs ( string $ processed_html ) : string {
708671 // NOTE: this does return the expected http:\/\/172.18.0.3
709672 // but your error log may escape again and
0 commit comments