Skip to content

Commit 924b677

Browse files
committed
Fixes #5: preserve query strings in output
1 parent dc0bc62 commit 924b677

File tree

4 files changed

+2
-41
lines changed

4 files changed

+2
-41
lines changed

src/HTMLProcessor.php

Lines changed: 0 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -255,7 +255,6 @@ public function detectIfURLsShouldBeHarvested() : void {
255255
public function processLink( DOMElement $element ) : void {
256256
$this->normalizeURL( $element, 'href' );
257257
$this->forceHTTPS( $element, 'href' );
258-
$this->removeQueryStringFromInternalLink( $element );
259258
$this->addDiscoveredURL( $element->getAttribute( 'href' ) );
260259
$this->rewriteWPPaths( $element );
261260
$this->rewriteBaseURL( $element );
@@ -375,10 +374,6 @@ public function processImageSrcSet( DOMElement $element ) : void {
375374
// normalize urls
376375
if ( $this->isInternalLink( $url ) ) {
377376
$url = $this->page_url->resolve( $url );
378-
379-
// TODO: preserve query string when reforming
380-
// rm query string
381-
$url = strtok( $url, '?' );
382377
$this->addDiscoveredURL( (string) $url );
383378
$url = $this->rewriteWPPathsSrcSetURL( (string) $url );
384379
$url = $this->rewriteBaseURLSrcSetURL( $url );
@@ -404,7 +399,6 @@ public function processImageSrcSet( DOMElement $element ) : void {
404399
public function processImage( DOMElement $element ) : void {
405400
$this->normalizeURL( $element, 'src' );
406401
$this->forceHTTPS( $element, 'src' );
407-
$this->removeQueryStringFromInternalLink( $element );
408402
$this->addDiscoveredURL( $element->getAttribute( 'src' ) );
409403
$this->rewriteWPPaths( $element );
410404
$this->rewriteBaseURL( $element );
@@ -413,7 +407,6 @@ public function processImage( DOMElement $element ) : void {
413407
public function processGenericSrc( DOMElement $element ) : void {
414408
$this->normalizeURL( $element, 'src' );
415409
$this->forceHTTPS( $element, 'src' );
416-
$this->removeQueryStringFromInternalLink( $element );
417410
$this->addDiscoveredURL( $element->getAttribute( 'src' ) );
418411
$this->rewriteWPPaths( $element );
419412
$this->rewriteBaseURL( $element );
@@ -422,7 +415,6 @@ public function processGenericSrc( DOMElement $element ) : void {
422415
public function processGenericHref( DOMElement $element ) : void {
423416
$this->normalizeURL( $element, 'href' );
424417
$this->forceHTTPS( $element, 'href' );
425-
$this->removeQueryStringFromInternalLink( $element );
426418
$this->addDiscoveredURL( $element->getAttribute( 'href' ) );
427419
$this->rewriteWPPaths( $element );
428420
$this->rewriteBaseURL( $element );
@@ -504,7 +496,6 @@ public function processHead( DOMElement $element ) : void {
504496
public function processScript( DOMElement $element ) : void {
505497
$this->normalizeURL( $element, 'src' );
506498
$this->forceHTTPS( $element, 'src' );
507-
$this->removeQueryStringFromInternalLink( $element );
508499
$this->addDiscoveredURL( $element->getAttribute( 'src' ) );
509500
$this->rewriteWPPaths( $element );
510501
$this->rewriteBaseURL( $element );
@@ -530,7 +521,6 @@ public function processAnchor( DOMElement $element ) : void {
530521
}
531522

532523
$this->normalizeURL( $element, 'href' );
533-
$this->removeQueryStringFromInternalLink( $element );
534524
$this->addDiscoveredURL( $url );
535525
$this->rewriteWPPaths( $element );
536526
$this->rewriteBaseURL( $element );
@@ -571,7 +561,6 @@ public function processMeta( DOMElement $element ) : void {
571561
$url = $element->getAttribute( 'content' );
572562
$this->normalizeURL( $element, 'content' );
573563
$this->forceHTTPS( $element, 'content' );
574-
$this->removeQueryStringFromInternalLink( $element );
575564
$this->addDiscoveredURL( $url );
576565
$this->rewriteWPPaths( $element );
577566
$this->rewriteBaseURL( $element );
@@ -678,32 +667,6 @@ public function isInternalLink( string $link ) : bool {
678667
return false;
679668
}
680669

681-
public function removeQueryStringFromInternalLink( DOMElement $element ) : void {
682-
$attribute_to_change = '';
683-
$url_to_change = '';
684-
685-
if ( $element->hasAttribute( 'href' ) ) {
686-
$attribute_to_change = 'href';
687-
} elseif ( $element->hasAttribute( 'src' ) ) {
688-
$attribute_to_change = 'src';
689-
} elseif ( $element->hasAttribute( 'content' ) ) {
690-
$attribute_to_change = 'content';
691-
} else {
692-
return;
693-
}
694-
695-
$url_to_change = $element->getAttribute( $attribute_to_change );
696-
697-
if ( $this->isInternalLink( $url_to_change ) ) {
698-
// strip anything from the ? onwards
699-
// https://stackoverflow.com/a/42476194/1668057
700-
$element->setAttribute(
701-
$attribute_to_change,
702-
(string) strtok( $url_to_change, '?' )
703-
);
704-
}
705-
}
706-
707670
public function detectEscapedSiteURLs( string $processed_html ) : string {
708671
// NOTE: this does return the expected http:\/\/172.18.0.3
709672
// but your error log may escape again and

tests/HTMLProcessorTest.php

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -307,7 +307,6 @@ public function rewritePlaceholdersProvider() {
307307
* @covers StaticHTMLOutput\HTMLProcessor::processHead
308308
* @covers StaticHTMLOutput\HTMLProcessor::processLink
309309
* @covers StaticHTMLOutput\HTMLProcessor::processMeta
310-
* @covers StaticHTMLOutput\HTMLProcessor::removeQueryStringFromInternalLink
311310
* @covers StaticHTMLOutput\HTMLProcessor::rewriteWPPaths
312311
* @covers StaticHTMLOutput\HTMLProcessor::forceHTTPS
313312
* @covers StaticHTMLOutput\HTMLProcessor::processImage
@@ -329,7 +328,6 @@ public function rewritePlaceholdersProvider() {
329328
* @covers StaticHTMLOutput\CSSProcessor::rewritePlaceholderURLsToDestination
330329
* @covers StaticHTMLOutput\CSSProcessor::rewriteSiteURLsToPlaceholder
331330
* @covers StaticHTMLOutput\CSSProcessor::writeDiscoveredURLs
332-
333331
* @dataProvider processHTMLProvider
334332
*/
335333
public function testProcessHTML(

tests/data/HTMLProcessorTest/input_process_links_without_stripping.html

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@
3131
<link rel="prev" href="http://mydomain.com">
3232
<link rel="section" href="http://mydomain.com">
3333
<link rel="start" href="http://mydomain.com">
34-
<link rel="stylesheet" href="styles.css">
34+
<link rel="stylesheet" href="styles.css?ver=something&param=somethingelse">
3535
<link rel="stylesheet" href="//mydomain.com/styles.css">
3636
<link rel="subsection" href="http://mydomain.com">
3737
<link rel="wlwmanifest" href="http://mydomain.com">

tests/data/HTMLProcessorTest/output_process_links_without_stripping.html

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@
3131
<link rel="prev" href="https://mynewdomain.com">
3232
<link rel="section" href="https://mynewdomain.com">
3333
<link rel="start" href="https://mynewdomain.com">
34-
<link rel="stylesheet" href="https://mynewdomain.com/styles.css">
34+
<link rel="stylesheet" href="https://mynewdomain.com/styles.css?ver=something&amp;param=somethingelse">
3535
<link rel="stylesheet" href="https://mynewdomain.com/styles.css">
3636
<link rel="subsection" href="https://mynewdomain.com">
3737
<link rel="wlwmanifest" href="https://mynewdomain.com">

0 commit comments

Comments
 (0)