Skip to content

Commit 77bf186

Browse files
committed
fix: ensure empty strings fall back to next metadata source in detectors
Previously, detectors would return empty strings from primary sources (like oembed) without falling back to alternative sources (like metas), losing valuable metadata. Now empty and whitespace-only strings are treated as missing data, triggering the fallback chain.

Problem: When oembed or other primary sources returned empty strings instead of null, detectors would return those empty values immediately, preventing fallback to metas, linked data, or document sources that might contain valid data.

Solution: Add empty string validation using trim() to ensure the fallback chain executes properly:

    if (is_string($result) && trim($result) !== '')

Impact:
- AuthorName: Empty oembed author_name now falls back to metas
- Title: Empty oembed/metas titles now fall back to document <title>
- Description: Empty oembed/metas descriptions now fall back to linked data
- ProviderName: Empty oembed/metas names now fall back to hostname
- Language: Empty html lang attributes now fall back to meta tags

This improves metadata extraction quality by utilizing all available sources instead of stopping at the first non-null but empty response.
1 parent cfa2b6d commit 77bf186

File tree

5 files changed

+11
-11
lines changed

5 files changed

+11
-11
lines changed

src/Detectors/AuthorName.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ public function detect(): ?string
1111
$metas = $this->extractor->getMetas();
1212

1313
$result = $oembed->str('author_name');
14-
if ($result !== null) {
14+
if (is_string($result) && trim($result) !== '') {
1515
return $result;
1616
}
1717

src/Detectors/Description.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ public function detect(): ?string
1212
$ld = $this->extractor->getLinkedData();
1313

1414
$result = $oembed->str('description');
15-
if ($result !== null) {
15+
if (is_string($result) && trim($result) !== '') {
1616
return $result;
1717
}
1818

@@ -27,7 +27,7 @@ public function detect(): ?string
2727
'excerpt',
2828
'article.summary'
2929
);
30-
if ($result !== null) {
30+
if (is_string($result) && trim($result) !== '') {
3131
return $result;
3232
}
3333

src/Detectors/Language.php

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,22 +12,22 @@ public function detect(): ?string
1212
$ld = $this->extractor->getLinkedData();
1313

1414
$result = $document->select('/html')->str('lang');
15-
if ($result !== null) {
15+
if (is_string($result) && trim($result) !== '') {
1616
return $result;
1717
}
1818

1919
$result = $document->select('/html')->str('xml:lang');
20-
if ($result !== null) {
20+
if (is_string($result) && trim($result) !== '') {
2121
return $result;
2222
}
2323

2424
$result = $metas->str('language', 'lang', 'og:locale', 'dc:language');
25-
if ($result !== null) {
25+
if (is_string($result) && trim($result) !== '') {
2626
return $result;
2727
}
2828

2929
$result = $document->select('.//meta', ['http-equiv' => 'content-language'])->str('content');
30-
if ($result !== null) {
30+
if (is_string($result) && trim($result) !== '') {
3131
return $result;
3232
}
3333

src/Detectors/ProviderName.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ public function detect(): string
1414
$metas = $this->extractor->getMetas();
1515

1616
$result = $oembed->str('provider_name');
17-
if ($result !== null) {
17+
if (is_string($result) && trim($result) !== '') {
1818
return $result;
1919
}
2020

@@ -24,7 +24,7 @@ public function detect(): string
2424
'publisher',
2525
'article:publisher'
2626
);
27-
if ($result !== null) {
27+
if (is_string($result) && trim($result) !== '') {
2828
return $result;
2929
}
3030

src/Detectors/Title.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ public function detect(): ?string
1212
$metas = $this->extractor->getMetas();
1313

1414
$result = $oembed->str('title');
15-
if ($result !== null) {
15+
if (is_string($result) && trim($result) !== '') {
1616
return $result;
1717
}
1818

@@ -26,7 +26,7 @@ public function detect(): ?string
2626
'article.headline',
2727
'parsely-title'
2828
);
29-
if ($result !== null) {
29+
if (is_string($result) && trim($result) !== '') {
3030
return $result;
3131
}
3232

0 commit comments

Comments
 (0)