Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 69 additions & 46 deletions scripts/link-checker.js
Original file line number Diff line number Diff line change
Expand Up @@ -149,41 +149,75 @@ async function checkLinks() {
}

const getPageData = async () => {
try {
const response = await nFetch(externalPageLink, {
headers: {
// Spoof a normal looking User-Agent to keep the servers happy
// See https://github.com/JustinBeckwith/linkinator/blob/main/src/index.ts
//
// To better future-proof against the ua string being
// responsible for any breakage, pull common, up-to-date strings
// from a reliable source.
// https://github.com/w3c/aria-practices/issues/3270
'User-Agent':
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/605.1.1',
},
});
const text = await response.text();
const html = HTMLParser.parse(text);
const ids = html
.querySelectorAll('[id]')
.map((idElement) => idElement.getAttribute('id'));

// Handle GitHub README links.
// These links are stored within a react-partial element
const reactPartial = getReactPartial(hrefOrSrc, html);
return {
ok: response.ok,
status: response.status,
ids,
reactPartial,
};
} catch (error) {
return {
errorMessage:
`Found broken external link on ${htmlPath}:${lineNumber}:${columnNumber}\n` +
` ${error.stack}`,
};
const domain = new URL(externalPageLink).hostname;
let retryCount = 0;
const maxRetries = 3;
const baseDelay = 15;

while (retryCount < maxRetries) {
try {
const response = await nFetch(externalPageLink, {
headers: {
// Spoof a normal looking User-Agent to keep the servers happy
// See https://github.com/JustinBeckwith/linkinator/blob/main/src/index.ts
//
// To better future-proof against the ua string being
// responsible for any breakage, pull common, up-to-date strings
// from a reliable source.
// https://github.com/w3c/aria-practices/issues/3270
'User-Agent':
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/605.1.1',
},
});

// Handle rate limit-related statuses
if (
response.status === 403 ||
response.status === 429 ||
response.status === 503 ||
response.status === 508
) {
throw new Error(
response.status === 429
? `Rate limited by ${domain}`
: `Unsuccessful response from ${domain} (${response.status})`
);
}

const text = await response.text();
const html = HTMLParser.parse(text);
const ids = html
.querySelectorAll('[id]')
.map((idElement) => idElement.getAttribute('id'));

// Handle GitHub README links.
// These links are stored within a react-partial element
const reactPartial = getReactPartial(hrefOrSrc, html);
return {
ok: response.ok,
status: response.status,
ids,
reactPartial,
};
} catch (error) {
if (retryCount < maxRetries) {
// Found the retry-after unit returned from response headers too
// variable to use here, but ~15 seconds seems like a safe
// initial default
const delay = baseDelay * 1000 * Math.pow(2, retryCount);
console.info(
`Error fetching ${externalPageLink}: ${error.message}, retrying in ${delay}ms`
);
await new Promise((resolve) => setTimeout(resolve, delay));
retryCount++;
continue;
}
return {
errorMessage:
`Found broken external link on ${htmlPath}:${lineNumber}:${columnNumber}\n` +
` ${error.stack}`,
};
}
}
};

Expand Down Expand Up @@ -213,17 +247,6 @@ async function checkLinks() {
Object.entries(externalPageLoaders).map(
async ([externalPageLink, getPageData]) => {
let pageData = await getPageData();
if (pageData.errorMessage) {
console.info('Retrying once');
pageData = await getPageData();
}
if (pageData.errorMessage) {
await new Promise((resolve) => {
setTimeout(resolve, 2000);
});
console.info('Retrying twice');
pageData = await getPageData();
}
externalPageData[externalPageLink] = pageData;
loadedCount += 1;
}
Expand Down