diff --git a/.changeset/fuzzy-kids-repair.md b/.changeset/fuzzy-kids-repair.md new file mode 100644 index 0000000..9fe3136 --- /dev/null +++ b/.changeset/fuzzy-kids-repair.md @@ -0,0 +1,6 @@ +--- +'@patricktree/fetch-favicon': minor +--- + +- add a fetch-favicon CLI for batch URL input +- keep fetching other favicons when one fails, with clearer source logging diff --git a/packages/fetch-favicon/package.json b/packages/fetch-favicon/package.json index 2c951b5..7e2d62f 100644 --- a/packages/fetch-favicon/package.json +++ b/packages/fetch-favicon/package.json @@ -45,8 +45,10 @@ "turbo:lint:fix": "pnpm run lint:file . --fix" }, "dependencies": { + "@commander-js/extra-typings": "^14.0.0", "@patricktree/commons-ecma": "workspace:^", "@patricktree/commons-node": "workspace:^", + "commander": "^14.0.2", "tiny-invariant": "^1.3.3", "zod": "^3.23.8" }, diff --git a/packages/fetch-favicon/src/fetch-favicon-cli.ts b/packages/fetch-favicon/src/fetch-favicon-cli.ts new file mode 100644 index 0000000..3306281 --- /dev/null +++ b/packages/fetch-favicon/src/fetch-favicon-cli.ts @@ -0,0 +1,45 @@ +#!/usr/bin/env node +/* eslint-disable n/no-process-exit -- is a CLI */ + +import * as commander from '@commander-js/extra-typings'; +import { writeFile } from 'node:fs/promises'; + +import { fetchFavicons } from '#pkg/index.js'; + +const program = new commander.Command() + .name('fetch-favicon') + .addArgument(new commander.Argument('[url...]')) + .addOption(new commander.Option('-o, --output ', 'Write JSON output to a file')) + .addOption(new commander.Option('--stdin', 'Read URLs from stdin (whitespace-separated)')) + .addOption(new commander.Option('--no-pretty', 'Minify JSON output')); + +program.parse(); + +const options = program.opts(); +const hrefs = program.processedArgs[0]; +const shouldReadStdin = options.stdin || (!process.stdin.isTTY && hrefs.length === 0); +const stdinUrls = shouldReadStdin ? await readStdinUrls() : []; +const uniqueHrefs = [...new Set([...hrefs, ...stdinUrls])]; + +if (uniqueHrefs.length === 0) { + program.outputHelp(); + process.exit(1); +} + +const normalizedHrefs = uniqueHrefs.map((href) => new URL(href).href); +const result = await fetchFavicons(normalizedHrefs); +const json = JSON.stringify(result, null, options.pretty ? 2 : 0); + +if (options.output) { + await writeFile(options.output, `${json}\n`, 'utf8'); +} else { + console.log(json); +} + +async function readStdinUrls(): Promise { + const chunks: string[] = []; + for await (const chunk of process.stdin) { + chunks.push(String(chunk)); + } + return chunks.join('').split(/\s+/).filter(Boolean); +} diff --git a/packages/fetch-favicon/src/index.ts b/packages/fetch-favicon/src/index.ts index f50f9cc..5e156b7 100644 --- a/packages/fetch-favicon/src/index.ts +++ b/packages/fetch-favicon/src/index.ts @@ -2,7 +2,6 @@ import playwright from 'playwright'; import invariant from 'tiny-invariant'; import { z } from 'zod'; -import { arrays } from '@patricktree/commons-ecma/util/arrays'; import { check } from '@patricktree/commons-ecma/util/assert'; import { binaryUtils } from '@patricktree/commons-node/utils/binary'; @@ -45,29 +44,39 @@ export async function fetchFavicons(hrefs: string[]): Promise }> = []; + function addIconTarget(iconURL: string, websiteHref: string) { + const existing = allIconURLs.find((entry) => entry.url.href === iconURL); + if (existing) { + existing.sources.add(websiteHref); + return; + } + allIconURLs.push({ url: new URL(iconURL), sources: new Set([websiteHref]) }); + } + for (const [websiteHref, entry] of Object.entries(websites)) { invariant(entry); if (check.isNonEmptyString(entry.iconURLs.light)) { - allIconURLs.push(new URL(entry.iconURLs.light)); + addIconTarget(entry.iconURLs.light, websiteHref); } if (check.isNonEmptyString(entry.iconURLs.dark)) { - allIconURLs.push(new URL(entry.iconURLs.dark)); + addIconTarget(entry.iconURLs.dark, websiteHref); } } - allIconURLs = arrays.uniqueValues(allIconURLs); console.log('Step #3: Go to every favicon URL and store the favicon as a data URL'); const icons: FaviconsForWebsites['icons'] = {}; - await Promise.all( - allIconURLs.map(async (url) => { - console.log(`Fetching favicon from ${url.href}`); + for (const { url, sources } of allIconURLs) { + console.log(`Fetching favicon from ${url.href}`); + try { const response = await fetchUrl(url); const blob = await response.blob(); const dataURL = await binaryUtils.convertBlobToDataURL(blob); icons[url.href] = { dataURL }; - }), - ); + } catch (error) { + const sourcesText = [...sources].join(', '); + console.error(`Failed to fetch favicon from ${url.href} (source: ${sourcesText})`, error); + } + } console.log('Step #4: Close the puppeteer browser'); await browser.close(); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index ee0e09f..11be0ac 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -137,12 +137,18 @@ importers: packages/fetch-favicon: dependencies: + '@commander-js/extra-typings': + specifier: ^14.0.0 + version: 14.0.0(commander@14.0.2) '@patricktree/commons-ecma': specifier: workspace:^ version: link:../commons-ecma '@patricktree/commons-node': specifier: workspace:^ version: link:../commons-node + commander: + specifier: ^14.0.2 + version: 14.0.2 tiny-invariant: specifier: ^1.3.3 version: 1.3.3 @@ -1899,6 +1905,10 @@ packages: resolution: {integrity: sha512-2uM9rYjPvyq39NwLRqaiLtWHyDC1FvryJDa2ATTVims5YAS4PupsEQsDvP14FqhFr0P49CYDugi59xaxJlTXRA==} engines: {node: '>=20'} + commander@14.0.2: + resolution: {integrity: sha512-TywoWNNRbhoD0BXs1P3ZEScW8W5iKrnbithIl0YH+uCmBd0QpPOA8yc82DS3BIE5Ma6FnBVUsJ7wVUDz4dvOWQ==} + engines: {node: '>=20'} + commander@6.2.1: resolution: {integrity: sha512-U7VdrJFnJgo4xjrHpTzu0yrHPGImdsmD95ZlgYSEajAn2JKzDhDTPG9kBTefmObL2w/ngeZnilk+OV9CG3d7UA==} engines: {node: '>= 6'} @@ -5219,6 +5229,10 @@ snapshots: dependencies: commander: 14.0.0 + '@commander-js/extra-typings@14.0.0(commander@14.0.2)': + dependencies: + commander: 14.0.2 + '@cypress/request@3.0.1': dependencies: aws-sign2: 0.7.0 @@ -6602,6 +6616,8 @@ snapshots: commander@14.0.0: {} + commander@14.0.2: {} + commander@6.2.1: {} comment-parser@1.4.1: {}