diff --git a/README.md b/README.md index 0753bd9..db970fd 100644 --- a/README.md +++ b/README.md @@ -102,6 +102,13 @@ md-tree stats README.md md-tree toc README.md --max-level 3 ``` +### Check links + +```bash +md-tree check-links README.md +md-tree check-links README.md --recursive +``` + ### Complete CLI options ```bash @@ -144,13 +151,17 @@ console.log(sectionMarkdown); ### Advanced Usage ```javascript -import { MarkdownTreeParser, createParser, extractSection } from 'markdown-tree-parser'; +import { + MarkdownTreeParser, + createParser, + extractSection, +} from 'markdown-tree-parser'; // Create parser with custom options const parser = createParser({ - bullet: '-', // Use '-' for lists - emphasis: '_', // Use '_' for emphasis - strong: '__' // Use '__' for strong + bullet: '-', // Use '-' for lists + emphasis: '_', // Use '_' for emphasis + strong: '__', // Use '__' for strong }); // Extract all sections at level 2 @@ -178,8 +189,7 @@ const codeBlocks = parser.selectAll(tree, 'code'); // Custom search const customNode = parser.findNode(tree, (node) => { - return node.type === 'heading' && - parser.getHeadingText(node).includes('API'); + return node.type === 'heading' && parser.getHeadingText(node).includes('API'); }); // Transform content @@ -191,7 +201,9 @@ parser.transform(tree, (node) => { // Get document statistics const stats = parser.getStats(tree); -console.log(`Document has ${stats.wordCount} words and ${stats.headings.total} headings`); +console.log( + `Document has ${stats.wordCount} words and ${stats.headings.total} headings` +); // Generate table of contents const toc = parser.generateTableOfContents(tree, 3); @@ -234,7 +246,7 @@ for (let i = 0; i < sections.length; i++) { #### Constructor ```javascript -new MarkdownTreeParser(options = {}) +new MarkdownTreeParser((options = {})); ``` #### Methods @@ -265,18 +277,18 @@ The library supports powerful CSS-like selectors for searching: ```javascript // Element selectors -parser.selectAll(tree, 'heading') // All headings -parser.selectAll(tree, 'paragraph') // All paragraphs -parser.selectAll(tree, 'link') // All links +parser.selectAll(tree, 'heading'); // All headings +parser.selectAll(tree, 'paragraph'); // All paragraphs +parser.selectAll(tree, 'link'); // All links // Attribute selectors -parser.selectAll(tree, 'heading[depth=1]') // H1 headings -parser.selectAll(tree, 'heading[depth=2]') // H2 headings -parser.selectAll(tree, 'link[url*="github"]') // Links containing "github" +parser.selectAll(tree, 'heading[depth=1]'); // H1 headings +parser.selectAll(tree, 'heading[depth=2]'); // H2 headings +parser.selectAll(tree, 'link[url*="github"]'); // Links containing "github" // Pseudo selectors -parser.selectAll(tree, ':first-child') // First child elements -parser.selectAll(tree, ':last-child') // Last child elements +parser.selectAll(tree, ':first-child'); // First child elements +parser.selectAll(tree, ':last-child'); // Last child elements ``` ## ๐Ÿงช Testing diff --git a/bin/md-tree.js b/bin/md-tree.js index a13bcf9..95378ef 100755 --- a/bin/md-tree.js +++ b/bin/md-tree.js @@ -46,6 +46,7 @@ const MESSAGES = { USAGE_SEARCH: 'โŒ Usage: md-tree search ', USAGE_STATS: 'โŒ Usage: md-tree stats ', USAGE_TOC: 'โŒ Usage: md-tree toc ', + USAGE_CHECK_LINKS: 'โŒ Usage: md-tree check-links ', INDEX_NOT_FOUND: 'index.md not found in', NO_MAIN_TITLE: 'No main title found in index.md', NO_SECTION_FILES: 'No section files found in TOC', @@ -138,6 +139,7 @@ Commands: search Search using CSS-like selectors stats Show document statistics toc Generate table of contents + check-links Verify that links are reachable version Show version information help Show this help message @@ -146,6 +148,7 @@ Options: --level, -l Heading level to work with --format, -f Output format (default: text) --max-level Maximum heading level for TOC (default: 3) + --recursive, -r Recursively check linked markdown files Examples: md-tree list README.md @@ -157,6 +160,7 @@ Examples: md-tree search README.md "heading[depth=2]" md-tree stats README.md md-tree toc README.md --max-level 2 + md-tree check-links README.md --recursive For more information, visit: https://github.com/ksylvan/markdown-tree-parser `); @@ -355,6 +359,34 @@ For more information, visit: https://github.com/ksylvan/markdown-tree-parser console.log(`๐Ÿ–ผ๏ธ Images: ${stats.images}`); } + async checkLinks(filePath, recursive = false) { + const content = await this.readFile(filePath); + const tree = await this.parser.parse(content); + const results = await this.parser.checkLinks(tree, { + baseDir: path.dirname(path.resolve(filePath)), + recursive, + }); + + console.log(`\n๐Ÿ”— Checking links in ${path.basename(filePath)}:\n`); + + const bad = []; + results.forEach((r) => { + if (r.ok) { + console.log(`โœ… ${r.url}`); + } else { + console.log(`โŒ ${r.url} - ${r.error || r.status}`); + bad.push(r); + } + }); + + if (bad.length > 0) { + console.log(`\n${MESSAGES.WARNING} ${bad.length} broken link(s) found.`); + process.exitCode = 1; + } else { + console.log('\nAll links look good!'); + } + } + async generateTOC(filePath, maxLevel = 3) { const content = await this.readFile(filePath); const tree = await this.parser.parse(content); @@ -384,6 +416,7 @@ For more information, visit: https://github.com/ksylvan/markdown-tree-parser level: 2, format: 'text', maxLevel: 3, + recursive: false, }; // Parse flags @@ -402,6 +435,8 @@ For more information, visit: https://github.com/ksylvan/markdown-tree-parser } else if (arg === '--max-level') { options.maxLevel = parseInt(args[i + 1]) || 3; i++; // skip next arg + } else if (arg === '--recursive' || arg === '-r') { + options.recursive = true; } else if (!arg.startsWith('-')) { filteredArgs.push(arg); } @@ -493,6 +528,14 @@ For more information, visit: https://github.com/ksylvan/markdown-tree-parser await this.generateTOC(args[1], options.maxLevel); } + async handleCheckLinksCommand(args, options) { + if (args.length < 2) { + console.error(MESSAGES.USAGE_CHECK_LINKS); + process.exit(1); + } + await this.checkLinks(args[1], options.recursive); + } + async run() { const { command, args, options } = this.parseArgs(); @@ -531,6 +574,9 @@ For more information, visit: https://github.com/ksylvan/markdown-tree-parser case 'toc': await this.handleTocCommand(args, options); break; + case 'check-links': + await this.handleCheckLinksCommand(args, options); + break; default: console.error(`${MESSAGES.ERROR} Unknown command: ${command}`); console.log('Run "md-tree help" for usage information.'); diff --git a/index.js b/index.js index c8fba45..4bafd54 100644 --- a/index.js +++ b/index.js @@ -74,4 +74,16 @@ export async function generateTOC(markdown, maxLevel = 3, options = {}) { const parser = new MarkdownTreeParser(options); const tree = await parser.parse(markdown); return parser.generateTableOfContents(tree, maxLevel); -} \ No newline at end of file +} + +/** + * Quick utility to check links in markdown + * @param {string} markdown - Markdown content + * @param {Object} options - Parser options + * @returns {Promise} Array of link check results + */ +export async function checkLinks(markdown, options = {}) { + const parser = new MarkdownTreeParser(options); + const tree = await parser.parse(markdown); + return parser.checkLinks(tree, options); +} diff --git a/lib/markdown-parser.js b/lib/markdown-parser.js index 57258cc..6e8299b 100644 --- a/lib/markdown-parser.js +++ b/lib/markdown-parser.js @@ -399,4 +399,63 @@ export class MarkdownTreeParser { return toc; } + + /** + * Check that all links in a document are reachable + * @param {Object} tree - Parsed markdown AST + * @param {Object} options - Options object + * @param {string} [options.baseDir='.'] - Base directory for resolving local links + * @param {boolean} [options.recursive=false] - Recursively check linked markdown files + * @param {Set} [options._visited] - Internal set of visited files to prevent loops + * @returns {Promise} Array of result objects { url, ok, status?, error? } + */ + async checkLinks( + tree, + { baseDir = '.', recursive = false, _visited = new Set() } = {} + ) { + const links = []; + visit(tree, 'link', (node) => { + if (node.url) links.push(node.url); + }); + + const results = []; + + for (const url of links) { + if (url.startsWith('http://') || url.startsWith('https://')) { + try { + const res = await fetch(url, { method: 'HEAD' }); + results.push({ url, ok: res.ok, status: res.status }); + } catch (err) { + results.push({ url, ok: false, error: err.message }); + } + } else if (url.startsWith('#') || url.startsWith('mailto:')) { + // Assume local anchors and mailto links are valid + results.push({ url, ok: true }); + } else { + const target = path.resolve(baseDir, url.split('#')[0]); + try { + await fs.access(target); + results.push({ url, ok: true }); + + if (recursive && /\.md$/i.test(target)) { + if (!_visited.has(target)) { + _visited.add(target); + const content = await fs.readFile(target, 'utf-8'); + const subTree = await this.parse(content); + const subResults = await this.checkLinks(subTree, { + baseDir: path.dirname(target), + recursive, + _visited, + }); + results.push(...subResults); + } + } + } catch (err) { + results.push({ url, ok: false, error: 'File not found' }); + } + } + } + + return results; + } }