diff --git a/recipes/ob1-file-converter/README.md b/recipes/ob1-file-converter/README.md new file mode 100644 index 00000000..3a0e927e --- /dev/null +++ b/recipes/ob1-file-converter/README.md @@ -0,0 +1,142 @@ +# OB1 File Converter + +> Convert local files into extracted-record JSON before any Open Brain import. + +## What It Does + +OB1 File Converter reads local `.txt`, `.md`, and JSON array files and emits a JSON array of extracted records. It is an offline preparation step for large or mixed file sources. + +The converter performs mechanical splitting only. Chunked output is marked as `type: "raw_chunk"` and `confidence: "unprocessed"` so it stays evidence-grade until a person, trusted importer, or later extraction workflow reviews it. + +## Prerequisites + +- Working Open Brain setup ([guide](../../docs/01-getting-started.md)) +- Node.js 18+ +- Local text, Markdown, or JSON array files + +## Credential Tracker + +No credentials are required. This recipe does not connect to Supabase or any external API. + +```text +OB1 FILE CONVERTER -- CREDENTIAL TRACKER +-------------------------------------- + +LOCAL FILES + Input file or folder: ____________ + Output records JSON: ____________ + +SUPABASE + Not used by this recipe. + +-------------------------------------- +``` + +## Steps + +1. Open this recipe folder: + + ```bash + cd recipes/ob1-file-converter + ``` + +2. Run the built-in tests: + + ```bash + npm test + ``` + +3. Convert the sample directory: + + ```bash + node convert.js fixtures/dir --source sample --source-label "Sample files" > sample-records.json + ``` + +4. Convert one Markdown or text file: + + ```bash + node convert.js /path/to/note.md --source obsidian --source-label "House vault" > records.json + ``` + +5. Convert a folder of local files: + + ```bash + node convert.js /path/to/folder --source local-files --source-label "Local files" > records.json + ``` + +6. 
Convert a JSON array export: + + ```bash + node convert.js /path/to/export.json --source json-export > records.json + ``` + +7. Inspect the output before using any later import path: + + ```bash + head -40 records.json + ``` + +## Options + +| Option | Default | Description | +| ------ | ------- | ----------- | +| `--output FILE` | stdout | Write JSON output to a file. | +| `--source SOURCE` | `file` | Source slug for emitted records. | +| `--source-label LABEL` | `${source} files` | Human-readable source label. | +| `--format FORMAT` | `auto` | `auto`, `text`, `markdown`, or `json`. | +| `--max-chunk-size N` | `4000` | Maximum characters before paragraph-first chunking. | +| `--max-file-size N` | `1048576` | Maximum file bytes to read. Larger files are skipped with a warning. | +| `--max-depth N` | `5` | Maximum directory recursion depth. | + +## Expected Outcome + +The command prints JSON records that can be inspected and passed into a later normalization path. Each record includes: + +- `content` +- `source` +- `source_type` +- `source_label` +- `source_path` +- `source_locator` +- `created_at` when available +- `type` +- `topics` +- `confidence` + +For files larger than `--max-chunk-size`, each chunk receives a locator such as `chunk-1-of-3`. + +## Safety Notes + +- Symlinks are skipped. +- Binary files are skipped. +- Files over `--max-file-size` are skipped. +- Empty text files produce an empty array. +- Raw file chunks are not treated as instruction-grade memory. + +## What This Does Not Do + +- It does not write to Supabase. +- It does not create ingestion job rows. +- It does not call an LLM. +- It does not generate embeddings. +- It does not deduplicate against existing thoughts. + +Those pieces belong to later ingestion PRs. + +## Troubleshooting + +**Issue: Output is an empty array** +Solution: Confirm the input path contains supported `.txt`, `.md`, `.markdown`, or `.json` files and that the files are not empty. 
/**
 * Parse the converter's command-line arguments.
 *
 * Every option that takes a value accepts both the `--flag value` and the
 * `--flag=value` spelling. The first bare (non `--`) argument is the input
 * path; a second one is rejected.
 *
 * @param {string[]} argv - Arguments after the script name, i.e. `process.argv.slice(2)`.
 * @returns {{ input: string, options: object }} The input path (empty when only
 *   help was requested) and the resolved options. `options.help` is present
 *   (and `true`) only when `--help` or `help` was passed.
 * @throws {Error} For an unknown option, an option missing its value, a second
 *   positional argument, a malformed numeric option, an unsupported `--format`,
 *   an empty `--source`, or a missing input path when help was not requested.
 */
function parseArgs(argv) {
  const options = {
    output: "",
    source: "file",
    sourceLabel: "",
    format: "auto",
    maxChunkSize: 4000,
    maxFileSize: 1024 * 1024,
    maxDepth: 5,
  };

  // Flag -> key in `options`, grouped by how the raw value is interpreted.
  const stringFlags = new Map([
    ["--output", "output"],
    ["--source", "source"],
    ["--source-label", "sourceLabel"],
    ["--format", "format"],
  ]);
  const integerFlags = new Map([
    ["--max-chunk-size", "maxChunkSize"],
    ["--max-file-size", "maxFileSize"],
    ["--max-depth", "maxDepth"],
  ]);

  let input = "";
  for (let i = 0; i < argv.length; i += 1) {
    const arg = argv[i];

    if (arg === "--help" || arg === "help") {
      options.help = true;
      continue;
    }

    if (!arg.startsWith("--")) {
      if (input) throw new Error(`Unexpected argument: ${arg}`);
      input = arg;
      continue;
    }

    // Split `--flag=value` on the first "=" only, so values may contain "=".
    const eq = arg.indexOf("=");
    const flag = eq === -1 ? arg : arg.slice(0, eq);
    if (!stringFlags.has(flag) && !integerFlags.has(flag)) {
      throw new Error(`Unknown option: ${arg}`);
    }

    let value;
    if (eq !== -1) {
      value = arg.slice(eq + 1);
    } else {
      // A flag at the very end of argv has no value; report it instead of
      // silently falling back to the default (e.g. `--output` -> stdout).
      i += 1;
      if (i >= argv.length) throw new Error(`${flag} requires a value`);
      value = argv[i];
    }

    if (stringFlags.has(flag)) {
      options[stringFlags.get(flag)] = value;
    } else {
      options[integerFlags.get(flag)] = parseNonNegativeInt(value, flag.slice(2));
    }
  }

  if (!["auto", "text", "markdown", "json"].includes(options.format)) {
    throw new Error("--format must be auto, text, markdown, or json");
  }
  if (!options.source) throw new Error("--source cannot be empty");
  if (!options.help && !input) throw new Error("Missing input path");
  return { input, options };
}

/**
 * Parse a CLI value as a non-negative integer.
 *
 * Only plain decimal digits are accepted. `Number.parseInt` alone would accept
 * inputs such as "12abc" (-> 12) or "1.9" (-> 1), hiding typos in size limits.
 *
 * @param {string} value - Raw option value.
 * @param {string} name - Option name without the leading dashes, used in the error message.
 * @returns {number} The parsed integer.
 * @throws {Error} When the value is missing, not all digits, or too large to be a safe integer.
 */
function parseNonNegativeInt(value, name) {
  const text = String(value ?? "").trim();
  const parsed = /^\d+$/.test(text) ? Number(text) : Number.NaN;
  if (!Number.isSafeInteger(parsed)) {
    throw new Error(`--${name} must be a non-negative integer`);
  }
  return parsed;
}
".markdown"].includes(path.extname(filePath).toLowerCase()); + if (format === "json") return path.extname(filePath).toLowerCase() === ".json"; + const ext = path.extname(filePath).toLowerCase(); + return TEXT_EXTENSIONS.has(ext) || ext === ".json"; +} + +function convertFile(filePath, root, options, warnings) { + const stat = fs.lstatSync(filePath); + const sourcePath = normalizeRelativePath(root, filePath); + if (stat.size > options.maxFileSize) { + warnings.push(`Skipped file over max size: ${sourcePath}`); + return []; + } + if (looksBinary(filePath)) { + warnings.push(`Skipped binary file: ${sourcePath}`); + return []; + } + + const format = detectFormat(filePath, options.format); + if (format === "json") return convertJsonFile(filePath, root, options, warnings); + return convertTextFile(filePath, root, options, format); +} + +function detectFormat(filePath, requested) { + if (requested !== "auto") return requested; + const ext = path.extname(filePath).toLowerCase(); + if (ext === ".json") return "json"; + if (ext === ".md" || ext === ".markdown") return "markdown"; + return "text"; +} + +function looksBinary(filePath) { + const fd = fs.openSync(filePath, "r"); + try { + const buffer = Buffer.alloc(512); + const bytes = fs.readSync(fd, buffer, 0, buffer.length, 0); + return buffer.subarray(0, bytes).includes(0); + } finally { + fs.closeSync(fd); + } +} + +function convertJsonFile(filePath, root, options, warnings) { + const raw = fs.readFileSync(filePath, "utf8"); + if (!raw.trim()) return []; + const parsed = JSON.parse(raw); + if (!Array.isArray(parsed)) { + warnings.push(`Skipped JSON file that is not an array: ${normalizeRelativePath(root, filePath)}`); + return []; + } + + return parsed.flatMap((record, index) => { + const content = extractContent(record); + if (!content) { + warnings.push(`Skipped JSON record without content/text: ${normalizeRelativePath(root, filePath)}#${index + 1}`); + return []; + } + const base = { + content, + source: 
/**
 * Split a Markdown document into simple `key: value` frontmatter and body.
 *
 * This is a deliberately small parser, not a YAML implementation: it reads one
 * `key: value` pair per line between an opening and a closing `---` line and
 * keeps every value as a string. Nested structures and multi-line values are
 * not interpreted.
 *
 * @param {string} raw - Full file contents.
 * @returns {{ body: string, metadata: Object<string, string> }} The text after
 *   the closing fence (lines joined with "\n") and the parsed pairs. When the
 *   document has no well-formed frontmatter block, `metadata` is empty and
 *   `body` is the whole document (minus a leading byte-order mark).
 */
function parseFrontmatter(raw) {
  // Editors on Windows often prepend a UTF-8 BOM; left in place it hides the
  // opening fence and the frontmatter would leak into the record content.
  const text = raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw;
  const lines = text.split(/\r?\n/);

  // The opening fence must be exactly `---`; `----` or `---title` is content.
  if (lines[0].trim() !== "---") return { body: text, metadata: {} };

  const end = lines.findIndex((line, index) => index > 0 && line.trim() === "---");
  if (end === -1) return { body: text, metadata: {} };

  const metadata = {};
  for (const line of lines.slice(1, end)) {
    if (line.trimStart().startsWith("#")) continue; // YAML comment line
    const idx = line.indexOf(":");
    if (idx === -1) continue;
    const key = line.slice(0, idx).trim();
    let value = line.slice(idx + 1).trim();
    // Unwrap a fully quoted scalar so `type: "decision"` yields `decision`.
    const quote = value[0];
    if (value.length >= 2 && (quote === '"' || quote === "'") && value.endsWith(quote)) {
      value = value.slice(1, -1);
    }
    if (key) metadata[key] = value;
  }

  // Join with "\n" rather than the platform EOL so the same file produces the
  // same record content on every operating system.
  return { body: lines.slice(end + 1).join("\n"), metadata };
}
/**
 * Split one over-long paragraph into chunks of at most `maxSize` characters,
 * preferring sentence boundaries.
 *
 * Sentences are packed greedily, joined by a single space. A sentence that is
 * itself longer than `maxSize` is handed to `hardSplit`.
 *
 * @param {string} paragraph - Paragraph text (may contain single newlines).
 * @param {number} maxSize - Maximum chunk length in UTF-16 code units.
 * @returns {string[]} Non-empty, trimmed chunks covering the whole paragraph.
 */
function splitLongParagraph(paragraph, maxSize) {
  // First alternative: optional text followed by a run of terminators.
  // Second alternative: whatever is left when no terminator follows. The
  // negated class also matches "\n", so an unpunctuated multi-line tail or a
  // one-character remainder is kept rather than silently dropped.
  const sentences = paragraph.match(/[^.!?]*[.!?]+|[^.!?]+$/g) || [paragraph];
  const chunks = [];
  let current = "";

  for (const sentence of sentences.map((s) => s.trim()).filter(Boolean)) {
    if (sentence.length > maxSize) {
      if (current) {
        chunks.push(current);
        current = "";
      }
      chunks.push(...hardSplit(sentence, maxSize));
      continue;
    }
    const candidate = current ? `${current} ${sentence}` : sentence;
    if (candidate.length <= maxSize) {
      current = candidate;
    } else {
      if (current) chunks.push(current);
      current = sentence;
    }
  }
  if (current) chunks.push(current);
  return chunks;
}

/**
 * Split text that has no usable sentence boundary into pieces of at most
 * `maxSize` UTF-16 code units.
 *
 * The cut is placed at the last whitespace at or before `maxSize`, unless that
 * would leave a piece shorter than half of `maxSize`, in which case the text
 * is cut at exactly `maxSize`. A cut never lands inside a surrogate pair; when
 * `maxSize` is 1 a paired character is therefore emitted whole (2 units).
 *
 * @param {string} text - Text to split.
 * @param {number} maxSize - Maximum piece length; values below 1 disable splitting.
 * @returns {string[]} Non-empty, trimmed pieces in order.
 */
function hardSplit(text, maxSize) {
  const chunks = [];
  let remaining = text.trim();
  // A limit below 1 can never make progress; return the text unsplit.
  if (maxSize < 1) return remaining ? [remaining] : [];

  const isHighSurrogate = (code) => code >= 0xd800 && code <= 0xdbff;
  const isLowSurrogate = (code) => code >= 0xdc00 && code <= 0xdfff;

  while (remaining.length > maxSize) {
    // Index 0 is never whitespace because `remaining` is trimmed.
    let splitAt = -1;
    for (let i = maxSize; i > 0; i -= 1) {
      if (/\s/.test(remaining[i])) {
        splitAt = i;
        break;
      }
    }
    if (splitAt < Math.floor(maxSize / 2)) splitAt = maxSize;

    // Keep astral characters (emoji, rare CJK) intact.
    if (isHighSurrogate(remaining.charCodeAt(splitAt - 1)) && isLowSurrogate(remaining.charCodeAt(splitAt))) {
      splitAt = splitAt > 1 ? splitAt - 1 : splitAt + 1;
    }

    chunks.push(remaining.slice(0, splitAt).trim());
    remaining = remaining.slice(splitAt).trim();
  }
  if (remaining) chunks.push(remaining);
  return chunks;
}
cli.options); + for (const warning of result.warnings) console.error(`Warning: ${warning}`); + writeOutput(result.records, cli.options); + } catch (error) { + console.error(`Error: ${error.message}`); + process.exit(2); + } +} + +main(); diff --git a/recipes/ob1-file-converter/fixtures/dir/a.txt b/recipes/ob1-file-converter/fixtures/dir/a.txt new file mode 100644 index 00000000..eb16a318 --- /dev/null +++ b/recipes/ob1-file-converter/fixtures/dir/a.txt @@ -0,0 +1 @@ +Directory traversal should include plain text files. diff --git a/recipes/ob1-file-converter/fixtures/dir/note.md b/recipes/ob1-file-converter/fixtures/dir/note.md new file mode 100644 index 00000000..494bd02c --- /dev/null +++ b/recipes/ob1-file-converter/fixtures/dir/note.md @@ -0,0 +1,5 @@ +--- +topics: directory, markdown +--- + +Directory traversal should include Markdown files and parse simple frontmatter. diff --git a/recipes/ob1-file-converter/fixtures/large.txt b/recipes/ob1-file-converter/fixtures/large.txt new file mode 100644 index 00000000..51fdcdda --- /dev/null +++ b/recipes/ob1-file-converter/fixtures/large.txt @@ -0,0 +1,5 @@ +Paragraph one captures the first operational note for a heavy import file. It should stay together when chunking is configured with enough room for one paragraph. + +Paragraph two captures the second operational note for a heavy import file. It should become a separate chunk when the maximum chunk size is intentionally low during tests. + +Paragraph three captures the final operational note for a heavy import file. The converter should preserve source path and locator metadata for every chunk. 
diff --git a/recipes/ob1-file-converter/fixtures/records.json b/recipes/ob1-file-converter/fixtures/records.json new file mode 100644 index 00000000..9d01286d --- /dev/null +++ b/recipes/ob1-file-converter/fixtures/records.json @@ -0,0 +1,15 @@ +[ + { + "id": "json-record-1", + "text": "JSON input can be reshaped into extracted records for later normalization.", + "source": "json-export", + "source_label": "JSON export", + "created_at": "2026-06-01T00:00:00Z", + "topics": ["json", "import"] + }, + { + "id": "json-record-2", + "content": "Oversized JSON content can be chunked before import review.", + "source_type": "json-export" + } +] diff --git a/recipes/ob1-file-converter/fixtures/single.txt b/recipes/ob1-file-converter/fixtures/single.txt new file mode 100644 index 00000000..cc53df88 --- /dev/null +++ b/recipes/ob1-file-converter/fixtures/single.txt @@ -0,0 +1,3 @@ +Use dry-run import checks before writing migrated source data into Open Brain. + +Keep converted records small enough for review before any database write. diff --git a/recipes/ob1-file-converter/fixtures/with-frontmatter.md b/recipes/ob1-file-converter/fixtures/with-frontmatter.md new file mode 100644 index 00000000..a4249246 --- /dev/null +++ b/recipes/ob1-file-converter/fixtures/with-frontmatter.md @@ -0,0 +1,7 @@ +--- +created_at: 2026-05-01T12:00:00Z +type: decision +topics: import, metadata, review +--- + +Use an offline converter before import so heavy files become inspectable extracted records. 
diff --git a/recipes/ob1-file-converter/metadata.json b/recipes/ob1-file-converter/metadata.json new file mode 100644 index 00000000..7c6af64e --- /dev/null +++ b/recipes/ob1-file-converter/metadata.json @@ -0,0 +1,21 @@ +{ + "name": "OB1 File Converter", + "description": "Offline recipe for converting local text, Markdown, and JSON exports into extracted-record JSON artifacts before Open Brain import.", + "category": "recipes", + "author": { + "name": "Alan Shurafa", + "github": "alanshurafa" + }, + "version": "1.0.0", + "requires": { + "open_brain": true, + "services": [], + "tools": ["Node.js 18+"] + }, + "requires_skills": [], + "tags": ["import", "converter", "cli", "chunking", "dry-run"], + "difficulty": "beginner", + "estimated_time": "10 minutes", + "created": "2026-06-06", + "updated": "2026-06-06" +} diff --git a/recipes/ob1-file-converter/package.json b/recipes/ob1-file-converter/package.json new file mode 100644 index 00000000..baaef681 --- /dev/null +++ b/recipes/ob1-file-converter/package.json @@ -0,0 +1,19 @@ +{ + "name": "ob1-file-converter", + "version": "1.0.0", + "description": "Offline converter for turning local text, markdown, and JSON exports into Open Brain extracted-record JSON.", + "private": true, + "type": "module", + "main": "convert.js", + "bin": { + "ob1-file-converter": "./convert.js" + }, + "engines": { + "node": ">=18" + }, + "scripts": { + "test": "node --test test.js", + "convert:sample": "node convert.js fixtures/dir --source sample --source-label \"Sample files\"" + }, + "license": "FSL-1.1-MIT" +} diff --git a/recipes/ob1-file-converter/test.js b/recipes/ob1-file-converter/test.js new file mode 100644 index 00000000..5f8d877f --- /dev/null +++ b/recipes/ob1-file-converter/test.js @@ -0,0 +1,115 @@ +import assert from "node:assert/strict"; +import { spawnSync } from "node:child_process"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import test from "node:test"; +import { fileURLToPath } 
from "node:url"; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const cli = path.join(__dirname, "convert.js"); + +function run(args) { + return spawnSync(process.execPath, [cli, ...args], { + cwd: __dirname, + encoding: "utf8", + }); +} + +function recordsFrom(result) { + assert.equal(result.status, 0, result.stderr); + return JSON.parse(result.stdout); +} + +test("single text file produces a record array", () => { + const result = run(["fixtures/single.txt", "--source", "sample", "--source-label", "Sample files"]); + const records = recordsFrom(result); + assert.equal(records.length, 1); + assert.match(records[0].content, /dry-run import checks/); + assert.equal(records[0].source, "sample"); + assert.equal(records[0].source_path, "single.txt"); +}); + +test("markdown frontmatter populates metadata fields", () => { + const result = run(["fixtures/with-frontmatter.md", "--source", "markdown"]); + const records = recordsFrom(result); + assert.equal(records.length, 1); + assert.equal(records[0].created_at, "2026-05-01T12:00:00Z"); + assert.equal(records[0].type, "decision"); + assert.deepEqual(records[0].topics, ["import", "metadata", "review"]); +}); + +test("directory traversal includes supported files with relative source paths", () => { + const result = run(["fixtures/dir", "--source", "dir"]); + const records = recordsFrom(result); + assert.equal(records.length, 2); + assert.deepEqual(records.map((r) => r.source_path).sort(), ["a.txt", "note.md"]); +}); + +test("chunking preserves locators and keeps chunks below threshold", () => { + const result = run(["fixtures/large.txt", "--source", "large", "--max-chunk-size", "180"]); + const records = recordsFrom(result); + assert.ok(records.length > 1); + assert.ok(records.every((record) => record.content.length <= 180)); + assert.ok(records.every((record) => /^chunk-\d+-of-\d+$/.test(record.source_locator))); + assert.ok(records.every((record) => record.type === "raw_chunk")); +}); + +test("json 
arrays are reshaped into extracted records", () => { + const result = run(["fixtures/records.json", "--source", "json", "--source-label", "JSON fixture"]); + const records = recordsFrom(result); + assert.equal(records.length, 2); + assert.equal(records[0].source_path, "records.json"); + assert.equal(records[0].source_locator, "json-record-1"); + assert.deepEqual(records[0].topics, ["json", "import"]); +}); + +test("output file option writes JSON to disk", () => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "ob1-file-converter-")); + const out = path.join(tmpDir, "records.json"); + const result = run(["fixtures/single.txt", "--output", out]); + assert.equal(result.status, 0, result.stderr); + assert.equal(result.stdout, ""); + const records = JSON.parse(fs.readFileSync(out, "utf8")); + assert.equal(records.length, 1); +}); + +test("binary files are skipped with a warning", () => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "ob1-file-converter-")); + const binary = path.join(tmpDir, "data.txt"); + fs.writeFileSync(binary, Buffer.from([0, 1, 2, 3])); + const result = run([binary]); + assert.equal(result.status, 0); + assert.deepEqual(JSON.parse(result.stdout), []); + assert.match(result.stderr, /Skipped binary file/); +}); + +test("symlinks are skipped when the platform allows creating one", () => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "ob1-file-converter-")); + const target = path.join(tmpDir, "target.txt"); + const link = path.join(tmpDir, "link.txt"); + fs.writeFileSync(target, "Target file"); + try { + fs.symlinkSync(target, link); + } catch { + return; + } + const result = run([tmpDir]); + assert.equal(result.status, 0); + assert.match(result.stderr, /Skipped symlink/); +}); + +test("empty text file produces an empty array", () => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "ob1-file-converter-")); + const empty = path.join(tmpDir, "empty.txt"); + fs.writeFileSync(empty, ""); + const result = run([empty]); + 
assert.equal(result.status, 0); + assert.deepEqual(JSON.parse(result.stdout), []); +}); + +test("usage errors exit 2", () => { + const result = run([]); + assert.equal(result.status, 2); + assert.match(result.stdout, /Usage:/); +});