// tools/data_audit.ts
// Generates a deterministic set of pseudo-random audit records and writes
// them to disk as JSON, CSV, or both.

import * as fs from "fs";
import * as path from "path";
// NOTE(review): `parse` is csv-parse's parser, aliased here under a
// stringifier's name, and nothing in this file references it. It is kept so
// the file's dependency list is untouched; consider removing it.
import { parse as csvStringify } from "csv-parse/sync";

/** Output formats the tool can write. */
export type OutputFormat = "json" | "csv" | "both";

/** Every accepted {@link OutputFormat}; used for runtime validation and error text. */
const OUTPUT_FORMATS: readonly OutputFormat[] = ["json", "csv", "both"];

/** Fully validated options consumed by {@link runAudit}. */
export interface DataAuditOptions {
  /** Which file(s) to write. */
  format: OutputFormat;
  /** Number of records to generate (non-negative integer). */
  count: number;
  /** RNG seed; when omitted the current time is used, so output is not reproducible. */
  seed?: number;
  /** Directory that receives `data_audit.json` / `data_audit.csv`. */
  outputDir: string;
}

/** Shape of one generated record. */
interface AuditRecord {
  id: number;
  value: number;
  label: string;
}

/** Column order of the CSV output; fixed so an empty record set still gets a header. */
const CSV_COLUMNS: readonly (keyof AuditRecord)[] = ["id", "value", "label"];

/** Simple seedable LCG RNG for deterministic output */
export class SeededRng {
  private state: number;

  /**
   * @param seed Initial state; coerced to an unsigned 32-bit integer.
   *             Defaults to the current time.
   */
  constructor(seed: number = Date.now()) {
    this.state = seed >>> 0;
  }

  /** Advances the generator and returns a number in the range [0, 1). */
  next(): number {
    // Math.imul keeps the multiplication in 32-bit integer space; `>>> 0`
    // reinterprets the sum as unsigned, i.e. the modulus is 2^32.
    this.state = (Math.imul(1664525, this.state) + 1013904223) >>> 0;
    return this.state / 0x100000000;
  }

  /** Returns an integer between `min` and `max`, both inclusive. */
  nextInt(min: number, max: number): number {
    return Math.floor(this.next() * (max - min + 1)) + min;
  }
}

/** Type guard: true when `value` is one of the supported output formats. */
function isOutputFormat(value: unknown): value is OutputFormat {
  return (
    typeof value === "string" &&
    (OUTPUT_FORMATS as readonly string[]).includes(value)
  );
}

/**
 * Fills in defaults (`count` 10, `format` "json", `outputDir` ".") and
 * validates the result.
 *
 * @param opts Possibly incomplete options, e.g. parsed from the command line.
 * @returns A complete, validated options object.
 * @throws Error if `count` is not a non-negative integer, `format` is not a
 *         supported format, or `seed` is given but is not a finite number.
 */
export function validateOptions(opts: Partial<DataAuditOptions>): DataAuditOptions {
  const count = Number(opts.count ?? 10);
  if (!Number.isInteger(count) || count < 0) {
    throw new Error(
      `Invalid count: ${String(opts.count)} (expected a non-negative integer)`,
    );
  }

  const format: unknown = opts.format ?? "json";
  if (!isOutputFormat(format)) {
    throw new Error(
      `Invalid format: ${String(format)} (expected one of: ${OUTPUT_FORMATS.join(", ")})`,
    );
  }

  // A NaN seed would otherwise be coerced to 0 by SeededRng, silently
  // producing output for a seed the caller never asked for.
  if (opts.seed !== undefined && !Number.isFinite(opts.seed)) {
    throw new Error(
      `Invalid seed: ${String(opts.seed)} (expected a finite number)`,
    );
  }

  return { format, count, seed: opts.seed, outputDir: opts.outputDir ?? "." };
}

/** Builds `count` records whose values come from an RNG seeded with `seed`. */
function generateRecords(count: number, seed?: number): AuditRecord[] {
  const rng = new SeededRng(seed ?? Date.now());
  return Array.from({ length: count }, (_, i) => ({
    id: i + 1,
    value: rng.nextInt(1, 10000),
    label: `item-${i + 1}`,
  }));
}

/** Quotes a CSV field when it contains a comma, double quote, or line break. */
function escapeCsvField(value: unknown): string {
  const text = String(value);
  return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
}

/** Serializes records as CSV: a header line, one line per record, trailing newline. */
function toCsv(records: readonly AuditRecord[]): string {
  const header = CSV_COLUMNS.join(",");
  const rows = records.map((record) =>
    CSV_COLUMNS.map((column) => escapeCsvField(record[column])).join(","),
  );
  return [header, ...rows].join("\n") + "\n";
}

/**
 * Generates the records described by `opts` and writes them to
 * `data_audit.json` and/or `data_audit.csv` inside `opts.outputDir`.
 * The directory is created if it does not exist yet.
 *
 * @param opts Validated options (see {@link validateOptions}).
 * @throws Whatever `fs` throws when the directory or files cannot be written.
 */
export async function runAudit(opts: DataAuditOptions): Promise<void> {
  const records = generateRecords(opts.count, opts.seed);

  const writeJson = opts.format === "json" || opts.format === "both";
  const writeCsv = opts.format === "csv" || opts.format === "both";

  fs.mkdirSync(opts.outputDir, { recursive: true });

  if (writeJson) {
    const outPath = path.join(opts.outputDir, "data_audit.json");
    fs.writeFileSync(outPath, JSON.stringify(records, null, 2));
    console.log(`Wrote ${records.length} records to ${outPath}`);
  }

  if (writeCsv) {
    const outPath = path.join(opts.outputDir, "data_audit.csv");
    fs.writeFileSync(outPath, toCsv(records));
    console.log(`Wrote ${records.length} records to ${outPath}`);
  }
}

/**
 * Command-line entry point. Recognised flags: `--format`, `--count`,
 * `--seed`, `--output`. Prints the error message and exits with status 1
 * when validation or writing fails.
 */
async function main(): Promise<void> {
  const args = process.argv.slice(2);
  // Returns the token that follows `flag`, or undefined when the flag is absent.
  const get = (flag: string): string | undefined => {
    const idx = args.indexOf(flag);
    return idx !== -1 ? args[idx + 1] : undefined;
  };

  const rawSeed = get("--seed");
  try {
    const opts = validateOptions({
      // Cast only satisfies the signature; validateOptions checks the value at runtime.
      format: (get("--format") ?? "json") as OutputFormat,
      count: Number(get("--count") ?? "10"),
      seed: rawSeed !== undefined ? Number(rawSeed) : undefined,
      outputDir: get("--output") ?? ".",
    });
    await runAudit(opts);
  } catch (e: unknown) {
    console.error(e instanceof Error ? e.message : String(e));
    process.exit(1);
  }
}

// main() handles its own errors, so the returned promise is intentionally not awaited.
void main();