From a5188737d3f3d0b0ed9ed45e24caa87f4df85563 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Anders=20J=C3=BCrisoo?=
Date: Fri, 24 Jan 2025 15:55:14 +0100
Subject: [PATCH] Add CSV reader

---
Review note: CsvFileRead.ts was revised during review, so the blob id on its
"index" line no longer matches the file content. Leading whitespace on the
package.json and yarn.lock context lines was reconstructed; verify both
before applying.

 packages/nodejs/package.json                  |  1 +
 .../src/computers/CsvFileRead/CsvFileRead.ts  | 90 +++++++++++++++++++
 .../nodejs/src/computers/CsvFileRead/index.ts |  1 +
 packages/nodejs/src/computers/index.ts        |  1 +
 yarn.lock                                     |  8 ++
 5 files changed, 101 insertions(+)
 create mode 100644 packages/nodejs/src/computers/CsvFileRead/CsvFileRead.ts
 create mode 100644 packages/nodejs/src/computers/CsvFileRead/index.ts

diff --git a/packages/nodejs/package.json b/packages/nodejs/package.json
index 4bb833d63..86a6798fc 100644
--- a/packages/nodejs/package.json
+++ b/packages/nodejs/package.json
@@ -31,6 +31,7 @@
   "dependencies": {
     "@data-story/core": "workspace:*",
     "axios": "^1.3.4",
+    "csv-parse": "^5.6.0",
     "dotenv": "^16.0.3",
     "glob": "^11.0.0",
     "openai": "^3.2.1",
diff --git a/packages/nodejs/src/computers/CsvFileRead/CsvFileRead.ts b/packages/nodejs/src/computers/CsvFileRead/CsvFileRead.ts
new file mode 100644
index 000000000..2eb9ff8fc
--- /dev/null
+++ b/packages/nodejs/src/computers/CsvFileRead/CsvFileRead.ts
@@ -0,0 +1,90 @@
+import * as glob from 'glob';
+import fs from 'fs';
+import path from 'path';
+import { parse } from 'csv-parse/sync';
+import { Computer, serializeError, str } from '@data-story/core';
+
+/**
+ * Reads every CSV file matching a path or glob pattern and emits its rows.
+ *
+ * Each file is parsed with its first row as the column names. All rows of a
+ * file are pushed to `output` as one batch, each extended with a `_filePath`
+ * property holding the absolute path of the file. Failures (bad pattern,
+ * unreadable file, malformed CSV) are serialized and pushed to `errors`; a
+ * failing file does not stop the remaining files from being processed.
+ */
+export const CsvFileRead: Computer = {
+  name: 'CsvFile.read',
+  label: 'CsvFile.read',
+  category: 'NodeJs',
+  inputs: [],
+  outputs: [
+    {
+      name: 'output',
+      schema: {},
+    },
+    {
+      name: 'errors',
+      schema: {},
+    },
+  ],
+  params: [
+    str({
+      name: 'file_path',
+      label: 'File path (supports glob patterns)',
+      help: 'File path, e.g., **/*.csv',
+    }),
+    str({
+      name: 'delimiter',
+      label: 'Delimiter',
+      help: 'CSV delimiter character (default: ,)',
+      value: ',',
+    }),
+  ],
+
+  async *run({ output, params }) {
+    const pathPattern = params.file_path as string;
+    // csv-parse rejects an empty delimiter, so a cleared field falls back to
+    // the documented default instead of failing on every file.
+    const delimiter = (params.delimiter as string) || ',';
+
+    let files: string[];
+    try {
+      files = glob.sync(pathPattern, {
+        // Relative patterns resolve from the workspace folder; absolute
+        // patterns are matched as given.
+        cwd: path.isAbsolute(pathPattern)
+          ? undefined
+          : process.env.WORKSPACE_FOLDER_PATH,
+        ignore: ['**/node_modules/**'],
+        absolute: true,
+      });
+    } catch (error: any) {
+      output.pushTo('errors', [serializeError(error)]);
+      yield;
+      return;
+    }
+
+    for (const file of files) {
+      try {
+        const content = fs.readFileSync(file, 'utf-8');
+        const records: Record<string, unknown>[] = parse(content, {
+          // Strip a UTF-8 byte order mark so it does not end up in the
+          // first column name.
+          bom: true,
+          columns: true, // Use first row as column names
+          delimiter,
+          skip_empty_lines: true,
+          trim: true,
+        });
+
+        // Add file path to each record
+        output.push(records.map((record) => ({ ...record, _filePath: file })));
+      } catch (fileError: any) {
+        output.pushTo('errors', [serializeError(fileError)]);
+      }
+      // Hand control back after every file, whether it yielded rows or an error
+      yield;
+    }
+  },
+};
diff --git a/packages/nodejs/src/computers/CsvFileRead/index.ts b/packages/nodejs/src/computers/CsvFileRead/index.ts
new file mode 100644
index 000000000..f4cc3faea
--- /dev/null
+++ b/packages/nodejs/src/computers/CsvFileRead/index.ts
@@ -0,0 +1 @@
+export { CsvFileRead } from './CsvFileRead'
\ No newline at end of file
diff --git a/packages/nodejs/src/computers/index.ts b/packages/nodejs/src/computers/index.ts
index 12dfbd1b8..0c78dc093 100644
--- a/packages/nodejs/src/computers/index.ts
+++ b/packages/nodejs/src/computers/index.ts
@@ -1,3 +1,4 @@
+export { CsvFileRead } from './CsvFileRead'
 export { JsonFileRead } from './JsonFileRead'
 export { JsonFileWrite } from './JsonFileWrite'
 export { ListFiles } from './ListFiles'
diff --git a/yarn.lock b/yarn.lock
index bb79beadb..ee24a7a0b 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -550,6 +550,7 @@ __metadata:
     "@types/node": "npm:18.14.2"
     "@types/ws": "npm:^8.5.4"
     axios: "npm:^1.3.4"
+    csv-parse: "npm:^5.6.0"
     dotenv: "npm:^16.0.3"
     glob: "npm:^11.0.0"
     openai: "npm:^3.2.1"
@@ -5032,6 +5033,13 @@ __metadata:
   languageName: node
   linkType: hard
 
+"csv-parse@npm:^5.6.0":
+  version: 5.6.0
+  resolution: "csv-parse@npm:5.6.0"
+  checksum: 10/4c82e11f50ae0ccbac2aed716ef2502d0468bf96552083561db789fc0258ee4bb0a30106fcfb2684f153cb4042f0413e0eac3645d5466874803b7ccdeba67ac8
+  languageName: node
+  linkType: hard
+
 "culvert@npm:^0.1.2":
   version: 0.1.2
   resolution: "culvert@npm:0.1.2"