[spelunker] Add @files; add @purgeFile (call from @import); colorize output (#432)

- Colorize output from most commands and operations
- New command: `@files` lists all files found in the chunks; `--filter` limits output to files containing a substring (see the sketch after this list).
- New command: `@purgeFile` removes all mention of a file from the
indexes.
- Update `@import` to call the guts of `@purgeFile` before re-importing
a file, to avoid duplicates.
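
As a rough illustration of the `@files` behavior described above, here is a minimal sketch only; the real command lives in `queryInterface.ts`, which is not shown in this diff. It assumes `ObjectFolder` exposes `allNames()`/`get()` and relies on each `Chunk` recording its `fileName`:

```typescript
import { ChunkyIndex } from "./chunkyIndex.js";

// Hypothetical sketch of @files: walk every stored chunk, collect distinct
// file names, and optionally keep only those containing the --filter substring.
async function listChunkedFiles(
    chunkyIndex: ChunkyIndex,
    filter?: string,
): Promise<string[]> {
    const fileNames = new Set<string>();
    for (const chunkId of await chunkyIndex.chunkFolder.allNames()) {
        const chunk = await chunkyIndex.chunkFolder.get(chunkId);
        if (chunk) fileNames.add(chunk.fileName);
    }
    let files = [...fileNames].sort();
    if (filter) {
        files = files.filter((name) => name.includes(filter));
    }
    return files;
}
```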

---------

Co-authored-by: Guido van Rossum <[email protected]>
gvanrossum-ms and gvanrossum authored Nov 25, 2024
1 parent 1470336 commit 685eacc
Showing 5 changed files with 284 additions and 73 deletions.
1 change: 1 addition & 0 deletions ts/examples/spelunker/package.json
```diff
@@ -28,6 +28,7 @@
     },
     "dependencies": {
         "aiclient": "workspace:*",
+        "chalk": "^5.3.0",
         "code-processor": "workspace:*",
         "dotenv": "^16.3.1",
         "interactive-app": "workspace:*",
```
27 changes: 12 additions & 15 deletions ts/examples/spelunker/src/chunkyIndex.ts
```diff
@@ -18,10 +18,7 @@ export type IndexType =
     | "topics"
     | "goals"
     | "dependencies";
-export type NamedIndex = {
-    name: IndexType;
-    index: knowLib.TextIndex<string, ChunkId>;
-};
+export type NamedIndex = [IndexType, knowLib.TextIndex<string, ChunkId>];
 
 // A bundle of object stores and indexes etc.
 export class ChunkyIndex {
@@ -31,7 +28,7 @@ export class ChunkyIndex {
     queryMaker: TypeChatJsonTranslator<QuerySpecs>;
     answerMaker: TypeChatJsonTranslator<AnswerSpecs>;
 
-    // The rest are asynchronously initialized by initialize().
+    // The rest are asynchronously initialized by reInitialize(rootDir).
     rootDir!: string;
     chunkFolder!: ObjectFolder<Chunk>;
     summariesIndex!: knowLib.TextIndex<string, ChunkId>;
@@ -85,22 +82,22 @@
         }
     }
 
-    getIndexByName(name: IndexType): knowLib.TextIndex<string, ChunkId> {
-        for (const pair of this.allIndexes()) {
-            if (pair.name === name) {
-                return pair.index;
+    getIndexByName(indexName: IndexType): knowLib.TextIndex<string, ChunkId> {
+        for (const [name, index] of this.allIndexes()) {
+            if (name === indexName) {
+                return index;
             }
         }
-        throw new Error(`Unknown index: ${name}`);
+        throw new Error(`Unknown index: ${indexName}`);
     }
 
     allIndexes(): NamedIndex[] {
         return [
-            { name: "summaries", index: this.summariesIndex },
-            { name: "keywords", index: this.keywordsIndex },
-            { name: "topics", index: this.topicsIndex },
-            { name: "goals", index: this.goalsIndex },
-            { name: "dependencies", index: this.dependenciesIndex },
+            ["summaries", this.summariesIndex],
+            ["keywords", this.keywordsIndex],
+            ["topics", this.topicsIndex],
+            ["goals", this.goalsIndex],
+            ["dependencies", this.dependenciesIndex],
         ];
     }
 }
```
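For orientation, a short sketch of how the tuple-based `NamedIndex` above can be consumed; it relies only on the shapes visible in this diff, and the helper name is hypothetical:

```typescript
import { ChunkyIndex, IndexType } from "./chunkyIndex.js";

// With NamedIndex as a [name, index] tuple, callers destructure directly
// instead of reading .name/.index properties off an object literal.
function indexNames(chunkyIndex: ChunkyIndex): IndexType[] {
    return chunkyIndex.allIndexes().map(([name]) => name);
}

// Lookup by name still goes through getIndexByName, which throws for an
// unknown index name:
// const topicsIndex = chunkyIndex.getIndexByName("topics");
```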
49 changes: 40 additions & 9 deletions ts/examples/spelunker/src/pythonImporter.ts
```diff
@@ -28,6 +28,7 @@ TypeScript, of course).
 */
 
+import chalk, { ChalkInstance } from "chalk";
 import * as fs from "fs";
 import * as knowLib from "knowledge-processor";
 import { asyncArray } from "typeagent";
@@ -42,8 +43,14 @@ import {
     chunkifyPythonFiles,
     ErrorItem,
 } from "./pythonChunker.js";
+import { purgeNormalizedFile } from "./queryInterface.js";
 
-function log(io: iapp.InteractiveIo | undefined, message: string): void {
+function log(
+    io: iapp.InteractiveIo | undefined,
+    message: string,
+    color: ChalkInstance,
+): void {
+    message = color(message);
     if (io) {
         io.writer.writeLine(message);
     } else {
@@ -57,7 +64,7 @@ export async function importAllFiles(
     io: iapp.InteractiveIo | undefined,
     verbose: boolean,
 ): Promise<void> {
-    log(io, `[Importing ${files.length} files]`);
+    log(io, `[Importing ${files.length} files]`, chalk.grey);
 
     const t0 = Date.now();
     await importPythonFiles(files, chunkyIndex, io, verbose);
@@ -66,6 +73,7 @@
     log(
         io,
         `[Imported ${files.length} files in ${((t1 - t0) * 0.001).toFixed(3)} seconds]`,
+        chalk.grey,
     );
 }
 
@@ -80,6 +88,11 @@ async function importPythonFiles(
         fs.existsSync(file) ? fs.realpathSync(file) : file,
     );
 
+    // Purge previous occurrences of these files.
+    for (const fileName of filenames) {
+        await purgeNormalizedFile(io, chunkyIndex, fileName, verbose);
+    }
+
     // Chunkify Python files using a helper program. (TODO: Make generic over languages)
     const t0 = Date.now();
     const results = await chunkifyPythonFiles(filenames);
@@ -88,6 +101,7 @@
         log(
             io,
             `[Some over-long files were split into multiple partial files]`,
+            chalk.yellow,
         );
     }
 
@@ -115,19 +129,24 @@ async function importPythonFiles(
         `[Chunked ${results.length} files ` +
             `(${numLines} lines, ${numBlobs} blobs, ${numChunks} chunks, ${numErrors} errors) ` +
             `in ${((t1 - t0) * 0.001).toFixed(3)} seconds]`,
+        chalk.gray,
     );
 
     const chunkingErrors = results.filter(
         (result): result is ErrorItem => "error" in result,
     );
     for (const error of chunkingErrors) {
-        log(io, `[Error: ${error.error}; Output: ${error.output ?? ""}]`);
+        log(
+            io,
+            `[Error: ${error.error}; Output: ${error.output ?? ""}]`,
+            chalk.redBright,
+        );
     }
 
     const chunkedFiles = results.filter(
         (result): result is ChunkedFile => "chunks" in result,
     );
-    log(io, `[Documenting ${chunkedFiles.length} files]`);
+    log(io, `[Documenting ${chunkedFiles.length} files]`, chalk.grey);
 
     const tt0 = Date.now();
     const documentedFiles: FileDocumentation[] = [];
@@ -141,14 +160,17 @@
             let docs: FileDocumentation;
             nChunks += chunkedFile.chunks.length;
             try {
-                docs = await chunkyIndex.fileDocumenter.document(
+                docs = await exponentialBackoff(
+                    io,
+                    chunkyIndex.fileDocumenter.document,
                     chunkedFile.chunks,
                 );
             } catch (error) {
                 const t1 = Date.now();
                 log(
                     io,
                     ` [Error documenting ${chunkedFile.fileName} in ${((t1 - t0) * 0.001).toFixed(3)} seconds: ${error}]`,
+                    chalk.redBright,
                 );
                 return;
             }
@@ -157,6 +179,7 @@
             log(
                 io,
                 ` [Documented ${chunkedFile.chunks.length} chunks in ${((t1 - t0) * 0.001).toFixed(3)} seconds for ${chunkedFile.fileName}]`,
+                chalk.grey,
             );
             documentedFiles.push(docs);
         },
@@ -166,13 +189,14 @@
     log(
         io,
         `[Documented ${documentedFiles.length} files (${nChunks} chunks) in ${((tt1 - tt0) * 0.001).toFixed(3)} seconds]`,
+        chalk.grey,
    );
 
     const nonEmptyFiles = chunkedFiles.filter(
         (cf) => cf.chunks.filter((c) => c.docs).length,
     );
 
-    log(io, `[Embedding ${nonEmptyFiles.length} files]`);
+    log(io, `[Embedding ${nonEmptyFiles.length} files]`, chalk.grey);
 
     if (nonEmptyFiles.length) {
         const ttt0 = Date.now();
@@ -186,6 +210,7 @@
         log(
             io,
             `[Embedded ${documentedFiles.length} files in ${((ttt1 - ttt0) * 0.001).toFixed(3)} seconds]`,
+            chalk.grey,
         );
     }
 }
@@ -198,7 +223,7 @@ export async function embedChunkedFile(
 ): Promise<void> {
     const chunks: Chunk[] = chunkedFile.chunks;
     if (chunks.length === 0) {
-        log(io, `[Skipping empty file ${chunkedFile.fileName}]`);
+        log(io, `[Skipping empty file ${chunkedFile.fileName}]`, chalk.yellow);
         return;
     }
     const t0 = Date.now();
@@ -209,6 +234,7 @@
     log(
         io,
         ` [Embedded ${chunks.length} chunks in ${((t1 - t0) * 0.001).toFixed(3)} seconds for ${chunkedFile.fileName}]`,
+        chalk.grey,
     );
 }
 
@@ -271,6 +297,7 @@ async function embedChunk(
             io,
             ` [Embedded ${chunk.id} (${lineCount} lines @ ${chunk.blobs[0].start}) ` +
                 `in ${((t1 - t0) * 0.001).toFixed(3)} seconds for ${chunk.fileName}]`,
+            chalk.gray,
         );
     }
 }
@@ -297,10 +324,14 @@ async function exponentialBackoff<T extends any[], R>(
             return await callable(...args);
         } catch (error) {
             if (timeout > 1000) {
-                log(io, `[Error: ${error}; giving up]`);
+                log(io, `[Error: ${error}; giving up]`, chalk.redBright);
                 throw error;
             }
-            log(io, `[Error: ${error}; retrying in ${timeout} ms]`);
+            log(
+                io,
+                `[Error: ${error}; retrying in ${timeout} ms]`,
+                chalk.redBright,
+            );
             await new Promise((resolve) => setTimeout(resolve, timeout));
             timeout *= 2;
         }
```
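For orientation, a hedged usage sketch of the two helpers this file now threads through every message, written as if it sat inside `pythonImporter.ts` (`log` and `exponentialBackoff` are module-local); the wrapper names here are hypothetical, while the call shapes mirror the hunks above:

```typescript
// Relies on this module's existing imports: chalk, iapp, ChunkyIndex, Chunk,
// FileDocumentation.

// Progress goes out in grey, warnings in yellow, errors in redBright; the
// ChalkInstance is chosen at each call site and applied inside log().
function reportProgress(io: iapp.InteractiveIo | undefined, n: number): void {
    log(io, `[Processed ${n} chunks]`, chalk.grey);
}

// Flaky model calls are routed through exponentialBackoff, which retries with
// a doubling delay and rethrows once the delay exceeds one second.
async function documentWithRetry(
    io: iapp.InteractiveIo | undefined,
    chunkyIndex: ChunkyIndex,
    chunks: Chunk[],
): Promise<FileDocumentation> {
    return await exponentialBackoff(
        io,
        chunkyIndex.fileDocumenter.document,
        chunks,
    );
}
```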
