Skip to content

Commit aa2140e

Browse files
committed
Use sonnet
0 parents  commit aa2140e

File tree

3 files changed

+282
-0
lines changed

3 files changed

+282
-0
lines changed

bun.lockb

15.6 KB
Binary file not shown.

package.json

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
{
2+
"name": "repo-summarizer",
3+
"module": "index.ts",
4+
"type": "module",
5+
"devDependencies": {
6+
"@types/bun": "latest"
7+
},
8+
"peerDependencies": {
9+
"typescript": "^5.0.0"
10+
},
11+
"dependencies": {
12+
"@google-cloud/vertexai": "^1.7.0",
13+
"node-html-parser": "^6.1.13"
14+
}
15+
}

process_repo.ts

Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
import { promises as fs } from 'fs';
2+
import path from 'path';
3+
import { VertexAI } from "@google-cloud/vertexai";
4+
import Anthropic from "@anthropic-ai/sdk";
5+
import type { TextBlock } from '@anthropic-ai/sdk/resources/index.mjs';
6+
7+
8+
// Entry-point setup: read the repository path from the command line, derive the
// IG name used for every output file, and make sure the output directory exists.
const repoPath = process.argv[2];
if (!repoPath) {
  console.error('Please provide the repository path as an argument.');
  process.exit(1);
}

// Resolve before taking the basename so that trailing separators, platform-specific
// separators, and relative paths such as "." or ".." still yield the directory's real name.
const igName = path.basename(path.resolve(repoPath));
if (!igName) {
  // Happens only for a filesystem root, which has no usable name for output files.
  console.error(`Could not derive a repository name from '${repoPath}'.`);
  process.exit(1);
}

await fs.mkdir('summaries', { recursive: true });
16+
/**
 * Produces the summary for a single repository.
 *
 * Nothing is done when `summaries/<igName>.md` is already present or when the
 * repository has no `input` directory; both cases are reported on stdout.
 * Otherwise the contents of `input` are concatenated and handed to
 * `generateSummary`.
 *
 * @param repoPath - Path to the repository checkout.
 */
async function processRepo(repoPath: string) {
  // fs.access rejects when the path cannot be accessed; treat any rejection as "absent".
  const isAccessible = async (target: string): Promise<boolean> => {
    try {
      await fs.access(target);
      return true;
    } catch {
      return false;
    }
  };

  const summaryPath = path.join('summaries', `${igName}.md`);
  const sourceDir = path.join(repoPath, 'input');

  // A summary from an earlier run means there is nothing left to do.
  if (await isAccessible(summaryPath)) {
    console.log(`Summary ${summaryPath} for ${igName} already exists. Skipping this repo.`);
    return;
  }

  // Repositories without an 'input' folder have no sources to summarize.
  if (!(await isAccessible(sourceDir))) {
    console.log(`No 'input' folder found in ${repoPath}. Skipping this repo.`);
    return;
  }

  // Gather the sources, then run the two-stage summarization.
  const combinedSources = await concatenateFiles(sourceDir);
  await generateSummary(combinedSources);
}
43+
44+
/**
 * Collects the text sources under `dir` and concatenates them into one string.
 *
 * The directory is walked recursively. Files are kept when their extension is
 * `.txt`, `.md`, `.plantuml` or `.fsh`, except for `ignoreWarnings.txt`. Kept
 * files are ordered as follows:
 *   1. files whose relative path contains "pagecontent" come first,
 *   2. then shallower files before deeper ones,
 *   3. then files whose name contains "index",
 *   4. then shorter file names,
 *   5. and finally by full path, so the order does not depend on the order in
 *      which the filesystem happens to list directory entries.
 *
 * Each file is wrapped as `<source path="...">\n<content></source>\n`. A file
 * that would push the total past `maxSize` is skipped (smaller later files may
 * still be added).
 *
 * @param dir - Root directory to scan.
 * @param maxSize - Maximum size of the result in UTF-8 bytes (default 400KB).
 * @returns The concatenated, tagged file contents.
 */
async function concatenateFiles(dir: string, maxSize = 400 * 1024): Promise<string> {
  type SourceFile = { path: string; depth: number; name: string; isIndex: boolean; isPageContent: boolean };

  const allowedExtensions = ['.txt', '.md', '.plantuml', '.fsh'];
  const allFiles: SourceFile[] = [];

  // Recursively gathers every eligible file below currentDir into allFiles.
  async function collectFiles(currentDir: string): Promise<void> {
    const entries = await fs.readdir(currentDir, { withFileTypes: true });

    for (const entry of entries) {
      const fullPath = path.join(currentDir, entry.name);
      if (entry.isDirectory()) {
        await collectFiles(fullPath);
      } else if (
        entry.isFile() &&
        allowedExtensions.includes(path.extname(entry.name)) &&
        entry.name !== 'ignoreWarnings.txt'
      ) {
        const relativePath = path.relative(dir, fullPath);
        allFiles.push({
          path: fullPath,
          depth: relativePath.split(path.sep).length,
          name: entry.name,
          isIndex: entry.name.toLowerCase().includes('index'),
          isPageContent: relativePath.toLowerCase().includes('pagecontent'),
        });
      }
    }
  }

  await collectFiles(dir);

  allFiles.sort((a, b) => {
    if (a.isPageContent !== b.isPageContent) return a.isPageContent ? -1 : 1;
    if (a.depth !== b.depth) return a.depth - b.depth;
    if (a.isIndex !== b.isIndex) return a.isIndex ? -1 : 1;
    if (a.name.length !== b.name.length) return a.name.length - b.name.length;
    // Final tie-break: without it, which files fit under the size cap would
    // depend on directory listing order.
    if (a.path === b.path) return 0;
    return a.path < b.path ? -1 : 1;
  });

  // Human-readable limit for log messages; "400KB" for the default.
  const limitLabel = `${maxSize / 1024}KB`;

  let content = '';
  let currentSize = 0;

  for (const file of allFiles) {
    const fileContent = await fs.readFile(file.path, 'utf-8');
    const sourceContent = `<source path="${file.path}">\n${fileContent}</source>\n`;
    const sourceSize = Buffer.byteLength(sourceContent, 'utf-8');

    if (currentSize + sourceSize > maxSize) {
      console.log(`Skipping ${file.path} as it would exceed the ${limitLabel} limit.`);
      continue;
    }

    content += sourceContent;
    currentSize += sourceSize;

    // Only reachable when the budget is filled exactly; nothing else can fit.
    if (currentSize >= maxSize) {
      console.log(`Reached ${limitLabel} limit. Stopping file processing.`);
      break;
    }
  }

  console.log(`Total content size: ${currentSize / 1024} KB`);
  return content;
}
102+
103+
/**
 * Runs the two-stage summarization for the current IG and writes the results to disk.
 *
 * Stage 1 sends the concatenated sources plus analysis instructions to a Vertex AI
 * Gemini model. Stage 2 asks an Anthropic Claude model to turn that analysis into a
 * short plain-language summary, with the assistant turn prefilled so the summary
 * always opens with "This standard".
 *
 * Files written (named after the module-level `igName`):
 *   - prompts/<igName>.txt             the stage-1 request
 *   - analysis/<igName>.md             the stage-1 analysis
 *   - prompts/<igName>-refinement.txt  the stage-2 prompt
 *   - summaries/<igName>.md            the final summary
 *
 * Failures in either model call (including empty responses) are logged and not
 * rethrown, so the returned promise resolves even when no summary was written.
 *
 * @param content - Concatenated source files, as produced by `concatenateFiles`.
 */
async function generateSummary(content: string) {
  const vertexAI = new VertexAI({
    project: "fhir-org-starter-project",
    location: "us-central1",
  });

  const anthropic = new Anthropic();

  const generativeModel = vertexAI.getGenerativeModel({
    model: "gemini-pro-experimental",
    generationConfig: {
      temperature: 0.7,
    }
  });

  // ${igName} must be interpolated here; a bare "$igName" would reach the model verbatim.
  const promptInstructions = `# FHIR IG Analysis
Given the FHIR Implementation Guide (IG) source files above, provide a structured analysis addressing the following questions:

1. What is this IG trying to achieve, in what context, and for whom? Explain its objectives in simple terms.
2. How does this IG improve upon previous approaches? (Use only information from the input files; avoid speculation.)
3. What are the key features and technical approaches of this IG?
4. How does this IG relate to broader healthcare standards and regulations?
5. Who are the primary users or beneficiaries of this IG, including patients if applicable?

Provide concise, factual responses to each question based on the content of the IG. Aim for clarity and precision in your analysis. Begin with "# ${igName}: Analysis" and do not output anything other than the analysis.`;

  const request = {
    systemInstruction: "You are a health information technology expert.",
    contents: [
      { role: 'user', parts: [{ text: content + "\n\n" + promptInstructions }] },
    ]
  };

  // Keep a copy of the exact stage-1 request for later inspection.
  await fs.mkdir('prompts', { recursive: true });
  await fs.writeFile(path.join('prompts', `${igName}.txt`), JSON.stringify(request, null, 2));

  try {
    const response = await generativeModel.generateContent(request);
    // Every step is optional-chained: the candidate list may be absent or empty.
    const analysis = response.response.candidates?.[0]?.content?.parts?.[0]?.text ?? "";
    if (!analysis) {
      // Refining an empty analysis would only produce an ungrounded summary.
      throw new Error(`The analysis model returned no text for ${igName}.`);
    }
    console.log('Initial Analysis:', analysis);

    await fs.mkdir('analysis', { recursive: true });
    await fs.writeFile(path.join('analysis', `${igName}.md`), analysis);

    const basicGuidelines = `
1. Explain the IG's purpose, country of use (if applicable), and context of use / use cases, and key features / how it works. Avoid explaining what standards are in general.
2. Write ~200 words in short paragraphs for a general audience.
3. Use clear, jargon-free language.
4. Write in third-person perspective.
5. Maintain an objective, informative tone.
6. Present information factually.
7. Highlight any key stakeholder benefits.
8. Mention how the IG relates to other standards or regulations, if this is direct and relevant. Otherwise omit this.
9. Avoid promotional language or unverified claims.
`
    const revisionGuidelines = `
Please revise this summary to adhere to the following revision guideline:
- Rather than referring to an "IG" or "Implementation Guide", just call it a "standard".
- Remove any explanation that healthcare standards are like a common language or that they help computers talk to each other. That's common knowledge.
- Remove any explanation of what FHIR is; do not expand the acronym FHIR; just call it FHIR.
- Remove any explanation of what an API is; do not expand the acronym API; just call it an API.
- Remove any explanation of what an EHR is; do not expand the acronym EHR; just call it an EHR.
- Eliminate any speculative or indirect information about benefits.
- Remove any redundancy in the summary.
- Remove any mention of things you don't know or aren't sure about this IG
- Remove any mention that this IG builds on FHIR; that is common knowledge.
`

    // Refinement stage
    const refinementPrompt = `
Here is the analysis of a FHIR Implementation Guide:

${analysis}

Use the analysis to create a plain language summary of the guide that adheres to these guidelines:

${basicGuidelines}

${revisionGuidelines}

Provide only the refined summary as your response, without additional explanations or comments.`;

    // Persist the prompt before calling the model so it survives a failed request.
    await fs.writeFile(path.join('prompts', `${igName}-refinement.txt`), JSON.stringify(refinementPrompt, null, 2));

    // Prefilled start of the assistant turn; the model continues from here.
    const prefill = "This standard";

    const msg = await anthropic.messages.create({
      model: "claude-3-5-sonnet-20240620",
      max_tokens: 1182,
      temperature: 0.6,
      system: "You are a skilled communicator with expertise in health information technology and a knack for clear, concise writing.",
      messages: [
        {
          role: "user",
          content: [{ type: "text", text: `${refinementPrompt}\n\n` }]
        },
        {
          role: "assistant",
          content: [{ type: "text", text: prefill }]
        }
      ]
    });

    const textBlock = msg.content.find((block): block is TextBlock => block.type === 'text');
    if (!textBlock) {
      throw new Error(`The refinement model returned no text for ${igName}.`);
    }

    // The response holds only the continuation of the prefill. Add a separating
    // space only when the continuation does not already start with whitespace,
    // so the join never yields a double space.
    const continuation = textBlock.text;
    const refinedSummary = /^\s/.test(continuation)
      ? prefill + continuation
      : `${prefill} ${continuation}`;

    console.log("Refined as", refinedSummary);
    await fs.writeFile(path.join('summaries', `${igName}.md`), refinedSummary);
  } catch (error) {
    console.error('Error generating or refining summary:', error);
  }
}
266+
267+
// Run the pipeline for the requested repository. Errors that escape processRepo are
// reported and reflected in the exit status so calling scripts can detect the failure.
processRepo(repoPath).catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});

0 commit comments

Comments
 (0)