Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
321 changes: 187 additions & 134 deletions express-api/index.js
Original file line number Diff line number Diff line change
@@ -1,155 +1,208 @@
// --------------------------------------
// 1. Import necessary modules
// --------------------------------------
// NOTE: each module was imported twice (leftover diff artifact); duplicate
// import bindings are a SyntaxError in ES modules, so the list is deduped.
import express from "express";
import bodyParser from "body-parser";
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
import { OpenAIEmbeddings } from "@langchain/openai";
import { MemoryVectorStore } from "langchain/vectorstores/memory";
import { BraveSearch } from "@langchain/community/tools/brave_search";
import OpenAI from "openai";
import cheerio from "cheerio";
import dotenv from "dotenv";

dotenv.config();

// --------------------------------------
// 2. Initialize Express
// --------------------------------------
const app = express();
const port = 3005;
app.use(bodyParser.json());

// --------------------------------------
// 3. Initialize OpenAI client (Groq-compatible endpoint) + embeddings
// --------------------------------------
// NOTE: an unterminated duplicate `let openai = new OpenAI({ ...` fragment
// conflicted with this declaration (SyntaxError); only one client is kept.
const openai = new OpenAI({
  baseURL: "https://api.groq.com/openai/v1", // Groq speaks the OpenAI API
  apiKey: process.env.GROQ_API_KEY,
});

// Embeddings use OPENAI_API_KEY from the environment by default.
const embeddings = new OpenAIEmbeddings();
// 5. Define the route for POST requests
app.post('/', async (req, res) => {
// 6. Handle POST requests
console.log(`1. Received POST request`);
// 7. Extract request data
const { message, returnSources = true, returnFollowUpQuestions = true, embedSourcesInLLMResponse = false, textChunkSize = 800, textChunkOverlap = 200, numberOfSimilarityResults = 2, numberOfPagesToScan = 4 } = req.body;
console.log(`2. Destructured request data`);
// 8. Define rephrase function
async function rephraseInput(inputString) {
console.log(`4. Rephrasing input`);
// 9. Rephrase input using Groq
const groqResponse = await openai.chat.completions.create({

// --------------------------------------
// 4. Helper: Rephrase user input
// --------------------------------------
/**
 * Rephrase a user message into a concise search-engine query via Groq.
 * Falls back to the original input if the API call fails, so a flaky
 * LLM call never breaks the pipeline.
 * @param {string} input - Raw user message.
 * @returns {Promise<string>} Rephrased query, or `input` on error.
 */
async function rephraseInput(input) {
  try {
    const response = await openai.chat.completions.create({
      model: "mixtral-8x7b-32768",
      messages: [
        {
          role: "system",
          content:
            "You are a rephraser. Always reply with a concise version of the input, optimized for a search engine query.",
        },
        { role: "user", content: input },
      ],
    });
    // Removed a stray `return groqResponse...` line that referenced an
    // undefined variable (old-version leftover from the diff).
    return response.choices[0].message.content;
  } catch (err) {
    console.error("Error rephrasing input:", err);
    return input;
  }
}
// 10. Define search engine function
/**
 * Run the full search pipeline for a query: rephrase it, fetch results
 * from BraveSearch, normalize them, then fetch/vectorize each page.
 * @param {string} message - The user's original query.
 * @returns {Promise<Array>} One similarity-search result group per page.
 */
async function searchEngineForSources(message) {
  console.log(`3. Initializing Search Engine Process`);
  // 11. Initialize BraveSearch
  const loader = new BraveSearch({ apiKey: process.env.BRAVE_SEARCH_API_KEY });
  // 12. Rephrase the message before querying the search API
  const rephrased = await rephraseInput(message);
  console.log(`6. Rephrased message and got documents from BraveSearch`);
  // 13. Get documents from BraveSearch
  const rawDocs = await loader.call(rephrased, { count: numberOfPagesToScan });
  // 14–15. Normalize, then process and vectorize every result in parallel
  const entries = normalizeData(rawDocs);
  return Promise.all(entries.map(fetchAndProcess));
}
// 16. Normalize data
/**
 * Parse the raw BraveSearch JSON string and keep at most
 * `numberOfPagesToScan` results that have both a title and a link,
 * excluding brave.com's own pages.
 * @param {string} docs - JSON string returned by BraveSearch.
 * @returns {{title: string, link: string}[]} Slimmed result records.
 */
function normalizeData(docs) {
  const parsed = JSON.parse(docs);
  const usable = parsed.filter(
    (doc) => doc.title && doc.link && !doc.link.includes("brave.com")
  );
  return usable
    .slice(0, numberOfPagesToScan)
    .map((doc) => ({ title: doc.title, link: doc.link }));
}

// --------------------------------------
// 5. Helper: Extract main text from a web page
// --------------------------------------
/**
 * Strip boilerplate tags from an HTML document and return the page's
 * visible body text with all whitespace runs collapsed to single spaces.
 * @param {string} html - Raw HTML of the page.
 * @param {string} link - Source URL (currently unused; kept for callers).
 * @returns {string} Cleaned, trimmed body text.
 */
function extractMainContent(html, link) {
  const page = cheerio.load(html);
  // Drop non-content elements before reading the text.
  page("script, style, head, nav, footer, iframe, img").remove();
  const bodyText = page("body").text();
  return bodyText.replace(/\s+/g, " ").trim();
}

// --------------------------------------
// 6. Helper: Generate Follow-Up Questions
// --------------------------------------
/**
 * Ask Groq for three follow-up questions about `answer`.
 * @param {string} answer - The answer text to base the questions on.
 * @returns {Promise<string[]>} Three questions; generic fallbacks on any
 *   error (API failure or unparseable model output).
 */
async function generateFollowUpQuestions(answer) {
  try {
    const groqResponse = await openai.chat.completions.create({
      model: "mixtral-8x7b-32768",
      messages: [
        {
          role: "system",
          content:
            "Generate 3 relevant follow-up questions based on the provided text. Return them as a JSON array.",
        },
        {
          role: "user",
          content: `Generate 3 follow-up questions for: ${answer}`,
        },
      ],
    });

    // The model is instructed to emit a JSON array; parsing may still throw
    // on malformed output, which the catch below converts to fallbacks.
    return JSON.parse(groqResponse.choices[0].message.content);
  } catch (err) {
    console.error("Error generating follow-up questions:", err);
    return ["Can you explain more?", "Why is that important?", "Tell me more about this topic."];
  }
}
// 17. Fetch page content
/**
 * Download a page and return its cleaned main text.
 * Resolves to the empty string when the request fails or returns a
 * non-OK HTTP status, so one bad page never aborts the whole batch.
 * @param {string} link - URL to fetch.
 * @returns {Promise<string>} Extracted text, or "" on any failure.
 */
const fetchPageContent = async (link) => {
  console.log(`7. Fetching page content for ${link}`);
  try {
    const response = await fetch(link);
    if (!response.ok) {
      return ""; // skip if fetch fails
    }
    const html = await response.text();
    return extractMainContent(html, link);
  } catch (error) {
    console.error(`Error fetching page content for ${link}:`, error);
    return '';
  }
};

// --------------------------------------
// 7. Helper: Custom Domain Knowledge
// --------------------------------------
function getCustomKnowledge(message) {
const text = message.toLowerCase();

const facts = {
"pm of india": "🇮🇳 The current Prime Minister of India is **Narendra Modi**, serving since May 2014.",
dog: "🐶 Dogs are loyal domestic animals known as human’s best friends.",
cat: "🐱 Cats are independent and curious animals, loved for their agility and affection.",
tiger: "🐯 Tigers are the largest wild cats and apex predators found mostly in Asia.",
space: "🚀 Space is a vast expanse beyond Earth’s atmosphere, filled with stars, galaxies, and planets.",
ocean: "🌊 Oceans cover over 70% of Earth’s surface and are home to millions of species.",
};
// 18. Extract main content from the HTML page
// NOTE(review): duplicate of the extractMainContent defined earlier in this
// file — looks like a merge/diff leftover; confirm which copy is intended.
function extractMainContent(html, link) {
console.log(`8. Extracting main content from HTML for ${link}`);
// BUG: when `html` is empty, `$` is null and the next line throws a
// TypeError — should return "" instead of conditionally loading null.
// (Also missing a statement-terminating semicolon on this line.)
const $ = html.length ? cheerio.load(html) : null
$("script, style, head, nav, footer, iframe, img").remove();
return $("body").text().replace(/\s+/g, " ").trim();
// NOTE(review): no closing brace for this function in this rendering —
// diff artifact; the body runs on into the code below.

for (const key in facts) {
if (text.includes(key)) return facts[key];
}
// 19. Process and vectorize the content
let vectorCount = 0;
const fetchAndProcess = async (item) => {
const htmlContent = await fetchPageContent(item.link);
if (htmlContent && htmlContent.length < 250) return null;
const splitText = await new RecursiveCharacterTextSplitter({ chunkSize: textChunkSize, chunkOverlap: textChunkOverlap }).splitText(htmlContent);
const vectorStore = await MemoryVectorStore.fromTexts(splitText, { link: item.link, title: item.title }, embeddings);
vectorCount++;
console.log(`9. Processed ${vectorCount} sources for ${item.link}`);
return await vectorStore.similaritySearch(message, numberOfSimilarityResults);
};
// 20. Fetch and process sources
const sources = await searchEngineForSources(message, textChunkSize, textChunkOverlap);
const sourcesParsed = sources.map(group =>
group.map(doc => {
const title = doc.metadata.title;
const link = doc.metadata.link;
return { title, link };
})
.filter((doc, index, self) => self.findIndex(d => d.link === doc.link) === index)
);
console.log(`10. RAG complete sources and preparing response content`);
// 21. Prepare the response content
const chatCompletion = await openai.chat.completions.create({
messages:
[{
role: "system", content: `
- Here is my query "${message}", respond back with an answer that is as long as possible. If you can't find any relevant results, respond with "No relevant results found."
- ${embedSourcesInLLMResponse ? "Return the sources used in the response with iterable numbered markdown style annotations." : ""}" : ""}`
},
{ role: "user", content: ` - Here are the top results from a similarity search: ${JSON.stringify(sources)}. ` },
], stream: true, model: "mixtral-8x7b-32768"
});
console.log(`11. Sent content to Groq for chat completion.`);
let responseTotal = "";
console.log(`12. Streaming response from Groq... \n`);
for await (const chunk of chatCompletion) {
if (chunk.choices[0].delta && chunk.choices[0].finish_reason !== "stop") {
process.stdout.write(chunk.choices[0].delta.content);
responseTotal += chunk.choices[0].delta.content;
} else {
let responseObj = {};
returnSources ? responseObj.sources = sourcesParsed : null;
responseObj.answer = responseTotal;
returnFollowUpQuestions ? responseObj.followUpQuestions = await generateFollowUpQuestions(responseTotal) : null;
console.log(`\n\n13. Generated follow-up questions: ${JSON.stringify(responseObj.followUpQuestions)}`);
res.status(200).json(responseObj);

return null;
}

// --------------------------------------
// 8. Main POST Route
// --------------------------------------
/**
 * POST /
 * Body: { message, textChunkSize?, textChunkOverlap?,
 *         numberOfSimilarityResults?, numberOfPagesToScan? }
 * Pipeline: custom-knowledge shortcut → rephrase → Brave search →
 * fetch/chunk/vectorize pages → similarity search → Groq summary.
 * Responds with { answer, sources, followUpQuestions, responseTime }.
 */
app.post("/", async (req, res) => {
  const startTime = Date.now();
  const {
    message,
    textChunkSize = 800,
    textChunkOverlap = 200,
    numberOfSimilarityResults = 2,
    numberOfPagesToScan = 4,
  } = req.body;

  // Reject empty/missing queries early — getCustomKnowledge would otherwise
  // throw on message.toLowerCase() and surface as a 500.
  if (typeof message !== "string" || message.trim() === "") {
    return res.status(400).json({ error: "Missing required field: message" });
  }

  console.log("\n📩 New query:", message);

  try {
    // Check for quick domain knowledge
    const predefined = getCustomKnowledge(message);
    if (predefined) {
      return res.json({
        answer: predefined,
        sources: [],
        followUpQuestions: await generateFollowUpQuestions(predefined),
      });
    }

    // Rephrase query
    const rephrasedMessage = await rephraseInput(message);
    console.log("🔁 Rephrased:", rephrasedMessage);

    // Initialize Brave Search
    const loader = new BraveSearch({ apiKey: process.env.BRAVE_SEARCH_API_KEY });
    const docs = await loader.call(rephrasedMessage, { count: numberOfPagesToScan });
    const normalized = JSON.parse(docs)
      .filter((d) => d.title && d.link)
      .slice(0, numberOfPagesToScan);

    console.log(`🔍 Found ${normalized.length} relevant web pages.`);

    // Fetch, chunk, and vectorize — pages are processed in parallel; a
    // failing page contributes an empty group instead of failing the request.
    const sources = await Promise.all(
      normalized.map(async ({ title, link }) => {
        try {
          const response = await fetch(link);
          if (!response.ok) return []; // skip 4xx/5xx pages (was: body read unconditionally)
          const html = await response.text();
          const content = extractMainContent(html, link);
          const splitter = new RecursiveCharacterTextSplitter({
            chunkSize: textChunkSize,
            chunkOverlap: textChunkOverlap,
          });
          const chunks = await splitter.splitText(content);
          const store = await MemoryVectorStore.fromTexts(chunks, { link, title }, embeddings);
          return await store.similaritySearch(message, numberOfSimilarityResults);
        } catch (err) {
          console.error("Error processing link:", link, err);
          return [];
        }
      })
    );

    // Prepare LLM summary
    const chat = await openai.chat.completions.create({
      model: "mixtral-8x7b-32768",
      messages: [
        {
          role: "system",
          content: `You are an intelligent assistant. Respond with an informative and structured summary for the query: "${message}" using context below.`,
        },
        { role: "user", content: `Sources: ${JSON.stringify(sources)}` },
      ],
    });

    const finalAnswer = chat.choices[0].message.content;

    // Return response
    res.json({
      answer: finalAnswer,
      sources,
      followUpQuestions: await generateFollowUpQuestions(finalAnswer),
      responseTime: `${(Date.now() - startTime) / 1000}s`,
    });

    console.log("✅ Response sent in", (Date.now() - startTime) / 1000, "seconds");
  } catch (error) {
    console.error("❌ Error in processing:", error);
    res.status(500).json({ error: "Internal Server Error", details: error.message });
  }
});
// 22. Generate follow-up questions
/**
 * Ask Groq for three follow-up questions about `responseText`.
 * @param {string} responseText - Text to base the questions on.
 * @returns {Promise<string[]>} Parsed array of questions; generic fallbacks
 *   when the API call fails or the model output is not valid JSON.
 */
async function generateFollowUpQuestions(responseText) {
  try {
    const groqResponse = await openai.chat.completions.create({
      model: "mixtral-8x7b-32768",
      messages: [
        { role: "system", content: "You are a question generator. Generate 3 follow-up questions based on the provided text. Return the questions in an array format." },
        {
          role: "user",
          content: `Generate 3 follow-up questions based on the following text:\n\n${responseText}\n\nReturn the questions in the following format: ["Question 1", "Question 2", "Question 3"]`
        }
      ],
    });
    // LLM output is not guaranteed to be valid JSON — an unguarded parse
    // here previously crashed the whole request on malformed output.
    return JSON.parse(groqResponse.choices[0].message.content);
  } catch (err) {
    console.error("Error generating follow-up questions:", err);
    return ["Can you explain more?", "Why is that important?", "Tell me more about this topic."];
  }
}
// --------------------------------------
// 9. Start Server
// --------------------------------------
// NOTE: two interleaved app.listen fragments (old + new diff lines) left an
// orphan `});` here — collapsed into a single listener.
app.listen(port, () => {
  console.log(`🚀 Server is running on http://localhost:${port}`);
});
Binary file added node-installer.msi
Binary file not shown.