Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 49 additions & 3 deletions app/api/web-search/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,31 @@
* Simple JSON request/response using Tavily search.
*/

import { NextRequest } from 'next/server';
import { callLLM } from '@/lib/ai/llm';
import { searchWithTavily, formatSearchResultsAsContext } from '@/lib/web-search/tavily';
import { resolveWebSearchApiKey } from '@/lib/server/provider-config';
import { createLogger } from '@/lib/logger';
import { apiError, apiSuccess } from '@/lib/server/api-response';
import {
buildSearchQuery,
SEARCH_QUERY_REWRITE_EXCERPT_LENGTH,
} from '@/lib/server/search-query-builder';
import { resolveModelFromHeaders } from '@/lib/server/resolve-model';
import type { AICallFn } from '@/lib/generation/pipeline-types';

const log = createLogger('WebSearch');

export async function POST(req: Request) {
export async function POST(req: NextRequest) {
try {
const body = await req.json();
const { query, apiKey: clientApiKey } = body as {
const {
query,
pdfText,
apiKey: clientApiKey,
} = body as {
query?: string;
pdfText?: string;
apiKey?: string;
};

Expand All @@ -33,7 +46,40 @@ export async function POST(req: Request) {
);
}

const result = await searchWithTavily({ query: query.trim(), apiKey });
// Clamp rewrite input at the route boundary; framework body limits still apply to total request size.
const boundedPdfText = pdfText?.slice(0, SEARCH_QUERY_REWRITE_EXCERPT_LENGTH);

let aiCall: AICallFn | undefined;
try {
const { model: languageModel } = resolveModelFromHeaders(req);
aiCall = async (systemPrompt, userPrompt) => {
const result = await callLLM(
{
model: languageModel,
messages: [
{ role: 'system', content: systemPrompt },
{ role: 'user', content: userPrompt },
],
maxOutputTokens: 256,
},
'web-search-query-rewrite',
);
return result.text;
};
} catch (error) {
log.warn('Search query rewrite model unavailable, falling back to raw requirement:', error);
}

const searchQuery = await buildSearchQuery(query, boundedPdfText, aiCall);

log.info('Running web search API request', {
hasPdfContext: searchQuery.hasPdfContext,
rawRequirementLength: searchQuery.rawRequirementLength,
rewriteAttempted: searchQuery.rewriteAttempted,
finalQueryLength: searchQuery.finalQueryLength,
});

const result = await searchWithTavily({ query: searchQuery.query, apiKey });
const context = formatSearchResultsAsContext(result);

return apiSuccess({
Expand Down
3 changes: 2 additions & 1 deletion app/generation-preview/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -309,9 +309,10 @@ function GenerationPreviewContent() {
wsSettings.webSearchProvidersConfig?.[wsSettings.webSearchProviderId]?.apiKey;
const res = await fetch('/api/web-search', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
headers: getApiHeaders(),
body: JSON.stringify({
query: currentSession.requirements.requirement,
pdfText: currentSession.pdfText || undefined,
apiKey: wsApiKey || undefined,
}),
signal,
Expand Down
1 change: 1 addition & 0 deletions lib/generation/prompts/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ export {
// Prompt IDs constant
export const PROMPT_IDS = {
REQUIREMENTS_TO_OUTLINES: 'requirements-to-outlines',
WEB_SEARCH_QUERY_REWRITE: 'web-search-query-rewrite',
SLIDE_CONTENT: 'slide-content',
QUIZ_CONTENT: 'quiz-content',
SLIDE_ACTIONS: 'slide-actions',
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Web Search Query Rewriter

You rewrite user requests into concise, high-signal web search queries as JSON.

{{snippet:json-output-rules}}

## Rules

- Return a JSON object with exactly one field: `query`
- Preserve the user's intent
- If a PDF excerpt is provided, use it to infer the topic, title, authors, methods, keywords, or named entities when helpful
- Ignore boilerplate, copyright text, page numbers, and irrelevant noise
- Prefer concrete topic terms over vague references like "this paper" or "this document"
- Keep the query under 320 characters
- If the original requirement is already concise and specific, keep it close to the original
- If the PDF excerpt is unhelpful, rely on the requirement

## Output Format

Example output:
{ "query": "your concise web search query" }
14 changes: 14 additions & 0 deletions lib/generation/prompts/templates/web-search-query-rewrite/user.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
## User Requirement

{{requirement}}

## PDF Excerpt

{{pdfExcerpt}}

## Task

Write the single best web search query as a JSON object with a `query` field only.

Output JSON directly (no explanation, no code fences).
Example: {"query":"Attention Is All You Need transformer Vaswani 2017"}
1 change: 1 addition & 0 deletions lib/generation/prompts/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
*/
export type PromptId =
| 'requirements-to-outlines'
| 'web-search-query-rewrite'
| 'slide-content'
| 'quiz-content'
| 'slide-actions'
Expand Down
31 changes: 29 additions & 2 deletions lib/server/classroom-generation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import { createLogger } from '@/lib/logger';
import { parseModelString } from '@/lib/ai/providers';
import { resolveApiKey, resolveWebSearchApiKey } from '@/lib/server/provider-config';
import { resolveModel } from '@/lib/server/resolve-model';
import { buildSearchQuery } from '@/lib/server/search-query-builder';
import { searchWithTavily, formatSearchResultsAsContext } from '@/lib/web-search/tavily';
import { persistClassroom } from '@/lib/server/classroom-storage';
import {
Expand Down Expand Up @@ -203,6 +204,21 @@ export async function generateClassroom(
return result.text;
};

// AI call used only for rewriting the web-search query.
// Forwards the system and user prompts to callLLM with the surrounding
// `languageModel`, caps the reply at 256 output tokens, labels the call
// 'web-search-query-rewrite', and resolves to the reply text.
// The third argument (`_images`) is accepted but never used.
const searchQueryAiCall: AICallFn = async (systemPrompt, userPrompt, _images) => {
const result = await callLLM(
{
model: languageModel,
messages: [
{ role: 'system', content: systemPrompt },
{ role: 'user', content: userPrompt },
],
maxOutputTokens: 256,
},
'web-search-query-rewrite',
);
return result.text;
};

const lang = normalizeLanguage(input.language);
const requirements: UserRequirements = {
requirement,
Expand Down Expand Up @@ -240,8 +256,19 @@ export async function generateClassroom(
const tavilyKey = resolveWebSearchApiKey();
if (tavilyKey) {
try {
log.info('Running web search for requirement context...');
const searchResult = await searchWithTavily({ query: requirement, apiKey: tavilyKey });
const searchQuery = await buildSearchQuery(requirement, pdfText, searchQueryAiCall);

log.info('Running web search for classroom generation', {
hasPdfContext: searchQuery.hasPdfContext,
rawRequirementLength: searchQuery.rawRequirementLength,
rewriteAttempted: searchQuery.rewriteAttempted,
finalQueryLength: searchQuery.finalQueryLength,
});

const searchResult = await searchWithTavily({
query: searchQuery.query,
apiKey: tavilyKey,
});
researchContext = formatSearchResultsAsContext(searchResult);
if (researchContext) {
log.info(`Web search returned ${searchResult.sources.length} sources`);
Expand Down
99 changes: 99 additions & 0 deletions lib/server/search-query-builder.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import { parseJsonResponse } from '@/lib/generation/json-repair';
import { PROMPT_IDS, buildPrompt } from '@/lib/generation/prompts';
import type { AICallFn } from '@/lib/generation/pipeline-types';
import { createLogger } from '@/lib/logger';

// Logger created with the 'SearchQueryBuilder' label.
const log = createLogger('SearchQueryBuilder');
// Character cap applied to an LLM-rewritten query before it is returned.
// NOTE(review): presumably chosen to stay below a Tavily query-length limit — confirm against Tavily docs.
const TAVILY_SOFT_MAX_QUERY_LENGTH = 350;
/** Maximum number of characters of whitespace-normalized PDF text kept as the rewrite excerpt. */
export const SEARCH_QUERY_REWRITE_EXCERPT_LENGTH = 7000;

/** Shape that the rewrite model's JSON reply is parsed into. */
interface SearchQueryRewriteResponse {
/** The rewritten search query text. */
query: string;
}

/** Outcome of `buildSearchQuery`: the query to search with plus metadata for logging. */
export interface SearchQueryBuildResult {
/** Query to send to the search provider: the rewritten query, or the whitespace-normalized requirement on fallback. */
query: string;
/**
 * True when a rewrite was deemed necessary (long requirement or PDF excerpt present).
 * It stays true even when the builder falls back without a successful model call.
 */
rewriteAttempted: boolean;
/** Length of the requirement after whitespace normalization. */
rawRequirementLength: number;
/** Length of `query`. */
finalQueryLength: number;
/** True when a non-empty PDF excerpt was available. */
hasPdfContext: boolean;
}

/**
 * Collapses every run of whitespace in the requirement into a single space
 * and removes leading and trailing whitespace.
 *
 * @param requirement - Raw requirement text.
 * @returns The requirement as whitespace-free tokens joined by single spaces.
 */
function normalizeSearchRequirement(requirement: string): string {
  // Splitting on whitespace runs leaves empty tokens at the edges; drop them.
  const tokens = requirement.split(/\s+/).filter((token) => token.length > 0);
  return tokens.join(' ');
}

/**
 * Produces the PDF excerpt used for query rewriting.
 *
 * Whitespace runs are collapsed to single spaces and the ends trimmed, then
 * the result is cut to SEARCH_QUERY_REWRITE_EXCERPT_LENGTH characters.
 *
 * @param pdfText - Extracted PDF text, if any.
 * @returns The bounded excerpt, or an empty string when no text was given.
 */
function normalizePdfExcerpt(pdfText?: string): string {
  const collapsed = pdfText ? pdfText.replace(/\s+/g, ' ').trim() : '';
  return collapsed.slice(0, SEARCH_QUERY_REWRITE_EXCERPT_LENGTH);
}

/**
 * Decides whether the requirement should go through the LLM rewrite step.
 *
 * @param normalizedRequirement - Requirement after whitespace normalization.
 * @param normalizedPdfExcerpt - PDF excerpt after normalization ('' when absent).
 * @returns True when a PDF excerpt is present or the requirement is longer
 *   than 400 characters.
 */
function shouldRewriteSearchQuery(
  normalizedRequirement: string,
  normalizedPdfExcerpt: string,
): boolean {
  if (normalizedPdfExcerpt.length > 0) {
    return true;
  }
  return normalizedRequirement.length > 400;
}

/**
 * Builds the query to send to web search from a user requirement and
 * optional PDF text.
 *
 * The requirement is whitespace-normalized. When it is longer than 400
 * characters or a PDF excerpt is available, the function asks the model (via
 * `aiCall`) to rewrite it into a concise query, capped at
 * TAVILY_SOFT_MAX_QUERY_LENGTH characters. Every failure path (empty
 * requirement, no `aiCall`, missing prompt, empty or unparsable reply, thrown
 * error) returns the normalized requirement unchanged.
 *
 * @param requirement - Raw user requirement.
 * @param pdfText - Optional extracted PDF text used as rewrite context.
 * @param aiCall - Optional model call; without it no rewrite is performed.
 * @returns The chosen query plus metadata describing how it was produced.
 */
export async function buildSearchQuery(
  requirement: string,
  pdfText: string | undefined,
  aiCall?: AICallFn,
): Promise<SearchQueryBuildResult> {
  const cleanedRequirement = normalizeSearchRequirement(requirement);
  const excerpt = normalizePdfExcerpt(pdfText);
  const wantsRewrite = shouldRewriteSearchQuery(cleanedRequirement, excerpt);

  // Result used whenever the rewrite is skipped or fails.
  const unchanged: SearchQueryBuildResult = {
    query: cleanedRequirement,
    rewriteAttempted: wantsRewrite,
    rawRequirementLength: cleanedRequirement.length,
    finalQueryLength: cleanedRequirement.length,
    hasPdfContext: Boolean(excerpt),
  };

  if (!wantsRewrite || !cleanedRequirement) {
    return unchanged;
  }

  if (!aiCall) {
    log.warn('Query rewrite AI call unavailable, falling back to raw requirement');
    return unchanged;
  }

  const rewritePrompts = buildPrompt(PROMPT_IDS.WEB_SEARCH_QUERY_REWRITE, {
    requirement: cleanedRequirement,
    pdfExcerpt: excerpt || 'None',
  });

  if (!rewritePrompts) {
    log.warn('Query rewrite prompt not found, falling back to raw requirement');
    return unchanged;
  }

  try {
    const rawReply = await aiCall(rewritePrompts.system, rewritePrompts.user);
    const candidate = parseJsonResponse<SearchQueryRewriteResponse>(rawReply)?.query || '';
    const rewritten = normalizeSearchRequirement(candidate).slice(
      0,
      TAVILY_SOFT_MAX_QUERY_LENGTH,
    );

    if (rewritten) {
      return {
        ...unchanged,
        query: rewritten,
        finalQueryLength: rewritten.length,
      };
    }

    log.warn('Query rewrite returned empty output, falling back to raw requirement');
    return unchanged;
  } catch (error) {
    log.warn('Query rewrite failed, falling back to raw requirement:', error);
    return unchanged;
  }
}
Loading