Web search: rewrite long or PDF-backed requirements into a concise query

Summary of the changes in this patch:
* app/api/web-search/route.ts: accept an optional `pdfText` field, clamp it to
  SEARCH_QUERY_REWRITE_EXCERPT_LENGTH, resolve a model from the request
  headers, and run the request through buildSearchQuery before calling Tavily.
* app/generation-preview/page.tsx: send getApiHeaders() and the session's
  pdfText with the /api/web-search request.
* lib/generation/prompts: register the `web-search-query-rewrite` prompt id and
  add its system and user templates.
* lib/server/classroom-generation.ts: build the Tavily query with
  buildSearchQuery instead of sending the raw requirement.
* lib/server/search-query-builder.ts: new module that normalizes the
  requirement and, when warranted, rewrites it through a model call.

diff --git a/app/api/web-search/route.ts b/app/api/web-search/route.ts
index f2ff627f..3dfc75f3 100644
--- a/app/api/web-search/route.ts
+++ b/app/api/web-search/route.ts
@@ -5,18 +5,31 @@
  * Simple JSON request/response using Tavily search.
  */
 
+import { NextRequest } from 'next/server';
+import { callLLM } from '@/lib/ai/llm';
 import { searchWithTavily, formatSearchResultsAsContext } from '@/lib/web-search/tavily';
 import { resolveWebSearchApiKey } from '@/lib/server/provider-config';
 import { createLogger } from '@/lib/logger';
 import { apiError, apiSuccess } from '@/lib/server/api-response';
+import {
+  buildSearchQuery,
+  SEARCH_QUERY_REWRITE_EXCERPT_LENGTH,
+} from '@/lib/server/search-query-builder';
+import { resolveModelFromHeaders } from '@/lib/server/resolve-model';
+import type { AICallFn } from '@/lib/generation/pipeline-types';
 
 const log = createLogger('WebSearch');
 
-export async function POST(req: Request) {
+export async function POST(req: NextRequest) {
   try {
     const body = await req.json();
-    const { query, apiKey: clientApiKey } = body as {
+    const {
+      query,
+      pdfText,
+      apiKey: clientApiKey,
+    } = body as {
       query?: string;
+      pdfText?: string;
       apiKey?: string;
     };
 
@@ -33,7 +46,45 @@ export async function POST(req: Request) {
       );
     }
 
-    const result = await searchWithTavily({ query: query.trim(), apiKey });
+    // Clamp rewrite input at the route boundary; framework body limits still apply to total request size.
+    // The body is untrusted JSON, so a non-string pdfText is ignored rather than throwing on slice().
+    const boundedPdfText =
+      typeof pdfText === 'string'
+        ? pdfText.slice(0, SEARCH_QUERY_REWRITE_EXCERPT_LENGTH)
+        : undefined;
+
+    // Model resolution is best-effort: on failure aiCall stays undefined and no rewrite runs.
+    let aiCall: AICallFn | undefined;
+    try {
+      const { model: languageModel } = resolveModelFromHeaders(req);
+      aiCall = async (systemPrompt, userPrompt) => {
+        const result = await callLLM(
+          {
+            model: languageModel,
+            messages: [
+              { role: 'system', content: systemPrompt },
+              { role: 'user', content: userPrompt },
+            ],
+            maxOutputTokens: 256,
+          },
+          'web-search-query-rewrite',
+        );
+        return result.text;
+      };
+    } catch (error) {
+      log.warn('Search query rewrite model unavailable, falling back to raw requirement:', error);
+    }
+
+    const searchQuery = await buildSearchQuery(query, boundedPdfText, aiCall);
+
+    log.info('Running web search API request', {
+      hasPdfContext: searchQuery.hasPdfContext,
+      rawRequirementLength: searchQuery.rawRequirementLength,
+      rewriteAttempted: searchQuery.rewriteAttempted,
+      finalQueryLength: searchQuery.finalQueryLength,
+    });
+
+    const result = await searchWithTavily({ query: searchQuery.query, apiKey });
     const context = formatSearchResultsAsContext(result);
 
     return apiSuccess({
diff --git a/app/generation-preview/page.tsx b/app/generation-preview/page.tsx
index b5380973..7be2a114 100644
--- a/app/generation-preview/page.tsx
+++ b/app/generation-preview/page.tsx
@@ -309,9 +309,10 @@ function GenerationPreviewContent() {
             wsSettings.webSearchProvidersConfig?.[wsSettings.webSearchProviderId]?.apiKey;
           const res = await fetch('/api/web-search', {
             method: 'POST',
-            headers: { 'Content-Type': 'application/json' },
+            headers: getApiHeaders(),
             body: JSON.stringify({
               query: currentSession.requirements.requirement,
+              pdfText: currentSession.pdfText || undefined,
               apiKey: wsApiKey || undefined,
             }),
             signal,
diff --git a/lib/generation/prompts/index.ts b/lib/generation/prompts/index.ts
index 8c6b2967..88371055 100644
--- a/lib/generation/prompts/index.ts
+++ b/lib/generation/prompts/index.ts
@@ -22,6 +22,7 @@ export {
 // Prompt IDs constant
 export const PROMPT_IDS = {
   REQUIREMENTS_TO_OUTLINES: 'requirements-to-outlines',
+  WEB_SEARCH_QUERY_REWRITE: 'web-search-query-rewrite',
   SLIDE_CONTENT: 'slide-content',
   QUIZ_CONTENT: 'quiz-content',
   SLIDE_ACTIONS: 'slide-actions',
diff --git a/lib/generation/prompts/templates/web-search-query-rewrite/system.md b/lib/generation/prompts/templates/web-search-query-rewrite/system.md
new file mode 100644
index 00000000..3d5b6a87
--- /dev/null
+++ b/lib/generation/prompts/templates/web-search-query-rewrite/system.md
@@ -0,0 +1,21 @@
+# Web Search Query Rewriter
+
+You rewrite user requests into concise, high-signal web search queries as JSON.
+
+{{snippet:json-output-rules}}
+
+## Rules
+
+- Return a JSON object with exactly one field: `query`
+- Preserve the user's intent
+- If a PDF excerpt is provided, use it to infer the topic, title, authors, methods, keywords, or named entities when helpful
+- Ignore boilerplate, copyright text, page numbers, and irrelevant noise
+- Prefer concrete topic terms over vague references like "this paper" or "this document"
+- Keep the query under 320 characters
+- If the original requirement is already concise and specific, keep it close to the original
+- If the PDF excerpt is unhelpful, rely on the requirement
+
+## Output Format
+
+Example output:
+{ "query": "your concise web search query" }
diff --git a/lib/generation/prompts/templates/web-search-query-rewrite/user.md b/lib/generation/prompts/templates/web-search-query-rewrite/user.md
new file mode 100644
index 00000000..d52f8185
--- /dev/null
+++ b/lib/generation/prompts/templates/web-search-query-rewrite/user.md
@@ -0,0 +1,14 @@
+## User Requirement
+
+{{requirement}}
+
+## PDF Excerpt
+
+{{pdfExcerpt}}
+
+## Task
+
+Write the single best web search query as a JSON object with a `query` field only.
+
+Output JSON directly (no explanation, no code fences).
+Example: {"query":"Attention Is All You Need transformer Vaswani 2017"}
diff --git a/lib/generation/prompts/types.ts b/lib/generation/prompts/types.ts
index 6df91b3f..73f0f260 100644
--- a/lib/generation/prompts/types.ts
+++ b/lib/generation/prompts/types.ts
@@ -7,6 +7,7 @@
  */
 export type PromptId =
   | 'requirements-to-outlines'
+  | 'web-search-query-rewrite'
   | 'slide-content'
   | 'quiz-content'
   | 'slide-actions'
diff --git a/lib/server/classroom-generation.ts b/lib/server/classroom-generation.ts
index eda67b4c..dc5918cd 100644
--- a/lib/server/classroom-generation.ts
+++ b/lib/server/classroom-generation.ts
@@ -19,6 +19,7 @@ import { createLogger } from '@/lib/logger';
 import { parseModelString } from '@/lib/ai/providers';
 import { resolveApiKey, resolveWebSearchApiKey } from '@/lib/server/provider-config';
 import { resolveModel } from '@/lib/server/resolve-model';
+import { buildSearchQuery } from '@/lib/server/search-query-builder';
 import { searchWithTavily, formatSearchResultsAsContext } from '@/lib/web-search/tavily';
 import { persistClassroom } from '@/lib/server/classroom-storage';
 import {
@@ -203,6 +204,21 @@ export async function generateClassroom(
     return result.text;
   };
 
+  const searchQueryAiCall: AICallFn = async (systemPrompt, userPrompt, _images) => {
+    const result = await callLLM(
+      {
+        model: languageModel,
+        messages: [
+          { role: 'system', content: systemPrompt },
+          { role: 'user', content: userPrompt },
+        ],
+        maxOutputTokens: 256,
+      },
+      'web-search-query-rewrite',
+    );
+    return result.text;
+  };
+
   const lang = normalizeLanguage(input.language);
   const requirements: UserRequirements = {
     requirement,
@@ -240,8 +256,19 @@ export async function generateClassroom(
     const tavilyKey = resolveWebSearchApiKey();
     if (tavilyKey) {
       try {
-        log.info('Running web search for requirement context...');
-        const searchResult = await searchWithTavily({ query: requirement, apiKey: tavilyKey });
+        const searchQuery = await buildSearchQuery(requirement, pdfText, searchQueryAiCall);
+
+        log.info('Running web search for classroom generation', {
+          hasPdfContext: searchQuery.hasPdfContext,
+          rawRequirementLength: searchQuery.rawRequirementLength,
+          rewriteAttempted: searchQuery.rewriteAttempted,
+          finalQueryLength: searchQuery.finalQueryLength,
+        });
+
+        const searchResult = await searchWithTavily({
+          query: searchQuery.query,
+          apiKey: tavilyKey,
+        });
         researchContext = formatSearchResultsAsContext(searchResult);
         if (researchContext) {
           log.info(`Web search returned ${searchResult.sources.length} sources`);
diff --git a/lib/server/search-query-builder.ts b/lib/server/search-query-builder.ts
new file mode 100644
index 00000000..4611c99c
--- /dev/null
+++ b/lib/server/search-query-builder.ts
@@ -0,0 +1,135 @@
+import { parseJsonResponse } from '@/lib/generation/json-repair';
+import { PROMPT_IDS, buildPrompt } from '@/lib/generation/prompts';
+import type { AICallFn } from '@/lib/generation/pipeline-types';
+import { createLogger } from '@/lib/logger';
+
+const log = createLogger('SearchQueryBuilder');
+
+/** Upper bound, in characters, applied to a model-rewritten query. */
+const TAVILY_SOFT_MAX_QUERY_LENGTH = 350;
+
+/** Maximum number of PDF characters passed to the rewrite prompt. */
+export const SEARCH_QUERY_REWRITE_EXCERPT_LENGTH = 7000;
+
+/** JSON shape the rewrite prompt asks the model to return. */
+interface SearchQueryRewriteResponse {
+  query: string;
+}
+
+/** Result of buildSearchQuery: the query to run plus fields used for logging. */
+export interface SearchQueryBuildResult {
+  /** Query to send to the search provider. */
+  query: string;
+  /**
+   * True when the input qualified for a rewrite. It stays true even when the
+   * rewrite could not run or failed and the normalized requirement is used.
+   */
+  rewriteAttempted: boolean;
+  /** Length of the whitespace-normalized requirement. */
+  rawRequirementLength: number;
+  /** Length of `query`. */
+  finalQueryLength: number;
+  /** True when a non-empty PDF excerpt was available. */
+  hasPdfContext: boolean;
+}
+
+/** Collapses each whitespace run to one space and trims both ends. */
+function normalizeSearchRequirement(requirement: string): string {
+  return requirement.replace(/\s+/g, ' ').trim();
+}
+
+/** Normalizes whitespace in the PDF text and caps it at the excerpt length. */
+function normalizePdfExcerpt(pdfText?: string): string {
+  if (!pdfText) {
+    return '';
+  }
+
+  return pdfText.replace(/\s+/g, ' ').trim().slice(0, SEARCH_QUERY_REWRITE_EXCERPT_LENGTH);
+}
+
+/** Rewrite when the requirement exceeds 400 characters or a PDF excerpt exists. */
+function shouldRewriteSearchQuery(
+  normalizedRequirement: string,
+  normalizedPdfExcerpt: string,
+): boolean {
+  return normalizedRequirement.length > 400 || Boolean(normalizedPdfExcerpt);
+}
+
+/**
+ * Builds the web search query for a requirement, optionally rewritten by a model.
+ *
+ * The whitespace-normalized requirement is used as the query unless it is
+ * longer than 400 characters or a PDF excerpt is present. In that case the
+ * rewrite prompt is sent through `aiCall`, the reply is parsed with
+ * parseJsonResponse, and its `query` field, cut to
+ * TAVILY_SOFT_MAX_QUERY_LENGTH, becomes the query. A missing `aiCall`, a
+ * missing prompt, an empty rewritten query, or an error thrown while calling
+ * the model or parsing its reply falls back to the normalized requirement.
+ *
+ * NOTE(review): the fallback query is not length-capped; confirm the search
+ * provider accepts requirements longer than 400 characters.
+ *
+ * @param requirement - Raw user requirement text.
+ * @param pdfText - Optional PDF text used as rewrite context.
+ * @param aiCall - Optional model call taking the system and user prompts.
+ * @returns The query to run together with logging metadata.
+ */
+export async function buildSearchQuery(
+  requirement: string,
+  pdfText: string | undefined,
+  aiCall?: AICallFn,
+): Promise<SearchQueryBuildResult> {
+  const normalizedRequirement = normalizeSearchRequirement(requirement);
+  const pdfExcerpt = normalizePdfExcerpt(pdfText);
+  const hasPdfContext = Boolean(pdfExcerpt);
+  const rewriteAttempted = shouldRewriteSearchQuery(normalizedRequirement, pdfExcerpt);
+
+  const fallback = {
+    query: normalizedRequirement,
+    rewriteAttempted,
+    rawRequirementLength: normalizedRequirement.length,
+    finalQueryLength: normalizedRequirement.length,
+    hasPdfContext,
+  } satisfies SearchQueryBuildResult;
+
+  if (!normalizedRequirement || !rewriteAttempted) {
+    return fallback;
+  }
+
+  if (!aiCall) {
+    log.warn('Query rewrite AI call unavailable, falling back to raw requirement');
+    return fallback;
+  }
+
+  const prompts = buildPrompt(PROMPT_IDS.WEB_SEARCH_QUERY_REWRITE, {
+    requirement: normalizedRequirement,
+    pdfExcerpt: pdfExcerpt || 'None',
+  });
+
+  if (!prompts) {
+    log.warn('Query rewrite prompt not found, falling back to raw requirement');
+    return fallback;
+  }
+
+  try {
+    const response = await aiCall(prompts.system, prompts.user);
+    const parsed = parseJsonResponse<SearchQueryRewriteResponse>(response);
+    const rewrittenQuery = normalizeSearchRequirement(parsed?.query || '').slice(
+      0,
+      TAVILY_SOFT_MAX_QUERY_LENGTH,
+    );
+    if (!rewrittenQuery) {
+      log.warn('Query rewrite returned empty output, falling back to raw requirement');
+      return fallback;
+    }
+
+    return {
+      ...fallback,
+      query: rewrittenQuery,
+      finalQueryLength: rewrittenQuery.length,
+    };
+  } catch (error) {
+    log.warn('Query rewrite failed, falling back to raw requirement:', error);
+    return fallback;
+  }
+}