2626)
2727from cairo_coder .dspy .document_retriever import DocumentRetrieverProgram
2828from cairo_coder .dspy .generation_program import GenerationProgram , McpGenerationProgram
29+ from cairo_coder .dspy .grok_search import GrokSearchProgram
2930from cairo_coder .dspy .query_processor import QueryProcessorProgram
3031from cairo_coder .dspy .retrieval_judge import RetrievalJudge
3132
@@ -73,6 +74,8 @@ def __init__(self, config: RagPipelineConfig):
7374 self .generation_program = config .generation_program
7475 self .mcp_generation_program = config .mcp_generation_program
7576 self .retrieval_judge = RetrievalJudge ()
77+ self .grok_search = GrokSearchProgram ()
78+ self ._grok_citations : list [str ] = []
7679
7780 # Pipeline state
7881 self ._current_processed_query : ProcessedQuery | None = None
@@ -96,6 +99,22 @@ async def _aprocess_query_and_retrieve_docs(
9699 processed_query = processed_query , sources = retrieval_sources
97100 )
98101
102+ # Optional Grok web/X augmentation: activate when STARKNET_BLOG is among sources.
103+ try :
104+ if DocumentSource .STARKNET_BLOG in retrieval_sources :
105+ grok_docs = await self .grok_search .aforward (processed_query )
106+ self ._grok_citations = list (self .grok_search .last_citations )
107+ if grok_docs :
108+ documents .extend (grok_docs )
109+ grok_summary_doc = next ((d for d in grok_docs if d .metadata .get ("name" ) == "grok-answer" ), None )
110+ else :
111+ self ._grok_citations = []
112+ grok_summary_doc = None
113+ except Exception as e :
114+ logger .warning ("Grok augmentation failed; continuing without it" , error = str (e ), exc_info = True )
115+ grok_summary_doc = None
116+ self ._grok_citations = []
117+
99118 try :
100119 with dspy .context (
101120 lm = dspy .LM ("gemini/gemini-flash-lite-latest" , max_tokens = 10000 , temperature = 0.5 ),
@@ -110,6 +129,16 @@ async def _aprocess_query_and_retrieve_docs(
110129 )
111130 # documents already contains all retrieved docs, no action needed
112131
132+ # Ensure Grok summary is present and first in order (for generation context)
133+ try :
134+ if grok_summary_doc is not None :
135+ if grok_summary_doc in documents :
136+ documents = [grok_summary_doc ] + [d for d in documents if d is not grok_summary_doc ]
137+ else :
138+ documents = [grok_summary_doc ] + documents
139+ except Exception :
140+ pass
141+
113142 self ._current_documents = documents
114143
115144 return processed_query , documents
@@ -290,14 +319,34 @@ def _format_sources(self, documents: list[Document]) -> list[dict[str, Any]]:
290319 List of dicts: [{"title": str, "url": str}, ...]
291320 """
292321 sources : list [dict [str , str ]] = []
322+
323+ # Helper to extract domain title
def title_from_url(url: str) -> str:
    """Return a short display title for a citation URL.

    The title is the network-location component of ``url`` (for example
    ``"example.com"`` for ``"https://example.com/post"``). If the URL has
    no network location, or cannot be parsed, ``url`` itself is returned
    so the caller always receives a usable title.

    Args:
        url: Raw citation URL.

    Returns:
        The URL's network location, or ``url`` unchanged as a fallback.
    """
    # Local import keeps this helper self-contained.
    from urllib.parse import urlparse

    try:
        host = urlparse(url).netloc
    except (ValueError, TypeError, AttributeError):
        # ValueError: malformed network location (e.g. an unmatched "[").
        # TypeError / AttributeError: a non-string value was passed in.
        # Title extraction is best-effort, so fall back to the raw value.
        return url
    return host or url
332+
333+ # 1) Vector store and other docs (skip Grok summary virtual doc)
293334 for doc in documents :
335+ if doc .metadata .get ("name" ) == "grok-answer" or doc .metadata .get ("is_virtual" ):
336+ continue
294337 if doc .source_link is None :
295338 logger .warning (f"Document { doc .title } has no source link" )
296- to_append = ( {"metadata" : {"title" : doc .title , "url" : "" }})
339+ to_append = {"metadata" : {"title" : doc .title , "url" : "" }}
297340 else :
298- to_append = ( {"metadata" : {"title" : doc .title , "url" : doc .source_link }})
341+ to_append = {"metadata" : {"title" : doc .title , "url" : doc .source_link }}
299342 sources .append (to_append )
300343
344+ # 2) Append Grok citations (raw URLs)
345+ for url in self ._grok_citations :
346+ if not url :
347+ continue
348+ sources .append ({"metadata" : {"title" : title_from_url (url ), "url" : url }})
349+
301350 return sources
302351
303352 def _prepare_context (self , documents : list [Document ]) -> str :
@@ -325,11 +374,12 @@ def _prepare_context(self, documents: list[Document]) -> str:
325374 for i , doc in enumerate (documents , 1 ):
326375 source_name = doc .metadata .get ("source_display" , "Unknown Source" )
327376 title = doc .metadata .get ("title" , f"Document { i } " )
328- url = doc .metadata .get ("url" , "#" )
377+ url = doc .metadata .get ("url" )
329378
330379 context_parts .append (f"## { i } . { title } " )
331380 context_parts .append (f"Source: { source_name } " )
332- context_parts .append (f"URL: { url } " )
381+ if url :
382+ context_parts .append (f"URL: { url } " )
333383 context_parts .append ("" )
334384 context_parts .append (doc .page_content )
335385 context_parts .append ("" )
0 commit comments