Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions code/python/core/baseHandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@ def __init__(self, query_params, http_handler):
# Maximum number of results to return to the user
self.max_results = get_param(query_params, "max_results", int, 10)

# Output format - can be "chatgptapp" for ChatGPT App spec v0.5 format
self.output_format = get_param(query_params, "output_format", str, "default")

# the items that have been retrieved from the vector database, could be before decontextualization.
# See below notes on fasttrack
self.retrieved_items = []
Expand Down
83 changes: 81 additions & 2 deletions code/python/core/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,5 +470,84 @@ def create_legacy_message(message_type: str, content: Any,
message["conversation_id"] = conversation_id
if sender_info:
message["sender_info"] = sender_info

return message

return message


def format_response_to_chatgpt_spec(results: List[Dict[str, Any]],
                                    conversation_id: Optional[str] = None,
                                    text_description: Optional[str] = None,
                                    version: str = "0.5",
                                    additional_meta: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """
    Format NLWeb results according to the ChatGPT App specification v0.5.

    Args:
        results: List of result items (each with @type, url, name, etc.)
        conversation_id: Conversation identifier
        text_description: Optional natural language description
        version: API version number (default: "0.5")
        additional_meta: Additional metadata fields (e.g., openai/outputTemplate)

    Returns:
        Dict formatted according to ChatGPT App spec with _meta and content fields

    Example output:
        {
            "_meta": {
                "conversation_id": "conv-123",
                "version": "0.5"
            },
            "content": [
                {
                    "type": "text",
                    "text": "Found 3 restaurants..."
                },
                {
                    "type": "resource",
                    "resource": {
                        "data": [
                            {"@type": "Restaurant", "name": "...", ...}
                        ]
                    }
                }
            ]
        }
    """
    # Assemble the _meta object; caller-supplied extras are merged last so
    # they may override the defaults.
    meta: Dict[str, Any] = {"version": version}
    if conversation_id:
        meta["conversation_id"] = conversation_id
    if additional_meta:
        meta.update(additional_meta)

    # Assemble the content array: optional leading text item, then an
    # optional resource item wrapping the result data.
    content: List[Dict[str, Any]] = []

    if text_description:
        content.append({"type": "text", "text": text_description})

    if results:
        # A lone result is unwrapped from its list; multiple results are
        # passed through as a list.
        payload = results[0] if len(results) == 1 else results
        content.append({"type": "resource", "resource": {"data": payload}})

    return {"_meta": meta, "content": content}
153 changes: 140 additions & 13 deletions code/python/core/utils/message_senders.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,14 +107,18 @@ async def send_begin_response(self):
"""Send begin-nlweb-response message at the start of query processing."""
if not (self.handler.streaming and self.handler.http_handler is not None):
return


# Skip for chatgptapp format
if hasattr(self.handler, 'output_format') and self.handler.output_format == 'chatgptapp':
return

begin_message = {
"message_type": "begin-nlweb-response",
"conversation_id": self.handler.conversation_id,
"query": self.handler.query,
"timestamp": int(time.time() * 1000)
}

try:
await self.handler.http_handler.write_stream(begin_message)
except Exception:
Expand All @@ -123,22 +127,26 @@ async def send_begin_response(self):
async def send_end_response(self, error=False):
"""
Send end-nlweb-response message at the end of query processing.

Args:
error: If True, indicates the query ended with an error
"""
if not (self.handler.streaming and self.handler.http_handler is not None):
return


# Skip for chatgptapp format
if hasattr(self.handler, 'output_format') and self.handler.output_format == 'chatgptapp':
return

end_message = {
"message_type": "end-nlweb-response",
"conversation_id": self.handler.conversation_id,
"timestamp": int(time.time() * 1000)
}

if error:
end_message["error"] = True

try:
await self.handler.http_handler.write_stream(end_message)
except Exception:
Expand Down Expand Up @@ -281,26 +289,145 @@ async def send_message(self, message):
message_type = message.get('message_type', 'unknown')
# print(f"[MessageSender] Sending message type: {message_type}")
message = self.add_message_metadata(message)

# Always store the message (for both streaming and non-streaming)
self.store_message(message)

if (self.handler.streaming and self.handler.http_handler is not None):
# Streaming mode: also send via write_stream

# Check if this is the first result and add time-to-first-result header
if message.get("message_type") == "result" and not self.handler.first_result_sent:
self.handler.first_result_sent = True
await self.send_time_to_first_result()

# Send headers if not already sent
await self._send_headers_if_needed(is_streaming=True)

try:
await self.handler.http_handler.write_stream(message)
# For chatgptapp format, send _meta block first
if hasattr(self.handler, 'output_format') and self.handler.output_format == 'chatgptapp':
if message.get('message_type') == 'result':
await self._send_chatgptapp_meta_if_needed(message.get('conversation_id'))

# Transform to ChatGPT App format if requested
output_message = self._transform_to_output_format(message)

# Skip sending if transform returned None (e.g., for captured query_rewrite)
if output_message is not None:
await self.handler.http_handler.write_stream(output_message)
except Exception as e:
self.handler.connection_alive_event.clear() # Use event instead of flag
else:
# Non-streaming mode: just store (already done above)
# Send headers if not already sent
await self._send_headers_if_needed(is_streaming=False)
await self._send_headers_if_needed(is_streaming=False)

async def _send_chatgptapp_meta_if_needed(self, conversation_id):
"""Send _meta block once for chatgptapp format."""
if not hasattr(self.handler, '_chatgptapp_meta_sent') or not self.handler._chatgptapp_meta_sent:
self.handler._chatgptapp_meta_sent = True

meta_message = {
"_meta": {
"openai/outputTemplate": "ui://widget/list.html",
"nlweb/version": "0.5"
}
}

# Include conversation_id only if present and non-empty
if conversation_id:
meta_message["_meta"]["nlweb/conversationId"] = conversation_id

# Include decontextualized_query only if it differs from original query
if hasattr(self.handler, 'decontextualized_query') and self.handler.decontextualized_query:
# Skip if same as original query
if self.handler.decontextualized_query != self.handler.query:
meta_message["_meta"]["nlweb/decontextualizedQuery"] = self.handler.decontextualized_query

try:
await self.handler.http_handler.write_stream(meta_message)
except Exception as e:
self.handler.connection_alive_event.clear()
raise

def _transform_to_output_format(self, message):
"""
Transform message to requested output format (e.g., chatgptapp).

For chatgptapp format, only _meta and content messages are allowed.
All other message types are filtered out.
"""
# Check if chatgptapp format is requested
if not hasattr(self.handler, 'output_format') or self.handler.output_format != 'chatgptapp':
return message

# For chatgptapp format, only allow 'result' message_type
# All other message types should be filtered out
message_type = message.get('message_type')
if message_type != 'result':
# Skip all non-result messages for chatgptapp format
# This includes: decontextualized_query, query_rewrite, asking_sites,
# site_querying, site_complete, site_error, intermediate_message,
# tool_selection, tool_routing, nlws, ensemble_result, etc.
return None

# Extract content (results array)
content = message.get('content', [])
if not content:
return None

# Transform each item: flatten schema_object fields and convert to grounding
transformed_content = []
for item in content:
transformed_item = self._transform_item_for_chatgptapp(item)
transformed_content.append(transformed_item)

# Transform content to resource format
resource_content = {
"content": [{
"type": "resource",
"resource": {
"data": transformed_content
}
}]
}

return resource_content

def _transform_item_for_chatgptapp(self, item):
"""
Transform a single item for chatgptapp format:
- Flatten schema_object fields to top level
- Replace schema_object with grounding field containing the URL
"""
if not isinstance(item, dict):
return item

# Create a copy to avoid modifying original
transformed = dict(item)

# Check if schema_object exists
if 'schema_object' in transformed:
schema_obj = transformed['schema_object']

if isinstance(schema_obj, dict):
# Extract the URL for grounding (use item's url field)
url = transformed.get('url', '')

# Flatten all fields from schema_object to top level
for key, value in schema_obj.items():
# Only add if not already present at top level
if key not in transformed:
transformed[key] = value

# If schema_object has @type and top level @type is "Item", replace with schema @type
if '@type' in schema_obj and transformed.get('@type') == 'Item':
transformed['@type'] = schema_obj['@type']

# Replace schema_object with grounding
del transformed['schema_object']
if url:
transformed['grounding'] = url

return transformed
10 changes: 9 additions & 1 deletion code/python/webserver/mcp_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,12 @@ async def handle_tools_list(self, params):
"enum": ["list", "generate", "summarize"],
"description": "The type of response to generate",
"default": "list"
},
"output_format": {
"type": "string",
"enum": ["default", "chatgptapp"],
"description": "Output format for the response. Use 'chatgptapp' for ChatGPT App spec v0.5 format",
"default": "chatgptapp"
}
},
"required": ["query"]
Expand Down Expand Up @@ -304,13 +310,15 @@ async def handle_tools_call(self, params, query_params):
# print(f"Query: {query}")
sites = arguments.get("site", [])
generate_mode = arguments.get("generate_mode", "list")

output_format = arguments.get("output_format", "chatgptapp") # Default to chatgptapp for MCP

# Update query params with MCP arguments
# Make sure to format values as lists (like URL parameters)
query_params["query"] = [query] if query else []
if sites:
query_params["site"] = sites if isinstance(sites, list) else [sites]
query_params["generate_mode"] = [generate_mode] if generate_mode else ["list"]
query_params["output_format"] = [output_format]
# print(f"=== QUERY PARAMS BEING PASSED ===")
# print(f"query_params: {query_params}")

Expand Down
4 changes: 2 additions & 2 deletions static/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -221,9 +221,9 @@ <h3>Login with Email</h3>
// Parse ALL URL parameters
const urlParams = new URLSearchParams(window.location.search);

// Extract connection mode
// Extract connection mode - default to SSE
const mode = urlParams.get('mode') || urlParams.get('connection');
const connectionType = (mode === 'http' || mode === 'sse') ? 'sse' : 'websocket';
const connectionType = (mode === 'websocket') ? 'websocket' : 'sse';

// Collect all additional parameters to pass to backend
const additionalParams = {};
Expand Down