def format_response_to_chatgpt_spec(results: List[Dict[str, Any]],
                                    conversation_id: Optional[str] = None,
                                    text_description: Optional[str] = None,
                                    version: str = "0.5",
                                    additional_meta: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """
    Format NLWeb results according to the ChatGPT App specification v0.5.

    Args:
        results: List of result items (each typically carrying @type, url,
            name, etc.).
        conversation_id: Conversation identifier; included in ``_meta`` only
            when non-empty.
        text_description: Optional natural-language summary, emitted as a
            ``{"type": "text", ...}`` content item before the resource item.
        version: API version string stored in ``_meta`` (default: "0.5").
        additional_meta: Extra metadata fields (e.g. ``openai/outputTemplate``)
            merged into ``_meta`` LAST, so callers can deliberately override
            ``version`` or ``conversation_id``.

    Returns:
        Dict with ``_meta`` and ``content`` keys.  ``content`` holds the
        optional text item followed by one ``resource`` item whose
        ``resource.data`` is the lone result dict when exactly one result was
        supplied, or the whole list otherwise.  NOTE: a single result is
        intentionally NOT wrapped in a one-element list.  With no results and
        no description, ``content`` is an empty list.
    """
    # Build _meta: version first, then conversation id, then caller overrides.
    meta: Dict[str, Any] = {"version": version}
    if conversation_id:
        meta["conversation_id"] = conversation_id
    if additional_meta:
        meta.update(additional_meta)

    content: List[Dict[str, Any]] = []

    # Optional human-readable text item precedes the data.
    if text_description:
        content.append({
            "type": "text",
            "text": text_description
        })

    if results:
        # Unwrap a single result to a bare object; keep multiples as a list.
        data = results[0] if len(results) == 1 else results
        content.append({
            "type": "resource",
            "resource": {
                "data": data
            }
        })

    return {
        "_meta": meta,
        "content": content
    }
- + Args: error: If True, indicates the query ended with an error """ if not (self.handler.streaming and self.handler.http_handler is not None): return - + + # Skip for chatgptapp format + if hasattr(self.handler, 'output_format') and self.handler.output_format == 'chatgptapp': + return + end_message = { "message_type": "end-nlweb-response", "conversation_id": self.handler.conversation_id, "timestamp": int(time.time() * 1000) } - + if error: end_message["error"] = True - + try: await self.handler.http_handler.write_stream(end_message) except Exception: @@ -281,26 +289,145 @@ async def send_message(self, message): message_type = message.get('message_type', 'unknown') # print(f"[MessageSender] Sending message type: {message_type}") message = self.add_message_metadata(message) - + # Always store the message (for both streaming and non-streaming) self.store_message(message) - + if (self.handler.streaming and self.handler.http_handler is not None): # Streaming mode: also send via write_stream - + # Check if this is the first result and add time-to-first-result header if message.get("message_type") == "result" and not self.handler.first_result_sent: self.handler.first_result_sent = True await self.send_time_to_first_result() - + # Send headers if not already sent await self._send_headers_if_needed(is_streaming=True) - + try: - await self.handler.http_handler.write_stream(message) + # For chatgptapp format, send _meta block first + if hasattr(self.handler, 'output_format') and self.handler.output_format == 'chatgptapp': + if message.get('message_type') == 'result': + await self._send_chatgptapp_meta_if_needed(message.get('conversation_id')) + + # Transform to ChatGPT App format if requested + output_message = self._transform_to_output_format(message) + + # Skip sending if transform returned None (e.g., for captured query_rewrite) + if output_message is not None: + await self.handler.http_handler.write_stream(output_message) except Exception as e: 
self.handler.connection_alive_event.clear() # Use event instead of flag else: # Non-streaming mode: just store (already done above) # Send headers if not already sent - await self._send_headers_if_needed(is_streaming=False) \ No newline at end of file + await self._send_headers_if_needed(is_streaming=False) + + async def _send_chatgptapp_meta_if_needed(self, conversation_id): + """Send _meta block once for chatgptapp format.""" + if not hasattr(self.handler, '_chatgptapp_meta_sent') or not self.handler._chatgptapp_meta_sent: + self.handler._chatgptapp_meta_sent = True + + meta_message = { + "_meta": { + "openai/outputTemplate": "ui://widget/list.html", + "nlweb/version": "0.5" + } + } + + # Include conversation_id only if present and non-empty + if conversation_id: + meta_message["_meta"]["nlweb/conversationId"] = conversation_id + + # Include decontextualized_query only if it differs from original query + if hasattr(self.handler, 'decontextualized_query') and self.handler.decontextualized_query: + # Skip if same as original query + if self.handler.decontextualized_query != self.handler.query: + meta_message["_meta"]["nlweb/decontextualizedQuery"] = self.handler.decontextualized_query + + try: + await self.handler.http_handler.write_stream(meta_message) + except Exception as e: + self.handler.connection_alive_event.clear() + raise + + def _transform_to_output_format(self, message): + """ + Transform message to requested output format (e.g., chatgptapp). + + For chatgptapp format, only _meta and content messages are allowed. + All other message types are filtered out. 
+ """ + # Check if chatgptapp format is requested + if not hasattr(self.handler, 'output_format') or self.handler.output_format != 'chatgptapp': + return message + + # For chatgptapp format, only allow 'result' message_type + # All other message types should be filtered out + message_type = message.get('message_type') + if message_type != 'result': + # Skip all non-result messages for chatgptapp format + # This includes: decontextualized_query, query_rewrite, asking_sites, + # site_querying, site_complete, site_error, intermediate_message, + # tool_selection, tool_routing, nlws, ensemble_result, etc. + return None + + # Extract content (results array) + content = message.get('content', []) + if not content: + return None + + # Transform each item: flatten schema_object fields and convert to grounding + transformed_content = [] + for item in content: + transformed_item = self._transform_item_for_chatgptapp(item) + transformed_content.append(transformed_item) + + # Transform content to resource format + resource_content = { + "content": [{ + "type": "resource", + "resource": { + "data": transformed_content + } + }] + } + + return resource_content + + def _transform_item_for_chatgptapp(self, item): + """ + Transform a single item for chatgptapp format: + - Flatten schema_object fields to top level + - Replace schema_object with grounding field containing the URL + """ + if not isinstance(item, dict): + return item + + # Create a copy to avoid modifying original + transformed = dict(item) + + # Check if schema_object exists + if 'schema_object' in transformed: + schema_obj = transformed['schema_object'] + + if isinstance(schema_obj, dict): + # Extract the URL for grounding (use item's url field) + url = transformed.get('url', '') + + # Flatten all fields from schema_object to top level + for key, value in schema_obj.items(): + # Only add if not already present at top level + if key not in transformed: + transformed[key] = value + + # If schema_object has @type and top 
level @type is "Item", replace with schema @type + if '@type' in schema_obj and transformed.get('@type') == 'Item': + transformed['@type'] = schema_obj['@type'] + + # Replace schema_object with grounding + del transformed['schema_object'] + if url: + transformed['grounding'] = url + + return transformed \ No newline at end of file diff --git a/code/python/webserver/mcp_wrapper.py b/code/python/webserver/mcp_wrapper.py index 1b32d7e1..7ca1305f 100644 --- a/code/python/webserver/mcp_wrapper.py +++ b/code/python/webserver/mcp_wrapper.py @@ -172,6 +172,12 @@ async def handle_tools_list(self, params): "enum": ["list", "generate", "summarize"], "description": "The type of response to generate", "default": "list" + }, + "output_format": { + "type": "string", + "enum": ["default", "chatgptapp"], + "description": "Output format for the response. Use 'chatgptapp' for ChatGPT App spec v0.5 format", + "default": "chatgptapp" } }, "required": ["query"] @@ -304,13 +310,15 @@ async def handle_tools_call(self, params, query_params): # print(f"Query: {query}") sites = arguments.get("site", []) generate_mode = arguments.get("generate_mode", "list") - + output_format = arguments.get("output_format", "chatgptapp") # Default to chatgptapp for MCP + # Update query params with MCP arguments # Make sure to format values as lists (like URL parameters) query_params["query"] = [query] if query else [] if sites: query_params["site"] = sites if isinstance(sites, list) else [sites] query_params["generate_mode"] = [generate_mode] if generate_mode else ["list"] + query_params["output_format"] = [output_format] # print(f"=== QUERY PARAMS BEING PASSED ===") # print(f"query_params: {query_params}") diff --git a/static/index.html b/static/index.html index b9f2a85e..81dbc7e8 100644 --- a/static/index.html +++ b/static/index.html @@ -221,9 +221,9 @@

Login with Email

// Parse ALL URL parameters const urlParams = new URLSearchParams(window.location.search); - // Extract connection mode + // Extract connection mode - default to SSE const mode = urlParams.get('mode') || urlParams.get('connection'); - const connectionType = (mode === 'http' || mode === 'sse') ? 'sse' : 'websocket'; + const connectionType = (mode === 'websocket') ? 'websocket' : 'sse'; // Collect all additional parameters to pass to backend const additionalParams = {};