From 2783890952f08d6acc6ba9380734949c5a6f9e92 Mon Sep 17 00:00:00 2001 From: "R.V.Guha" Date: Mon, 27 Oct 2025 06:08:04 -0700 Subject: [PATCH 1/5] Add ChatGPT App SDK output format support Implement ChatGPT App specification v0.5 output format to enable rich UI widgets in ChatGPT. This allows NLWeb to display search results using ChatGPT's native list widgets. Backend changes: - Add output_format parameter to NLWebHandler (default: "default") - Transform responses to ChatGPT App format when output_format=chatgptapp - Send _meta block once with conversation_id, version, openai/outputTemplate, query_rewrite, and decontextualized_query - Stream content blocks as {"type": "resource", "resource": {"data": [...]}} - Filter out non-essential messages (begin/end-nlweb-response, query_rewrite, decontextualized_query, etc.) - Only _meta and content messages are sent for chatgptapp format - MCP defaults to chatgptapp format Frontend changes: - Set SSE as default connection type (was WebSocket) - Users can still use WebSocket with ?mode=websocket Key features: - Full backward compatibility - default format unchanged - Preserves all data fields from original format - Enables ChatGPT Apps SDK generic list widget (ui://widget/list.html) - Works with any schema.org type (Restaurant, Recipe, Article, Product, etc.) - Clean output with only _meta and content blocks --- code/python/core/baseHandler.py | 3 + code/python/core/schemas.py | 83 ++++++++++++++- code/python/core/utils/message_senders.py | 118 +++++++++++++++++++--- code/python/webserver/mcp_wrapper.py | 10 +- static/index.html | 4 +- 5 files changed, 200 insertions(+), 18 deletions(-) diff --git a/code/python/core/baseHandler.py b/code/python/core/baseHandler.py index ee0f1c31..9104b0ed 100644 --- a/code/python/core/baseHandler.py +++ b/code/python/core/baseHandler.py @@ -115,6 +115,9 @@ def __init__(self, query_params, http_handler): # Maximum number of results to return to the user self.max_results = get_param(query_params, "max_results", int, 10) + # Output format - can be "chatgptapp" for ChatGPT App spec v0.5 format + self.output_format = get_param(query_params, "output_format", str, "default") + # the items that have been retrieved from the vector database, could be before decontextualization. # See below notes on fasttrack self.retrieved_items = [] diff --git a/code/python/core/schemas.py b/code/python/core/schemas.py index 6b4643a6..9561546a 100644 --- a/code/python/core/schemas.py +++ b/code/python/core/schemas.py @@ -470,5 +470,84 @@ def create_legacy_message(message_type: str, content: Any, message["conversation_id"] = conversation_id if sender_info: message["sender_info"] = sender_info - - return message \ No newline at end of file + + return message + + +def format_response_to_chatgpt_spec(results: List[Dict[str, Any]], + conversation_id: Optional[str] = None, + text_description: Optional[str] = None, + version: str = "0.5", + additional_meta: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + """ + Format NLWeb results according to the ChatGPT App specification v0.5. + + Args: + results: List of result items (each with @type, url, name, etc.) + conversation_id: Conversation identifier + text_description: Optional natural language description + version: API version number (default: "0.5") + additional_meta: Additional metadata fields (e.g., openai/outputTemplate) + + Returns: + Dict formatted according to ChatGPT App spec with _meta and content fields + + Example output: + { + "_meta": { + "conversation_id": "conv-123", + "version": "0.5" + }, + "content": [ + { + "type": "text", + "text": "Found 3 restaurants..." + }, + { + "type": "resource", + "resource": { + "data": [ + {"@type": "Restaurant", "name": "...", ...} + ] + } + } + ] + } + """ + # Build _meta object + meta = {"version": version} + if conversation_id: + meta["conversation_id"] = conversation_id + if additional_meta: + meta.update(additional_meta) + + # Build content array + content = [] + + # Add text item if provided + if text_description: + content.append({ + "type": "text", + "text": text_description + }) + + # Add resource item with data + if results: + # Determine if we have single or multiple results + if len(results) == 1: + data = results[0] + else: + data = results + + resource_item = { + "type": "resource", + "resource": { + "data": data + } + } + content.append(resource_item) + + return { + "_meta": meta, + "content": content + } \ No newline at end of file diff --git a/code/python/core/utils/message_senders.py b/code/python/core/utils/message_senders.py index 1ea95e17..af6bff6a 100644 --- a/code/python/core/utils/message_senders.py +++ b/code/python/core/utils/message_senders.py @@ -107,14 +107,18 @@ async def send_begin_response(self): """Send begin-nlweb-response message at the start of query processing.""" if not (self.handler.streaming and self.handler.http_handler is not None): return - + + # Skip for chatgptapp format + if hasattr(self.handler, 'output_format') and self.handler.output_format == 'chatgptapp': + return + begin_message = { "message_type": "begin-nlweb-response", "conversation_id": self.handler.conversation_id, "query": self.handler.query, "timestamp": int(time.time() * 1000) } - + try: await self.handler.http_handler.write_stream(begin_message) except Exception: @@ -123,22 +127,26 @@ async def send_begin_response(self): async def send_end_response(self, error=False): """ Send end-nlweb-response message at the end of query processing. - + Args: error: If True, indicates the query ended with an error """ if not (self.handler.streaming and self.handler.http_handler is not None): return - + + # Skip for chatgptapp format + if hasattr(self.handler, 'output_format') and self.handler.output_format == 'chatgptapp': + return + end_message = { "message_type": "end-nlweb-response", "conversation_id": self.handler.conversation_id, "timestamp": int(time.time() * 1000) } - + if error: end_message["error"] = True - + try: await self.handler.http_handler.write_stream(end_message) except Exception: @@ -281,26 +289,110 @@ async def send_message(self, message): message_type = message.get('message_type', 'unknown') # print(f"[MessageSender] Sending message type: {message_type}") message = self.add_message_metadata(message) - + # Always store the message (for both streaming and non-streaming) self.store_message(message) - + if (self.handler.streaming and self.handler.http_handler is not None): # Streaming mode: also send via write_stream - + # Check if this is the first result and add time-to-first-result header if message.get("message_type") == "result" and not self.handler.first_result_sent: self.handler.first_result_sent = True await self.send_time_to_first_result() - + # Send headers if not already sent await self._send_headers_if_needed(is_streaming=True) - + try: - await self.handler.http_handler.write_stream(message) + # For chatgptapp format, send _meta block first + if hasattr(self.handler, 'output_format') and self.handler.output_format == 'chatgptapp': + if message.get('message_type') == 'result': + await self._send_chatgptapp_meta_if_needed(message.get('conversation_id')) + + # Transform to ChatGPT App format if requested + output_message = self._transform_to_output_format(message) + + # Skip sending if transform returned None (e.g., for captured query_rewrite) + if output_message is not None: + await self.handler.http_handler.write_stream(output_message) except Exception as e: self.handler.connection_alive_event.clear() # Use event instead of flag else: # Non-streaming mode: just store (already done above) # Send headers if not already sent - await self._send_headers_if_needed(is_streaming=False) \ No newline at end of file + await self._send_headers_if_needed(is_streaming=False) + + async def _send_chatgptapp_meta_if_needed(self, conversation_id): + """Send _meta block once for chatgptapp format.""" + if not hasattr(self.handler, '_chatgptapp_meta_sent') or not self.handler._chatgptapp_meta_sent: + self.handler._chatgptapp_meta_sent = True + + meta_message = { + "_meta": { + "conversation_id": conversation_id or '', + "version": "0.5", + "openai/outputTemplate": "ui://widget/list.html" + } + } + + # Include query_rewrite data if available + if hasattr(self.handler, '_chatgptapp_query_rewrite'): + meta_message["_meta"]["query_rewrite"] = self.handler._chatgptapp_query_rewrite + + # Include decontextualized_query if available + if hasattr(self.handler, 'decontextualized_query') and self.handler.decontextualized_query: + meta_message["_meta"]["decontextualized_query"] = self.handler.decontextualized_query + + try: + await self.handler.http_handler.write_stream(meta_message) + except Exception as e: + self.handler.connection_alive_event.clear() + raise + + def _transform_to_output_format(self, message): + """ + Transform message to requested output format (e.g., chatgptapp). + + For chatgptapp format, only _meta and content messages are allowed. + All other message types are filtered out. + """ + # Check if chatgptapp format is requested + if not hasattr(self.handler, 'output_format') or self.handler.output_format != 'chatgptapp': + return message + + # Capture query_rewrite message and skip sending it (will be included in _meta) + if message.get('message_type') == 'query_rewrite': + # Store the query_rewrite data for inclusion in _meta + self.handler._chatgptapp_query_rewrite = { + "original_query": message.get('original_query'), + "rewritten_queries": message.get('rewritten_queries') + } + return None + + # For chatgptapp format, only allow 'result' message_type + # All other message types should be filtered out + message_type = message.get('message_type') + if message_type != 'result': + # Skip all non-result messages for chatgptapp format + # This includes: decontextualized_query, query_rewrite, asking_sites, + # site_querying, site_complete, site_error, intermediate_message, + # tool_selection, tool_routing, nlws, ensemble_result, etc. + return None + + # Extract content (results array) + content = message.get('content', []) + if not content: + return None + + # Transform content to resource format + resource_content = { + "content": [{ + "type": "resource", + "resource": { + "data": content + } + }] + } + + return resource_content \ No newline at end of file diff --git a/code/python/webserver/mcp_wrapper.py b/code/python/webserver/mcp_wrapper.py index 1b32d7e1..7ca1305f 100644 --- a/code/python/webserver/mcp_wrapper.py +++ b/code/python/webserver/mcp_wrapper.py @@ -172,6 +172,12 @@ async def handle_tools_list(self, params): "enum": ["list", "generate", "summarize"], "description": "The type of response to generate", "default": "list" + }, + "output_format": { + "type": "string", + "enum": ["default", "chatgptapp"], + "description": "Output format for the response. Use 'chatgptapp' for ChatGPT App spec v0.5 format", + "default": "chatgptapp" } }, "required": ["query"] @@ -304,13 +310,15 @@ async def handle_tools_call(self, params, query_params): # print(f"Query: {query}") sites = arguments.get("site", []) generate_mode = arguments.get("generate_mode", "list") - + output_format = arguments.get("output_format", "chatgptapp") # Default to chatgptapp for MCP + # Update query params with MCP arguments # Make sure to format values as lists (like URL parameters) query_params["query"] = [query] if query else [] if sites: query_params["site"] = sites if isinstance(sites, list) else [sites] query_params["generate_mode"] = [generate_mode] if generate_mode else ["list"] + query_params["output_format"] = [output_format] # print(f"=== QUERY PARAMS BEING PASSED ===") # print(f"query_params: {query_params}") diff --git a/static/index.html b/static/index.html index b9f2a85e..81dbc7e8 100644 --- a/static/index.html +++ b/static/index.html @@ -221,9 +221,9 @@

Login with Email

// Parse ALL URL parameters const urlParams = new URLSearchParams(window.location.search); - // Extract connection mode + // Extract connection mode - default to SSE const mode = urlParams.get('mode') || urlParams.get('connection'); - const connectionType = (mode === 'http' || mode === 'sse') ? 'sse' : 'websocket'; + const connectionType = (mode === 'websocket') ? 'websocket' : 'sse'; // Collect all additional parameters to pass to backend const additionalParams = {}; From 4d805bc0974c700d073ea0b8ef7ad4c47a38ffa6 Mon Sep 17 00:00:00 2001 From: "R.V.Guha" Date: Mon, 27 Oct 2025 10:43:03 -0700 Subject: [PATCH 2/5] Use nlweb namespace for custom _meta fields in chatgptapp format Change field names in _meta to use nlweb/ prefix for NLWeb-specific fields: - conversation_id -> nlweb/conversationId - version -> nlweb/version - query_rewrite -> nlweb/queryRewrite - decontextualized_query -> nlweb/decontextualizedQuery This follows the ChatGPT App SDK convention of namespacing custom fields to avoid conflicts with OpenAI fields like openai/outputTemplate. --- code/python/core/utils/message_senders.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/code/python/core/utils/message_senders.py b/code/python/core/utils/message_senders.py index af6bff6a..8043fd3a 100644 --- a/code/python/core/utils/message_senders.py +++ b/code/python/core/utils/message_senders.py @@ -330,19 +330,22 @@ async def _send_chatgptapp_meta_if_needed(self, conversation_id): meta_message = { "_meta": { - "conversation_id": conversation_id or '', - "version": "0.5", - "openai/outputTemplate": "ui://widget/list.html" + "openai/outputTemplate": "ui://widget/list.html", + "nlweb/version": "0.5" } } # Include query_rewrite data if available if hasattr(self.handler, '_chatgptapp_query_rewrite'): - meta_message["_meta"]["query_rewrite"] = self.handler._chatgptapp_query_rewrite + meta_message["_meta"]["nlweb/queryRewrite"] = self.handler._chatgptapp_query_rewrite # Include decontextualized_query if available if hasattr(self.handler, 'decontextualized_query') and self.handler.decontextualized_query: - meta_message["_meta"]["decontextualized_query"] = self.handler.decontextualized_query + meta_message["_meta"]["nlweb/decontextualizedQuery"] = self.handler.decontextualized_query + + # Include conversation_id if available + if conversation_id: + meta_message["_meta"]["nlweb/conversationId"] = conversation_id try: await self.handler.http_handler.write_stream(meta_message) From 1247e263ef3d380a08b4eb189138c6fb78ce6bc1 Mon Sep 17 00:00:00 2001 From: "R.V.Guha" Date: Mon, 27 Oct 2025 10:47:25 -0700 Subject: [PATCH 3/5] Clean up _meta fields for chatgptapp format - Skip nlweb/conversationId if empty - Skip nlweb/decontextualizedQuery if same as original query - Remove nlweb/queryRewrite field entirely (not needed) This keeps the _meta block minimal with only relevant fields. --- code/python/core/utils/message_senders.py | 25 +++++++---------------- 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/code/python/core/utils/message_senders.py b/code/python/core/utils/message_senders.py index 8043fd3a..cc7dcce7 100644 --- a/code/python/core/utils/message_senders.py +++ b/code/python/core/utils/message_senders.py @@ -335,18 +335,16 @@ async def _send_chatgptapp_meta_if_needed(self, conversation_id): } } - # Include query_rewrite data if available - if hasattr(self.handler, '_chatgptapp_query_rewrite'): - meta_message["_meta"]["nlweb/queryRewrite"] = self.handler._chatgptapp_query_rewrite - - # Include decontextualized_query if available - if hasattr(self.handler, 'decontextualized_query') and self.handler.decontextualized_query: - meta_message["_meta"]["nlweb/decontextualizedQuery"] = self.handler.decontextualized_query - - # Include conversation_id if available + # Include conversation_id only if present and non-empty if conversation_id: meta_message["_meta"]["nlweb/conversationId"] = conversation_id + # Include decontextualized_query only if it differs from original query + if hasattr(self.handler, 'decontextualized_query') and self.handler.decontextualized_query: + # Skip if same as original query + if self.handler.decontextualized_query != self.handler.query: + meta_message["_meta"]["nlweb/decontextualizedQuery"] = self.handler.decontextualized_query + try: await self.handler.http_handler.write_stream(meta_message) except Exception as e: @@ -364,15 +362,6 @@ def _transform_to_output_format(self, message): if not hasattr(self.handler, 'output_format') or self.handler.output_format != 'chatgptapp': return message - # Capture query_rewrite message and skip sending it (will be included in _meta) - if message.get('message_type') == 'query_rewrite': - # Store the query_rewrite data for inclusion in _meta - self.handler._chatgptapp_query_rewrite = { - "original_query": message.get('original_query'), - "rewritten_queries": message.get('rewritten_queries') - } - return None - # For chatgptapp format, only allow 'result' message_type # All other message types should be filtered out message_type = message.get('message_type') From b12985bd26a0a10c7523c1e35429c44ff9e20193 Mon Sep 17 00:00:00 2001 From: "R.V.Guha" Date: Mon, 27 Oct 2025 11:47:35 -0700 Subject: [PATCH 4/5] Flatten schema_object and convert to grounding for chatgptapp format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For output_format=chatgptapp, transform each result item: - Flatten all schema_object fields to top level - Replace schema_object with grounding field containing the URL - Preserve all existing top-level fields Example: {schema_object: {headline: ...}} → {grounding: url, headline: ...} This provides richer structured data for ChatGPT Apps widgets. --- code/python/core/utils/message_senders.py | 43 +++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/code/python/core/utils/message_senders.py b/code/python/core/utils/message_senders.py index cc7dcce7..c6f15c16 100644 --- a/code/python/core/utils/message_senders.py +++ b/code/python/core/utils/message_senders.py @@ -377,14 +377,53 @@ def _transform_to_output_format(self, message): if not content: return None + # Transform each item: flatten schema_object fields and convert to grounding + transformed_content = [] + for item in content: + transformed_item = self._transform_item_for_chatgptapp(item) + transformed_content.append(transformed_item) + # Transform content to resource format resource_content = { "content": [{ "type": "resource", "resource": { - "data": content + "data": transformed_content } }] } - return resource_content \ No newline at end of file + return resource_content + + def _transform_item_for_chatgptapp(self, item): + """ + Transform a single item for chatgptapp format: + - Flatten schema_object fields to top level + - Replace schema_object with grounding field containing the URL + """ + if not isinstance(item, dict): + return item + + # Create a copy to avoid modifying original + transformed = dict(item) + + # Check if schema_object exists + if 'schema_object' in transformed: + schema_obj = transformed['schema_object'] + + if isinstance(schema_obj, dict): + # Extract the URL for grounding (use item's url field) + url = transformed.get('url', '') + + # Flatten all fields from schema_object to top level + for key, value in schema_obj.items(): + # Only add if not already present at top level + if key not in transformed: + transformed[key] = value + + # Replace schema_object with grounding + del transformed['schema_object'] + if url: + transformed['grounding'] = url + + return transformed \ No newline at end of file From 2f13c8d82e6bbddcc87e246074f07574898a875c Mon Sep 17 00:00:00 2001 From: "R.V.Guha" Date: Mon, 27 Oct 2025 12:45:56 -0700 Subject: [PATCH 5/5] Replace Item @type with schema.org @type when present MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When transforming for chatgptapp format: - If top-level @type is 'Item' and schema_object has @type, replace with schema @type - Preserves specific schema.org types (Article, Recipe, etc.) instead of generic Item - Non-Item @type values are preserved as-is Example: {@type: 'Item', schema_object: {@type: ['Article']}} → {@type: ['Article']} --- code/python/core/utils/message_senders.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/code/python/core/utils/message_senders.py b/code/python/core/utils/message_senders.py index c6f15c16..4cb428bf 100644 --- a/code/python/core/utils/message_senders.py +++ b/code/python/core/utils/message_senders.py @@ -421,6 +421,10 @@ def _transform_item_for_chatgptapp(self, item): if key not in transformed: transformed[key] = value + # If schema_object has @type and top level @type is "Item", replace with schema @type + if '@type' in schema_obj and transformed.get('@type') == 'Item': + transformed['@type'] = schema_obj['@type'] + # Replace schema_object with grounding del transformed['schema_object'] if url: