Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions code/python/core/baseHandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@ def __init__(self, query_params, http_handler):
# Maximum number of results to return to the user
self.max_results = get_param(query_params, "max_results", int, 10)

# Output format - can be "chatgptapp" for ChatGPT App spec v0.5 format
self.output_format = get_param(query_params, "output_format", str, "default")

# the items that have been retrieved from the vector database, could be before decontextualization.
# See below notes on fasttrack
self.retrieved_items = []
Expand Down
83 changes: 81 additions & 2 deletions code/python/core/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,5 +470,84 @@ def create_legacy_message(message_type: str, content: Any,
message["conversation_id"] = conversation_id
if sender_info:
message["sender_info"] = sender_info

return message

return message


def format_response_to_chatgpt_spec(results: List[Dict[str, Any]],
                                    conversation_id: Optional[str] = None,
                                    text_description: Optional[str] = None,
                                    version: str = "0.5",
                                    additional_meta: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """
    Format NLWeb results according to the ChatGPT App specification v0.5.

    Args:
        results: List of result items (each with @type, url, name, etc.)
        conversation_id: Conversation identifier
        text_description: Optional natural language description
        version: API version number (default: "0.5")
        additional_meta: Additional metadata fields (e.g., openai/outputTemplate)

    Returns:
        Dict formatted according to ChatGPT App spec with _meta and content fields

    Example output:
        {
            "_meta": {
                "conversation_id": "conv-123",
                "version": "0.5"
            },
            "content": [
                {
                    "type": "text",
                    "text": "Found 3 restaurants..."
                },
                {
                    "type": "resource",
                    "resource": {
                        "data": [
                            {"@type": "Restaurant", "name": "...", ...}
                        ]
                    }
                }
            ]
        }
    """
    # Assemble the _meta object; caller-supplied extras are merged last so
    # they may override the defaults.
    meta: Dict[str, Any] = {"version": version}
    if conversation_id:
        meta["conversation_id"] = conversation_id
    if additional_meta:
        meta.update(additional_meta)

    # Assemble the content array: optional leading text item, then an
    # optional resource item wrapping the result data.
    content: List[Dict[str, Any]] = []

    if text_description:
        content.append({"type": "text", "text": text_description})

    if results:
        # A lone result is unwrapped from its list; multiple results are
        # passed through as a list.
        payload = results[0] if len(results) == 1 else results
        content.append({"type": "resource", "resource": {"data": payload}})

    return {"_meta": meta, "content": content}
153 changes: 140 additions & 13 deletions code/python/core/utils/message_senders.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,14 +107,18 @@ async def send_begin_response(self):
"""Send begin-nlweb-response message at the start of query processing."""
if not (self.handler.streaming and self.handler.http_handler is not None):
return


# Skip for chatgptapp format
if hasattr(self.handler, 'output_format') and self.handler.output_format == 'chatgptapp':
return

begin_message = {
"message_type": "begin-nlweb-response",
"conversation_id": self.handler.conversation_id,
"query": self.handler.query,
"timestamp": int(time.time() * 1000)
}

try:
await self.handler.http_handler.write_stream(begin_message)
except Exception:
Expand All @@ -123,22 +127,26 @@ async def send_begin_response(self):
async def send_end_response(self, error=False):
"""
Send end-nlweb-response message at the end of query processing.

Args:
error: If True, indicates the query ended with an error
"""
if not (self.handler.streaming and self.handler.http_handler is not None):
return


# Skip for chatgptapp format
if hasattr(self.handler, 'output_format') and self.handler.output_format == 'chatgptapp':
return

end_message = {
"message_type": "end-nlweb-response",
"conversation_id": self.handler.conversation_id,
"timestamp": int(time.time() * 1000)
}

if error:
end_message["error"] = True

try:
await self.handler.http_handler.write_stream(end_message)
except Exception:
Expand Down Expand Up @@ -281,26 +289,145 @@ async def send_message(self, message):
message_type = message.get('message_type', 'unknown')
# print(f"[MessageSender] Sending message type: {message_type}")
message = self.add_message_metadata(message)

# Always store the message (for both streaming and non-streaming)
self.store_message(message)

if (self.handler.streaming and self.handler.http_handler is not None):
# Streaming mode: also send via write_stream

# Check if this is the first result and add time-to-first-result header
if message.get("message_type") == "result" and not self.handler.first_result_sent:
self.handler.first_result_sent = True
await self.send_time_to_first_result()

# Send headers if not already sent
await self._send_headers_if_needed(is_streaming=True)

try:
await self.handler.http_handler.write_stream(message)
# For chatgptapp format, send _meta block first
if hasattr(self.handler, 'output_format') and self.handler.output_format == 'chatgptapp':
if message.get('message_type') == 'result':
await self._send_chatgptapp_meta_if_needed(message.get('conversation_id'))

# Transform to ChatGPT App format if requested
output_message = self._transform_to_output_format(message)

# Skip sending if transform returned None (e.g., for captured query_rewrite)
if output_message is not None:
await self.handler.http_handler.write_stream(output_message)
except Exception as e:
self.handler.connection_alive_event.clear() # Use event instead of flag
else:
# Non-streaming mode: just store (already done above)
# Send headers if not already sent
await self._send_headers_if_needed(is_streaming=False)
await self._send_headers_if_needed(is_streaming=False)

async def _send_chatgptapp_meta_if_needed(self, conversation_id):
"""Send _meta block once for chatgptapp format."""
if not hasattr(self.handler, '_chatgptapp_meta_sent') or not self.handler._chatgptapp_meta_sent:
self.handler._chatgptapp_meta_sent = True

meta_message = {
"_meta": {
"openai/outputTemplate": "ui://widget/list.html",
"nlweb/version": "0.5"
}
}

# Include conversation_id only if present and non-empty
if conversation_id:
meta_message["_meta"]["nlweb/conversationId"] = conversation_id

# Include decontextualized_query only if it differs from original query
if hasattr(self.handler, 'decontextualized_query') and self.handler.decontextualized_query:
# Skip if same as original query
if self.handler.decontextualized_query != self.handler.query:
meta_message["_meta"]["nlweb/decontextualizedQuery"] = self.handler.decontextualized_query

try:
await self.handler.http_handler.write_stream(meta_message)
except Exception as e:
self.handler.connection_alive_event.clear()
raise

def _transform_to_output_format(self, message):
"""
Transform message to requested output format (e.g., chatgptapp).

For chatgptapp format, only _meta and content messages are allowed.
All other message types are filtered out.
"""
# Check if chatgptapp format is requested
if not hasattr(self.handler, 'output_format') or self.handler.output_format != 'chatgptapp':
return message

# For chatgptapp format, only allow 'result' message_type
# All other message types should be filtered out
message_type = message.get('message_type')
if message_type != 'result':
# Skip all non-result messages for chatgptapp format
# This includes: decontextualized_query, query_rewrite, asking_sites,
# site_querying, site_complete, site_error, intermediate_message,
# tool_selection, tool_routing, nlws, ensemble_result, etc.
return None

# Extract content (results array)
content = message.get('content', [])
if not content:
return None

# Transform each item: flatten schema_object fields and convert to grounding
transformed_content = []
for item in content:
transformed_item = self._transform_item_for_chatgptapp(item)
transformed_content.append(transformed_item)

# Transform content to resource format
resource_content = {
"content": [{
"type": "resource",
"resource": {
"data": transformed_content
}
}]
}

return resource_content

def _transform_item_for_chatgptapp(self, item):
"""
Transform a single item for chatgptapp format:
- Flatten schema_object fields to top level
- Replace schema_object with grounding field containing the URL
"""
if not isinstance(item, dict):
return item

# Create a copy to avoid modifying original
transformed = dict(item)

# Check if schema_object exists
if 'schema_object' in transformed:
schema_obj = transformed['schema_object']

if isinstance(schema_obj, dict):
# Extract the URL for grounding (use item's url field)
url = transformed.get('url', '')

# Flatten all fields from schema_object to top level
for key, value in schema_obj.items():
# Only add if not already present at top level
if key not in transformed:
transformed[key] = value

# If schema_object has @type and top level @type is "Item", replace with schema @type
if '@type' in schema_obj and transformed.get('@type') == 'Item':
transformed['@type'] = schema_obj['@type']

# Replace schema_object with grounding
del transformed['schema_object']
if url:
transformed['grounding'] = url

return transformed
10 changes: 9 additions & 1 deletion code/python/webserver/mcp_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,12 @@ async def handle_tools_list(self, params):
"enum": ["list", "generate", "summarize"],
"description": "The type of response to generate",
"default": "list"
},
"output_format": {
"type": "string",
"enum": ["default", "chatgptapp"],
"description": "Output format for the response. Use 'chatgptapp' for ChatGPT App spec v0.5 format",
"default": "chatgptapp"
}
},
"required": ["query"]
Expand Down Expand Up @@ -304,13 +310,15 @@ async def handle_tools_call(self, params, query_params):
# print(f"Query: {query}")
sites = arguments.get("site", [])
generate_mode = arguments.get("generate_mode", "list")

output_format = arguments.get("output_format", "chatgptapp") # Default to chatgptapp for MCP

# Update query params with MCP arguments
# Make sure to format values as lists (like URL parameters)
query_params["query"] = [query] if query else []
if sites:
query_params["site"] = sites if isinstance(sites, list) else [sites]
query_params["generate_mode"] = [generate_mode] if generate_mode else ["list"]
query_params["output_format"] = [output_format]
# print(f"=== QUERY PARAMS BEING PASSED ===")
# print(f"query_params: {query_params}")

Expand Down
4 changes: 2 additions & 2 deletions static/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -221,9 +221,9 @@ <h3>Login with Email</h3>
// Parse ALL URL parameters
const urlParams = new URLSearchParams(window.location.search);

// Extract connection mode
// Extract connection mode - default to SSE
const mode = urlParams.get('mode') || urlParams.get('connection');
const connectionType = (mode === 'http' || mode === 'sse') ? 'sse' : 'websocket';
const connectionType = (mode === 'websocket') ? 'websocket' : 'sse';

// Collect all additional parameters to pass to backend
const additionalParams = {};
Expand Down