From 76f465f261735b7a4b566d6ecd0168d8b2dcfa04 Mon Sep 17 00:00:00 2001
From: Prashant Jaiswal <prashjai@Prashants-MacBook-Pro.local>
Date: Fri, 19 Dec 2025 02:23:01 -0800
Subject: [PATCH] Update Qdrant client so that it works

---
 packages/providers/qdrant/vectordb/README.md  | 24 ++++++++-
 .../nlweb_qdrant_vectordb/__init__.py         |  4 +-
 .../nlweb_qdrant_vectordb/qdrant_client.py    | 54 ++++++++++---------
 3 files changed, 52 insertions(+), 30 deletions(-)

diff --git a/packages/providers/qdrant/vectordb/README.md b/packages/providers/qdrant/vectordb/README.md
index 64de675..ee455c2 100644
--- a/packages/providers/qdrant/vectordb/README.md
+++ b/packages/providers/qdrant/vectordb/README.md
@@ -30,7 +30,7 @@ Create `config.yaml`:
 retrieval:
   provider: qdrant
   import_path: nlweb_qdrant_vectordb.qdrant_client
-  class_name: QdrantClient
+  class_name: QdrantVectorClient
   api_endpoint_env: QDRANT_URL  # Optional for remote Qdrant
   api_key_env: QDRANT_API_KEY  # Optional for remote Qdrant
   database_path_env: QDRANT_PATH  # Optional for local Qdrant
@@ -70,14 +70,34 @@ results = await retriever.search(
 
 ## Features
 
-- Vector similarity search with Qdrant
+- Vector similarity search with Qdrant using 1536-dimensional embeddings
 - Support for both remote and local Qdrant instances
 - HNSW-based efficient similarity search
 - Configurable collection names
 - API key authentication for remote instances
 - Local file-based storage option
+- Automatic query embedding using NLWeb's embedding providers
 - Compatible with NLWeb Protocol v0.5
 
+## Data Format
+
+The Qdrant provider expects documents with the following payload structure:
+- `url`: Document URL (string)
+- `content`: Full document content as JSON string (string)
+- `type`: Document type (string)
+- `site`: Site identifier for filtering (string)
+- `embedding`: 1536-dimensional vector (stored as the point's vector in Qdrant, not as a payload field)
+
+Example payload:
+```json
+{
+  "url": "https://example.com/page",
+  "content": "{\"@type\": \"Article\", \"name\": \"Title\", \"description\": \"...\"}",
+  "type": "Article",
+  "site": "example.com"
+}
+```
+
 ## Creating Your Own Provider Package
 
 Use this package as a template:
diff --git a/packages/providers/qdrant/vectordb/nlweb_qdrant_vectordb/__init__.py b/packages/providers/qdrant/vectordb/nlweb_qdrant_vectordb/__init__.py
index 59e4485..60662ea 100644
--- a/packages/providers/qdrant/vectordb/nlweb_qdrant_vectordb/__init__.py
+++ b/packages/providers/qdrant/vectordb/nlweb_qdrant_vectordb/__init__.py
@@ -5,7 +5,7 @@
 NLWeb Qdrant Vector Database Provider
 """
 
-from nlweb_qdrant_vectordb.qdrant_client import QdrantClient
+from nlweb_qdrant_vectordb.qdrant_client import QdrantVectorClient
 from nlweb_qdrant_vectordb.qdrant_writer import QdrantWriter
 
-__all__ = ["QdrantClient", "QdrantWriter"]
+__all__ = ["QdrantVectorClient", "QdrantWriter"]
diff --git a/packages/providers/qdrant/vectordb/nlweb_qdrant_vectordb/qdrant_client.py b/packages/providers/qdrant/vectordb/nlweb_qdrant_vectordb/qdrant_client.py
index 661d7c8..66a9907 100644
--- a/packages/providers/qdrant/vectordb/nlweb_qdrant_vectordb/qdrant_client.py
+++ b/packages/providers/qdrant/vectordb/nlweb_qdrant_vectordb/qdrant_client.py
@@ -19,7 +19,7 @@
 from nlweb_core.retriever import VectorDBClientInterface
 
 
-class QdrantClient(VectorDBClientInterface):
+class QdrantVectorClient(VectorDBClientInterface):
     """
     Client for Qdrant vector database operations, providing a unified interface for
     indexing, storing, and retrieving vector-based search results.
@@ -131,11 +131,11 @@ async def _get_qdrant_client(self) -> AsyncQdrantClient:
         try:
             params = self._create_client_params()
 
-            # Create client with the determined parameters
+            # Create async client
             client = AsyncQdrantClient(**params)
 
             # Test connection by getting collections
-            collections = await client.get_collections()
+            await client.get_collections()
 
             # Store in cache with lock
             with self._client_lock:
@@ -258,7 +258,7 @@ def _create_site_filter(self, site: Union[str, List[str]]):
 
     def _format_results(self, search_result: List[models.ScoredPoint]) -> List[List[str]]:
         """
-        Format Qdrant search results to match expected API: [url, text_json, name, site].
+        Format Qdrant search results to match expected API: [url, content_json, type, site].
 
         Args:
             search_result: Qdrant search results
@@ -270,11 +270,12 @@ def _format_results(self, search_result: List[models.ScoredPoint]) -> List[List[
         for item in search_result:
             payload = item.payload
             url = payload.get("url", "")
-            schema = payload.get("schema_json", "")
-            name = payload.get("name", "")
+            # Content is stored as JSON string in Qdrant payload
+            content = payload.get("content", "")
+            type_name = payload.get("type", "")
             site_name = payload.get("site", "")
 
-            results.append([url, schema, name, site_name])
+            results.append([url, content, type_name, site_name])
 
         return results
 
@@ -298,11 +299,12 @@ async def search(
             query_params: Additional query parameters
 
         Returns:
-            List[List[str]]: List of search results in format [url, text_json, name, site]
+            List[List[str]]: List of search results in format [url, content_json, type, site]
         """
         collection_name = collection_name or self.default_collection_name
 
         try:
+            # Embed the query using NLWeb's embedding function
             start_embed = time.time()
             embedding = await get_embedding(query, query_params=query_params)
             embed_time = time.time() - start_embed
@@ -314,24 +316,24 @@ async def search(
             filter_condition = self._create_site_filter(site)
 
             # Ensure collection exists before searching
-            collection_created = await self.ensure_collection_exists(
-                collection_name, len(embedding)
+            if not await client.collection_exists(collection_name):
+                # Collection doesn't exist, return empty results
+                return []
+            
+            # Perform the search using async client query_points method
+            search_response = await client.query_points(
+                collection_name=collection_name,
+                query=embedding,
+                limit=num_results,
+                query_filter=filter_condition,
+                with_payload=True,  # Critical: fetches the content back
             )
-            if collection_created:
-                # Collection was just created, return empty results
-                results = []
-            else:
-                # Perform the search
-                search_result = await client.search(
-                    collection_name=collection_name,
-                    query_vector=embedding,
-                    limit=num_results,
-                    query_filter=filter_condition,
-                    with_payload=True,
-                )
+            
+            # Extract points from response
+            search_result = search_response.points
 
-                # Format the results
-                results = self._format_results(search_result)
+            # Format the results
+            results = self._format_results(search_result)
 
             retrieve_time = time.time() - start_retrieve
 
@@ -380,8 +382,8 @@ async def search_by_url(
                 payload = item.payload
                 formatted_result = [
                     payload.get("url", ""),
-                    payload.get("schema_json", ""),
-                    payload.get("name", ""),
+                    payload.get("content", ""),  # Content as JSON string
+                    payload.get("type", ""),
                     payload.get("site", ""),
                 ]