Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions code/.env.template
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ SNOWFLAKE_PAT="<TODO>"
# One of https://docs.snowflake.com/en/user-guide/snowflake-cortex/vector-embeddings#text-embedding-models
SNOWFLAKE_EMBEDDING_MODEL=snowflake-arctic-embed-l-v2.0

DC_API_KEY="<TODO>"

# Fully qualified name of the cortex search service in your snowflake account
# For example TEMP.NLWEB.NLWEB_SAMPLE
# if you used snowflake.sql with --database TEMP --schema NLWEB
Expand Down
5 changes: 5 additions & 0 deletions code/config/config_retrieval.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,8 @@ endpoints:
index_name: SNOWFLAKE_CORTEX_SEARCH_SERVICE
db_type: snowflake_cortex_search

data_commons:
api_key_env: DC_API_KEY
index_name: nlweb
db_type: data_commons

63 changes: 63 additions & 0 deletions code/retrieval/data_commons_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import json
from typing import Any, Dict, List, Optional, Union
from urllib.parse import quote_plus

import httpx

from config.config import CONFIG
from utils.logger import LogLevel
from utils.logging_config_helper import get_configured_logger

# Module-level logger registered under the "data_commons_client" name.
logger = get_configured_logger("data_commons_client")

# Endpoint that DataCommonsSearchClient.search sends its GET requests to.
_BASE_URL = 'https://nl.datacommons.org/nodejs/query'
# Fixed query-string parameters appended to every search request.
# NOTE(review): the meaning of the mode/idx values is not documented in this
# file -- confirm against the Data Commons NL API before changing them.
_POINT_PARAMS = 'allCharts=1&mode=toolformer_rig&idx=base_uae_mem'

class DataCommonsSearchClient:
    """
    Adapts the DC NL API to the VectorDBClientInterface.

    Only querying is implemented: ``search`` and ``search_all_sites`` issue a
    GET request against the Data Commons NL endpoint and turn the returned
    charts into result rows.  Upload, deletion and lookup-by-URL raise
    ``NotImplementedError``.
    """
    # Endpoint configuration; replaced per instance in __init__.
    _cfg = None

    def __init__(self, endpoint_name: Optional[str] = None):
        """Load the settings for ``endpoint_name`` from ``CONFIG.retrieval_endpoints``.

        Args:
            endpoint_name: Key of the retrieval endpoint to use.
        """
        # NOTE(review): the default of None is used directly as the lookup
        # key -- confirm that callers always pass an explicit endpoint name.
        self._cfg = CONFIG.retrieval_endpoints[endpoint_name]

    async def deleted_documents_by_site(self, site: str, **kwargs) -> int:
        """Not supported; always raises ``NotImplementedError``."""
        raise NotImplementedError("Deletion not implemented yet")

    async def delete_documents_by_site(self, site: str, **kwargs) -> int:
        """Alias that delegates to ``deleted_documents_by_site``.

        NOTE(review): ``deleted_documents_by_site`` looks like a typo of this
        name -- confirm against VectorDBClientInterface.  The original name is
        kept so existing callers keep working.
        """
        return await self.deleted_documents_by_site(site, **kwargs)

    async def upload_documents(self, documents: List[Dict[str, Any]], **kwargs) -> int:
        """Not supported; always raises ``NotImplementedError``."""
        raise NotImplementedError("Incremental updates not implemented yet")

    @staticmethod
    def _encode_query(query: str) -> str:
        """Trim ``query`` and percent-encode it for use as a query-string value.

        Spaces still become ``+``; reserved characters such as ``&``, ``=``,
        ``#`` and a literal ``+`` are escaped so user text cannot alter or
        inject request parameters.
        """
        return quote_plus(query.strip())

    @staticmethod
    def _charts_to_results(payload: Any, num_results: int) -> List[List[str]]:
        """Convert a decoded API response into at most ``num_results`` rows.

        Each row is ``[dcUrl, <chart serialised as JSON>, title, ""]``.
        Charts whose ``type`` is ``'HIGHLIGHT'`` are skipped.  A payload that
        is not a dict, or whose ``charts`` entry is missing or null, yields
        an empty list.
        """
        charts = payload.get('charts') if isinstance(payload, dict) else None
        rows: List[List[str]] = []
        for chart in charts or []:
            if len(rows) >= num_results:
                break
            if chart.get('type') == 'HIGHLIGHT':
                continue
            rows.append([
                chart.get('dcUrl', ''), json.dumps(chart), chart.get('title', ''), "",
            ])
        return rows

    async def search(self, query: str, site: Union[str, List[str]], num_results: int=50, **kwargs) -> List[List[str]]:
        """Query the Data Commons NL API and return its charts as result rows.

        Args:
            query: Natural-language query text.
            site: Accepted for interface compatibility; not used by this client.
            num_results: Maximum number of rows to return.
            **kwargs: Ignored.

        Returns:
            A list of ``[dcUrl, chart_json, title, ""]`` rows.

        Raises:
            Exception: Any failure while building the request, calling the
                API (including non-2xx responses) or decoding the response
                is logged and then re-raised unchanged.
        """
        encoded_query = self._encode_query(query)
        try:
            async with httpx.AsyncClient() as client:
                response = await client.get(
                    f'{_BASE_URL}/?&q={encoded_query}&key={self._cfg.api_key}&{_POINT_PARAMS}',
                    timeout=60,
                )
                response.raise_for_status()
                return self._charts_to_results(response.json(), num_results)
        except Exception as e:
            # NOTE(review): the API key is part of the request URL, so the
            # exception text logged here may contain it -- consider redacting.
            logger.exception("Error in DataCommonsSearchClient.search")
            logger.log_with_context(
                LogLevel.ERROR,
                "Data Commons Search retrieval failed",
                {
                    "error_type": type(e).__name__,
                    "error_message": str(e),
                }
            )
            raise

    async def search_by_url(self, url: str, **kwargs) -> Optional[List[str]]:
        """Not supported; always raises ``NotImplementedError``."""
        raise NotImplementedError("Search by url not implemented yet")

    async def search_all_sites(self, query: str, num_results: int = 50, **kwargs) -> List[List[str]]:
        """Run ``search`` with an empty ``site`` value."""
        return await self.search(query, site="", num_results=num_results, **kwargs)
3 changes: 3 additions & 0 deletions code/retrieval/retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from retrieval.milvus_client import MilvusVectorClient
from retrieval.qdrant import QdrantVectorClient
from retrieval.snowflake_client import SnowflakeCortexSearchClient
from retrieval.data_commons_client import DataCommonsSearchClient

logger = get_configured_logger("retriever")

Expand Down Expand Up @@ -174,6 +175,8 @@ async def get_client(self) -> VectorDBClientInterface:
client = QdrantVectorClient(self.endpoint_name)
elif self.db_type == "snowflake_cortex_search":
client = SnowflakeCortexSearchClient(self.endpoint_name)
elif self.db_type == 'data_commons':
client = DataCommonsSearchClient(self.endpoint_name)
else:
error_msg = f"Unsupported database type: {self.db_type}"
logger.error(error_msg)
Expand Down
3 changes: 2 additions & 1 deletion static/dropdown-interface.js
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,8 @@ export class DropdownInterface {
{ id: 'milvus_1', name: 'Milvus' },
{ id: 'qdrant_local', name: 'Qdrant Local' },
{ id: 'qdrant_url', name: 'Qdrant URL' },
{ id: 'snowflake_cortex_search_1', name: 'Snowflake_Cortex_Search' }
{ id: 'snowflake_cortex_search_1', name: 'Snowflake_Cortex_Search' },
{ id: 'data_commons', name: 'DataCommons'}
];
}
}