44
55"""
66
7+ import asyncio
78import logging
89from collections import Counter
910from functools import partial
1011from typing import Any , Callable , Dict , List , Optional , cast
11- import asyncio
12- from llama_index .utils import iter_batch
12+
1313from llama_index .bridge .pydantic import PrivateAttr
1414from llama_index .schema import BaseNode , MetadataMode , TextNode
15+ from llama_index .utils import iter_batch
1516from llama_index .vector_stores .types import (
1617 BasePydanticVectorStore ,
1718 MetadataFilters ,
@@ -100,12 +101,16 @@ def _to_pinecone_filter(standard_filters: MetadataFilters) -> dict:
100101 return filters
101102
102103
async def async_upload(
    index: Any, vectors: List[Dict], batch_size: int, semaphore: asyncio.Semaphore
) -> None:
    """Upsert ``vectors`` into ``index`` in batches, waiting for every batch.

    Args:
        index: Object exposing ``upsert(batch, async_req=True)``.
        vectors: Entries to upload; split into chunks with ``iter_batch``.
        batch_size: Maximum number of entries sent per ``upsert`` call.
        semaphore: Limits how many batches are in flight at the same time.

    Raises:
        Any exception raised by ``index.upsert`` or by resolving its result
        propagates to the caller through ``asyncio.gather``.
    """

    def upsert_and_wait(batch: List[Dict]) -> Any:
        # Runs in a worker thread, so blocking here does not stall the loop.
        pending = index.upsert(batch, async_req=True)
        # NOTE(review): assumes that with ``async_req=True`` the client hands
        # back a multiprocessing-style pending result (``ready()``/``get()``)
        # instead of the response itself -- confirm against the client version.
        # Resolving it here keeps the semaphore held until the request really
        # finishes and surfaces upload errors instead of dropping them.
        if callable(getattr(pending, "ready", None)) and callable(
            getattr(pending, "get", None)
        ):
            return pending.get()
        return pending

    async def send_batch(batch: List[Dict]) -> Any:
        async with semaphore:
            return await asyncio.to_thread(upsert_and_wait, batch)

    await asyncio.gather(
        *(send_batch(chunk) for chunk in iter_batch(vectors, size=batch_size))
    )
109114
110115
111116import_err_msg = (
@@ -250,12 +255,12 @@ def _prepare_entries_for_upsert(self, nodes: List[BaseNode]) -> List[Dict]:
250255 if self .add_sparse_vector :
251256 sparse_vector = generate_sparse_vectors (
252257 [node .get_content (metadata_mode = MetadataMode .EMBED )],
253- self ._tokenizer ,
258+ self ._tokenizer , # type: ignore
254259 )[0 ]
255260 entry [SPARSE_VECTOR_KEY ] = sparse_vector
256261
257262 entries .append (entry )
258-
263+
259264 return entries
260265
261266 def add (
@@ -268,7 +273,6 @@ def add(
268273 nodes: List[BaseNode]: list of nodes with embeddings
269274
270275 """
271-
272276 entries = self ._prepare_entries_for_upsert (nodes )
273277
274278 [
@@ -293,15 +297,13 @@ async def async_add(
293297 Returns:
294298 List[str]: List of IDs of the added documents.
295299 """
296-
297300 entries = self ._prepare_entries_for_upsert (nodes )
298301
299302 semaphore = asyncio .Semaphore (SEM_MAX_CONCURRENT )
300303 await async_upload (self ._pinecone_index , entries , DEFAULT_BATCH_SIZE , semaphore )
301304
302305 return [entry [ID_KEY ] for entry in entries ]
303306
304-
305307 def delete (self , ref_doc_id : str , ** delete_kwargs : Any ) -> None :
306308 """
307309 Delete nodes using with ref_doc_id.
0 commit comments