@@ -215,7 +215,18 @@ def _is_already_searched(self, doc: dict) -> bool:
215215
216216 return False
217217
218- def _normalize_searched_data (self , doc : dict ) -> dict :
@staticmethod
def _clean_value(v):
    """Recursively convert numpy arrays and containers to Python-native types.

    Args:
        v: Any value. ``np.ndarray`` instances are converted with
            ``tolist()``; lists and tuples become lists; dicts keep their
            keys and get converted values. Anything else is returned as is.

    Returns:
        The converted value, with no ``np.ndarray`` left at any nesting
        level of the handled containers (arrays, lists, tuples, dicts).
    """

    def convert(item):
        # Local recursive helper so the recursion does not depend on the
        # enclosing class being reachable under a hard-coded name.
        if isinstance(item, np.ndarray):
            native = item.tolist()
            # tolist() only unwraps the array itself: the elements of an
            # object-dtype array (nested arrays, dicts, ...) come back
            # untouched, so they need their own conversion pass.
            if item.dtype.hasobject:
                return convert(native)
            return native
        if isinstance(item, (list, tuple)):
            return [convert(element) for element in item]
        if isinstance(item, dict):
            return {key: convert(value) for key, value in item.items()}
        return item

    return convert(v)
228+
229+ def _normalize_searched_data (self , doc : dict ) -> dict : # pylint: disable=too-many-branches
219230 """
220231 Normalize a document that already contains search results to the expected format.
221232
@@ -289,7 +300,7 @@ def _normalize_searched_data(self, doc: dict) -> dict:
289300
290301 return normalized_doc
291302
292- def process (self , batch : pd .DataFrame ) -> pd .DataFrame :
303+ def process (self , batch : pd .DataFrame ) -> pd .DataFrame : # pylint: disable=too-many-branches
293304 """
294305 Process a batch of documents and perform searches.
295306 This is the Ray Data operator interface.
@@ -397,18 +408,7 @@ def process(self, batch: pd.DataFrame) -> pd.DataFrame:
397408
398409 # Convert numpy arrays and complex types to Python-native types
399410 # to avoid Ray Data tensor extension casting issues
400- def clean_value (v ):
401- """Recursively convert numpy arrays and other problematic types to Python-native types."""
402- if isinstance (v , np .ndarray ):
403- return v .tolist ()
404- elif isinstance (v , (list , tuple )):
405- return [clean_value (item ) for item in v ]
406- elif isinstance (v , dict ):
407- return {k : clean_value (val ) for k , val in v .items ()}
408- else :
409- return v
410-
411- cleaned_result = {k : clean_value (v ) for k , v in result .items ()}
411+ cleaned_result = {k : self ._clean_value (v ) for k , v in result .items ()}
412412
413413 # Create document row with all result fields plus required fields
414414 row = {
0 commit comments