diff --git a/nemo_retriever/src/nemo_retriever/examples/batch_pipeline.py b/nemo_retriever/src/nemo_retriever/examples/batch_pipeline.py index 97e8637be..587f77703 100644 --- a/nemo_retriever/src/nemo_retriever/examples/batch_pipeline.py +++ b/nemo_retriever/src/nemo_retriever/examples/batch_pipeline.py @@ -589,6 +589,9 @@ def main( lancedb_uri = str(Path(lancedb_uri).expanduser().resolve()) _ensure_lancedb_table(lancedb_uri, LANCEDB_TABLE) + if hybrid: + print("[hybrid] Hybrid search enabled (dense + FTS).") + # Remote endpoints don't need local model GPUs for their stage. if page_elements_invoke_url and float(gpu_page_elements) != 0.0: print( @@ -870,6 +873,7 @@ def main( hybrid=hybrid, ) + print(f"[recall] Search mode: {'hybrid (dense + FTS + RRF rerank)' if hybrid else 'vector-only'}") _df_query, _gold, _raw_hits, _retrieved_keys, metrics = retrieve_and_score(query_csv=query_csv, cfg=cfg) if not no_recall_details: diff --git a/nemo_retriever/src/nemo_retriever/ingest_modes/batch.py b/nemo_retriever/src/nemo_retriever/ingest_modes/batch.py index e33f82ee3..a85d07cce 100644 --- a/nemo_retriever/src/nemo_retriever/ingest_modes/batch.py +++ b/nemo_retriever/src/nemo_retriever/ingest_modes/batch.py @@ -1101,6 +1101,7 @@ def _create_lancedb_index(self) -> None: fts_language = str(kw.get("fts_language", "English")) try: table.create_fts_index(text_column, language=fts_language) + print(f"[hybrid] FTS index created on column {text_column!r} (language={fts_language!r})") except Exception as e: print( f"Warning: FTS index creation failed on column {text_column!r} (continuing with vector-only): {e}" @@ -1109,4 +1110,5 @@ def _create_lancedb_index(self) -> None: for index_stub in table.list_indices(): table.wait_for_index([index_stub.name], timeout=timedelta(seconds=600)) - print(f"Wrote {n_vecs} rows to LanceDB uri={lancedb_uri!r} table={table_name!r}") + index_names = [idx.name for idx in table.list_indices()] + print(f"Wrote {n_vecs} rows to LanceDB uri={lancedb_uri!r} table={table_name!r} indices={index_names}") diff --git a/nemo_retriever/src/nemo_retriever/recall/core.py b/nemo_retriever/src/nemo_retriever/recall/core.py index 9fc80d59f..ec9bbb1d0 100644 --- a/nemo_retriever/src/nemo_retriever/recall/core.py +++ b/nemo_retriever/src/nemo_retriever/recall/core.py @@ -196,6 +196,8 @@ def _search_lancedb( if effective_nprobes <= 0: effective_nprobes = 16 # safe fallback matching default index config + if hybrid: + logger.info("Recall search using hybrid mode (dense + FTS + RRF rerank)") results: List[List[Dict[str, Any]]] = [] for i, v in enumerate(query_vectors): q = np.asarray(v, dtype="float32")