Update Vectara Index to return both document level and page level metadata #17976

Open · wants to merge 8 commits into base: main
21 changes: 13 additions & 8 deletions docs/docs/community/integrations/managed_indices.md
@@ -39,7 +39,14 @@ See the [notebook guide](../../examples/managed/GoogleDemo.ipynb) for full details.
## Vectara

First, [sign up](https://vectara.com/integrations/llama_index) and use the Vectara Console to create a corpus (aka Index), and add an API key for access.
-Then put the customer id, corpus id, and API key in your environment.
+Once you have your API key, export it and your corpus key as environment variables:

```python
import os

os.environ["VECTARA_API_KEY"] = "<YOUR_VECTARA_API_KEY>"
os.environ["VECTARA_CORPUS_KEY"] = "<YOUR_VECTARA_CORPUS_KEY>"
```

Then construct the Vectara Index and query it as follows:

@@ -48,30 +48,28 @@
```python
from llama_index.core import ManagedIndex, SimpleDirectoryReader
from llama_index.indices.managed.vectara import VectaraIndex

# Load documents and build index
-vectara_customer_id = os.environ.get("VECTARA_CUSTOMER_ID")
-vectara_corpus_id = os.environ.get("VECTARA_CORPUS_ID")
+vectara_corpus_key = os.environ.get("VECTARA_CORPUS_KEY")
vectara_api_key = os.environ.get("VECTARA_API_KEY")

documents = SimpleDirectoryReader("../paul_graham_essay/data").load_data()
index = VectaraIndex.from_documents(
    documents,
-    vectara_customer_id=vectara_customer_id,
-    vectara_corpus_id=vectara_corpus_id,
+    vectara_corpus_key=vectara_corpus_key,
    vectara_api_key=vectara_api_key,
)
```

Notes:
-* If the environment variables `VECTARA_CUSTOMER_ID`, `VECTARA_CORPUS_ID` and `VECTARA_API_KEY` are in the environment already, you do not have to explicitly specify them in your call and the VectaraIndex class will read them from the environment.
-* To connect to multiple Vectara corpora, you can set `VECTARA_CORPUS_ID` to a comma-separated list, for example: `12,51` would connect to corpus `12` and corpus `51`.
+* If the environment variables `VECTARA_CORPUS_KEY` and `VECTARA_API_KEY` are already set, you do not have to specify them explicitly in your call; the `VectaraIndex` class will read them from the environment.
+* To connect to multiple Vectara corpora, set `VECTARA_CORPUS_KEY` to a comma-separated list; for example, `12,51` would connect to corpus `12` and corpus `51`.
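The comma-separated convention in the note above can be sketched in isolation (the corpus keys `12` and `51` are placeholder values, not real corpora):

```python
import os

# Hypothetical example: two corpus keys joined by a comma, as the note describes.
os.environ["VECTARA_CORPUS_KEY"] = "12,51"

# The same parsing VectaraIndex would need to do internally:
corpus_keys = [key.strip() for key in os.environ["VECTARA_CORPUS_KEY"].split(",")]
print(corpus_keys)  # ['12', '51']
```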

If you already have documents in your corpus, you can just access the data directly by constructing the `VectaraIndex` as follows:

```python
index = VectaraIndex()
```

-And the index will connect to the existing corpus without loading any new documents.
+The `VectaraIndex` will connect to the existing corpus without loading any new documents.

To query the index, simply construct a query engine as follows:

@@ -3,32 +3,40 @@
The Vectara Index provides a simple implementation of Vectara's end-to-end RAG pipeline,
including data ingestion, document retrieval, reranking results, summary generation, and hallucination evaluation.

-## Setup
+Please note that this documentation applies to versions >= 0.4.0 and will not be the same as for earlier versions of Vectara `ManagedIndex`.
+
+## 📌 Setup

First, make sure you have the latest LlamaIndex version installed.

```shell
pip install -U llama-index
```

Next, install the Vectara Index:

```shell
pip install -U llama-index-indices-managed-vectara
```

-Finally, set up your Vectara corpus. If you don't have a Vectara account, you can [sign up](https://vectara.com/integrations/llamaindex) and follow our [Quick Start](https://docs.vectara.com/docs/quickstart) guide to create a corpus and an API key (make sure it has both indexing and query permissions).
+Finally, set up your Vectara corpus. If you don't have a Vectara account, you can [sign up](https://vectara.com/integrations/llamaindex) and follow our [Quick Start](https://docs.vectara.com/docs/quickstart) guide to create a corpus and an API key (make sure the API key has both indexing and query permissions, or use your personal API key).

-## Usage
-
-Please note that this usage example is for versions >= 0.4.0 and will not be the same as for earlier versions of Vectara ManagedIndex.
-
-First let's initialize the index with some sample documents.
-Make sure to always specify a unique `id_` for every document you add to your index.
-If you don't specify this parameter, a random id will be generated and the document will be separately added to your corpus every time you run your code.
+Once you have your API key, export it and your corpus key as environment variables:

```python
import os

os.environ["VECTARA_API_KEY"] = "<YOUR_VECTARA_API_KEY>"
os.environ["VECTARA_CORPUS_KEY"] = "<YOUR_VECTARA_CORPUS_KEY>"
```

## 🚀 Usage

### 1. Index Documents

Create an index and add some sample documents:

```python
from llama_index.indices.managed.vectara import VectaraIndex
from llama_index.core.schema import Document, MediaResource

@@ -52,21 +60,29 @@ docs = [
        ),
    ),
]

index = VectaraIndex.from_documents(docs)
```

Make sure to always specify a unique `id_` for every document you add to your index.
If you don't specify this parameter, a random id will be generated and the document will be separately added to your corpus every time you run your code.
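One way to avoid the accidental duplicates described above is to derive `id_` deterministically from the document content. This is a sketch, not part of the Vectara API; `stable_doc_id` is a hypothetical helper:

```python
import hashlib

def stable_doc_id(text: str) -> str:
    # Derive a deterministic id from the document text, so re-running
    # ingestion produces the same id_ instead of a fresh random one.
    return hashlib.sha256(text.encode("utf-8")).hexdigest()[:16]

doc_text = "Vectara is an end-to-end RAG platform."
print(stable_doc_id(doc_text) == stable_doc_id(doc_text))  # True
```

The id could then be passed as `Document(id_=stable_doc_id(text), ...)` when building the docs list.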

You can now use this index to retrieve documents.

### 2. Retrieve Documents

Retrieve the top 2 most relevant documents for a query:

```python
# Retrieve the top 2 search results
-retriever = index.as_retriever(similarity_top_k=1)
+retriever = index.as_retriever(similarity_top_k=2)

results = retriever.retrieve("How will users feel about this new tool?")
print(results[0])
```

-You can also use it as a query engine to get a generated summary from the retrieved results.
+### 3. Use as a Query Engine
+
+Generate a summary of the retrieved results:

```python
query_engine = index.as_query_engine()
@@ -80,4 +96,39 @@ for node in results.source_nodes[:2]:
    print(node)
```

## 📂 Understanding `source_nodes` structure

Each entry in `source_nodes` is a `NodeWithScore` object with:

- `text`: The matched text snippet.
- `id_`: The unique identifier of the document.
- `metadata`: A dictionary containing:
- Key-value pairs from the matched part of the document.
- A `document` key that stores all document-level metadata.
- `score`: The relevance score of the match.

Example Output:

```
NodeWithScore(
    node=Node(
        text_resource=MediaResource(
            text="This is a test text for Vectara integration with LlamaIndex."
        ),
        id_="doc1",
        metadata={
            "category": "AI",
            "page": 23,
            "document": {
                "url": "https://www.vectara.com/developers/build/integrations/llamaindex",
                "title": "LlamaIndex + Vectara Integration",
                "author": "Ofer Mendelevitch",
                "date": "2025-03-01",
            },
        },
    ),
    score=0.89,
)
```
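Given this structure, page-level and document-level fields are read at different depths. A minimal sketch using a plain dict that mirrors the metadata shown above (no Vectara call needed):

```python
# Plain-dict stand-in for the `metadata` field of the example NodeWithScore.
metadata = {
    "category": "AI",
    "page": 23,
    "document": {
        "title": "LlamaIndex + Vectara Integration",
        "author": "Ofer Mendelevitch",
    },
}

# Page-level fields sit at the top level; document-level fields are
# nested under the "document" key.
print(metadata["page"])               # 23
print(metadata["document"]["title"])  # LlamaIndex + Vectara Integration
```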

If you want to see the full features and capabilities of `VectaraIndex`, check out this Jupyter [notebook](https://github.com/vectara/example-notebooks/blob/main/notebooks/using-vectara-with-llamaindex.ipynb).
@@ -262,7 +262,7 @@ def add_document(
        description: Optional[str] = None,
        max_chars_per_chunk: Optional[int] = None,
    ) -> None:
-        """ "
+        """
        Indexes a document into a corpus using the Vectara Structured Document format.

        Full API Docs: https://docs.vectara.com/docs/api-reference/indexing-apis/indexing#structured-document-object-definition
@@ -456,9 +456,16 @@ def text_generator() -> TokenGen:
                            text=search_result["text"]
                        ),
                        id_=search_result["document_id"],
-                        metadata=search_result["document_metadata"],
+                        metadata={
+                            # Metadata from the matched part
+                            **search_result.get("part_metadata", {}),
+                            # Document-level metadata
+                            "document": search_result.get("document_metadata", {}),
+                        },
                    ),
                    score=search_result["score"],
                )
@@ -570,7 +577,12 @@ def _vectara_query(
                node=Node(
                    text_resource=MediaResource(text=search_result["text"]),
                    id_=search_result["document_id"],
-                    metadata=search_result["document_metadata"],
+                    metadata={
+                        # Metadata from the matched part
+                        **search_result.get("part_metadata", {}),
+                        # Document-level metadata
+                        "document": search_result.get("document_metadata", {}),
+                    },
                ),
                score=search_result["score"],
            )
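The merge introduced in this hunk can be exercised in isolation with plain dicts (the sample field values are illustrative, not real query output):

```python
# Stand-in for a Vectara search result, mirroring the fields used above.
search_result = {
    "part_metadata": {"page": 23, "category": "AI"},
    "document_metadata": {"title": "LlamaIndex + Vectara Integration"},
}

# Same merge as in the diff: part-level keys land at the top level,
# document-level metadata is nested under the "document" key.
metadata = {
    **search_result.get("part_metadata", {}),
    "document": search_result.get("document_metadata", {}),
}
print(metadata["page"])               # 23
print(metadata["document"]["title"])  # LlamaIndex + Vectara Integration
```

Using `.get(..., {})` keeps the merge safe when a result is missing either metadata field.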
@@ -31,7 +31,7 @@ exclude = ["**/BUILD"]
license = "MIT"
name = "llama-index-indices-managed-vectara"
readme = "README.md"
-version = "0.4.1"
+version = "0.4.2"

[tool.poetry.dependencies]
python = ">=3.9,<4.0"
@@ -313,7 +313,7 @@ def test_update_doc(vectara1) -> None:
    res = qe.retrieve("Find me something completely different.")
    assert len(res) == 1
    assert res[0].node.get_content() == docs[1].text
-    assert res[0].node.metadata["test_score"] == 14
+    assert res[0].node.metadata["document"]["test_score"] == 14


@pytest.fixture()
@@ -505,13 +505,13 @@ def test_simple_retrieval_with_nodes(vectara3) -> None:
    )
    res = qe.retrieve("Find me something different")
    assert len(res) == 1
-    assert res[0].node.metadata["author"] == "Vectara"
-    assert res[0].node.metadata["title"] == "LlamaIndex Integration"
+    assert res[0].node.metadata["document"]["author"] == "Vectara"
+    assert res[0].node.metadata["document"]["title"] == "LlamaIndex Integration"
    assert res[0].node.get_content() == nodes[1].text_resource.text
    assert res[0].node.metadata["test_score"] == 2


def test_filter_with_nodes(vectara3) -> None:
    nodes = get_nodes()
    qe = vectara3.as_retriever(
        similarity_top_k=2,
        n_sentences_before=0,