Skip to content

Commit f867e6f

Browse files
ChrisJar and Christopher Jarrett authored
Fix issues in filter search results docs (NVIDIA#958)
Co-authored-by: Christopher Jarrett <christopherjarrett@christopherjarrett-mlt.client.nvidia.com>
1 parent b871cab commit f867e6f

1 file changed

Lines changed: 31 additions & 33 deletions

File tree

docs/docs/extraction/custom-metadata.md

Lines changed: 31 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -36,8 +36,7 @@ You can create whatever metadata is helpful for your scenario.
3636
```python
3737
import pandas as pd
3838

39-
meta_df = pd.DataFrame
40-
(
39+
meta_df = pd.DataFrame(
4140
{
4241
"source": ["data/woods_frost.pdf", "data/multimodal_test.pdf"],
4342
"category": ["Alpha", "Bravo"],
@@ -70,24 +69,24 @@ sparse = True
7069
ingestor = (
7170
Ingestor(message_client_hostname=hostname)
7271
.files(["data/woods_frost.pdf", "data/multimodal_test.pdf"])
73-
.extract(
74-
extract_text=True,
75-
extract_tables=True,
76-
extract_charts=True,
77-
extract_images=True,
78-
text_depth="page"
79-
)
80-
.embed(text=True, tables=True)
81-
.vdb_upload(
82-
collection_name=collection_name,
83-
milvus_uri=f"http://{hostname}:19530",
84-
sparse=sparse,
85-
minio_endpoint=f"{hostname}:9000",
86-
dense_dim=2048,
87-
meta_dataframe=file_path,
88-
meta_source_field="source",
89-
meta_fields=["category, department, timestamp"]
90-
)
72+
.extract(
73+
extract_text=True,
74+
extract_tables=True,
75+
extract_charts=True,
76+
extract_images=True,
77+
text_depth="page"
78+
)
79+
.embed()
80+
.vdb_upload(
81+
collection_name=collection_name,
82+
milvus_uri=f"http://{hostname}:19530",
83+
sparse=sparse,
84+
minio_endpoint=f"{hostname}:9000",
85+
dense_dim=2048,
86+
meta_dataframe=file_path,
87+
meta_source_field="source",
88+
meta_fields=["category", "department", "timestamp"]
89+
)
9190
)
9291
results = ingestor.ingest_async().result()
9392
```
@@ -156,20 +155,19 @@ filter_expr = 'content_metadata["department"] == "Engineering"'
156155
queries = ["this is expensive"]
157156
q_results = []
158157
for que in queries:
159-
q_results
160-
.append(
161-
nvingest_retrieval(
162-
[que],
163-
collection_name,
164-
f"http://{hostname}:19530",
165-
embedding_endpoint=f"http://{hostname}:8012/v1",
166-
hybrid=sparse,
167-
top_k=top_k,
168-
model_name=model_name,
169-
gpu_search=False,
170-
_filter=filter_expr
171-
)
158+
q_results.append(
159+
nvingest_retrieval(
160+
[que],
161+
collection_name,
162+
milvus_uri=f"http://{hostname}:19530",
163+
embedding_endpoint=f"http://{hostname}:8012/v1",
164+
hybrid=sparse,
165+
top_k=top_k,
166+
model_name=model_name,
167+
gpu_search=False,
168+
_filter=filter_expr
172169
)
170+
)
173171

174172
print(f"{q_results}")
175173
```

0 commit comments

Comments (0)