@@ -36,8 +36,7 @@ You can create whatever metadata is helpful for your scenario.
3636``` python
3737import pandas as pd
3838
39- meta_df = pd.DataFrame
40- (
39+ meta_df = pd.DataFrame(
4140 {
4241 " source" : [" data/woods_frost.pdf" , " data/multimodal_test.pdf" ],
4342 " category" : [" Alpha" , " Bravo" ],
@@ -70,24 +69,24 @@ sparse = True
7069ingestor = (
7170 Ingestor(message_client_hostname = hostname)
7271 .files([" data/woods_frost.pdf" , " data/multimodal_test.pdf" ])
73- .extract(
74- extract_text = True ,
75- extract_tables = True ,
76- extract_charts = True ,
77- extract_images = True ,
78- text_depth = " page"
79- )
80- .embed(text = True , tables = True )
81- .vdb_upload(
82- collection_name = collection_name,
83- milvus_uri = f " http:// { hostname} :19530 " ,
84- sparse = sparse,
85- minio_endpoint = f " { hostname} :9000 " ,
86- dense_dim = 2048 ,
87- meta_dataframe = file_path,
88- meta_source_field = " source" ,
89- meta_fields = [" category, department, timestamp" ]
90- )
72+ .extract(
73+ extract_text = True ,
74+ extract_tables = True ,
75+ extract_charts = True ,
76+ extract_images = True ,
77+ text_depth = " page"
78+ )
79+ .embed()
80+ .vdb_upload(
81+ collection_name = collection_name,
82+ milvus_uri = f " http:// { hostname} :19530 " ,
83+ sparse = sparse,
84+ minio_endpoint = f " { hostname} :9000 " ,
85+ dense_dim = 2048 ,
86+ meta_dataframe = file_path,
87+ meta_source_field = " source" ,
88+ meta_fields = [" category" , " department" , " timestamp" ]
89+ )
9190)
9291results = ingestor.ingest_async().result()
9392```
@@ -156,20 +155,19 @@ filter_expr = 'content_metadata["department"] == "Engineering"'
156155queries = [" this is expensive" ]
157156q_results = []
158157for que in queries:
159- q_results
160- .append(
161- nvingest_retrieval(
162- [que],
163- collection_name,
164- f " http:// { hostname} :19530 " ,
165- embedding_endpoint = f " http:// { hostname} :8012/v1 " ,
166- hybrid = sparse,
167- top_k = top_k,
168- model_name = model_name,
169- gpu_search = False ,
170- _filter = filter_expr
171- )
158+ q_results.append(
159+ nvingest_retrieval(
160+ [que],
161+ collection_name,
162+ milvus_uri = f " http:// { hostname} :19530 " ,
163+ embedding_endpoint = f " http:// { hostname} :8012/v1 " ,
164+ hybrid = sparse,
165+ top_k = top_k,
166+ model_name = model_name,
167+ gpu_search = False ,
168+ _filter = filter_expr
172169 )
170+ )
173171
174172print (f " { q_results} " )
175173```
0 commit comments