@@ -60,7 +60,9 @@ def build_prompt(self, text: str) -> str:
6060 return prompt
6161
6262 async def extract (self , chunk : dict ) -> dict :
63- text = chunk .get ("text" , "" )
63+ _chunk_id = list (chunk .keys ())[0 ]
64+ text = chunk [_chunk_id ].get ("content" , "" )
65+
6466 prompt = self .build_prompt (text )
6567 response = await self .llm_client .generate_answer (prompt )
6668 try :
@@ -74,13 +76,20 @@ async def extract(self, chunk: dict) -> dict:
7476 return {}
7577 main_keys_info = {key : extracted_info [key ] for key in self .required_keys }
7678 logger .debug ("Extracted info: %s" , extracted_info )
77- return {compute_dict_hash (main_keys_info , prefix = "extract" ): extracted_info }
79+
80+ # add chunk metadata
81+ extracted_info ["_chunk_id" ] = _chunk_id
82+
83+ return {
84+ compute_dict_hash (main_keys_info , prefix = "extract-" ): extracted_info
85+ }
7886 except json .JSONDecodeError :
7987 logger .error ("Failed to parse extraction response: %s" , response )
8088 return {}
8189
90+ @staticmethod
8291 async def merge_extractions (
83- self , extraction_list : List [Dict [str , dict ]]
92+ extraction_list : List [Dict [str , dict ]]
8493 ) -> Dict [str , dict ]:
8594 """
8695 Merge multiple extraction results based on their hashes.
0 commit comments