File tree Expand file tree Collapse file tree 1 file changed +25
-0
lines changed
Expand file tree Collapse file tree 1 file changed +25
-0
lines changed Original file line number Diff line number Diff line change 1+ import pickle
2+ from typing import Any , Dict , List
3+
4+ from graphgen .bases .base_reader import BaseReader
5+
6+
class PickleReader(BaseReader):
    """
    Reader for pickle files whose top-level object is List[Dict[str, Any]].

    NOTE(review): ``pickle.load`` can execute arbitrary code while
    deserializing, so this reader should only be pointed at trusted files.
    """

    def read(self, file_path: str) -> List[Dict[str, Any]]:
        """
        Unpickle ``file_path``, validate its structure and filter the result.

        The unpickled object must be a list and each of its items must be a
        dict. Any item whose ``"type"`` entry equals ``"text"`` must also
        contain the key named by ``self.text_column``.

        :param file_path: Path of the pickle file, opened in binary mode.
        :return: The result of ``self.filter`` applied to the validated list
            (``filter`` and ``text_column`` are not defined in this class;
            presumably they come from ``BaseReader`` -- confirm there).
        :raises ValueError: If the top-level object is not a list, an item is
            not a dict, or a text item lacks the ``self.text_column`` key.
        """
        with open(file_path, "rb") as handle:
            records = pickle.load(handle)

        if not isinstance(records, list):
            raise ValueError("Pickle file must contain a list of documents.")

        # Single pass on purpose: the first offending item, in list order,
        # determines which error is raised.
        for record in records:
            if not isinstance(record, dict):
                raise ValueError("Every item in the list must be a dict.")
            if record.get("type") != "text":
                # Only "text" items are required to carry the text column.
                continue
            if self.text_column in record:
                continue
            raise ValueError(f"Missing '{self.text_column}' in document: {record}")

        return self.filter(records)
You can’t perform that action at this time.
0 commit comments