Skip to content

Commit 6778cd3

Browse files
Merge pull request #81 from open-sciencelab/feature/pickle-reader
feat: add PickleReader
2 parents 5bb6deb + 87b9d1c commit 6778cd3

File tree

1 file changed

+25
-0
lines changed

1 file changed

+25
-0
lines changed
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import pickle
2+
from typing import Any, Dict, List
3+
4+
from graphgen.bases.base_reader import BaseReader
5+
6+
7+
class PickleReader(BaseReader):
    """
    Read pickle files, requiring the top-level object to be List[Dict[str, Any]].

    SECURITY: ``pickle`` can execute arbitrary code while deserializing.
    Only use this reader on files that come from a trusted source; prefer a
    data-only format (e.g. JSON / JSONL) for anything user-supplied.
    """

    def read(self, file_path: str) -> List[Dict[str, Any]]:
        """
        Load a pickle file and validate that it holds a list of dict documents.

        :param file_path: Path of the pickle file to open in binary mode.
        :return: Whatever ``self.filter`` returns for the validated list
            (``filter`` is presumably inherited from ``BaseReader`` - confirm
            there for the exact filtering rules).
        :raises ValueError: If the top-level object is not a list, if any item
            is not a dict, or if an item whose ``"type"`` is ``"text"`` lacks
            the ``self.text_column`` key.
        """
        with open(file_path, "rb") as f:
            # SECURITY: pickle.load() runs code embedded in the stream, so
            # file_path must never point at untrusted content. Deliberately
            # left in place (this reader exists to load pickles); flagged so
            # callers do not feed it external uploads.
            data = pickle.load(f)

        if not isinstance(data, list):
            raise ValueError("Pickle file must contain a list of documents.")

        # Validate in a single pass so the first offending item, in list
        # order, determines which error is reported.
        for doc in data:
            if not isinstance(doc, dict):
                raise ValueError("Every item in the list must be a dict.")
            # Only documents explicitly tagged as text are required to carry
            # the text column; other types pass through unchecked.
            if doc.get("type") == "text" and self.text_column not in doc:
                raise ValueError(f"Missing '{self.text_column}' in document: {doc}")

        return self.filter(data)

0 commit comments

Comments
 (0)