|
7 | 7 | from collections import Iterable
|
8 | 8 | import configparser
|
9 | 9 | from ..utils.logger import get_logger
|
| 10 | +from ..utils.utils import get_chunk |
10 | 11 |
|
11 | 12 | _ENCODING_UTF8 = 'utf-8'
|
12 | 13 |
|
@@ -198,6 +199,33 @@ def read_jsonline_lazy(filename, encoding=_ENCODING_UTF8, default=None):
|
198 | 199 | file.close()
|
199 | 200 |
|
200 | 201 |
|
def get_jsonline_chunk_lazy(filename, chunk_size, encoding=_ENCODING_UTF8, default=None):
    """
    lazily read a jsonline file and yield its items chunk by chunk
    :param filename: source jsonline file
    :param chunk_size: chunk size, passed through to get_chunk
    :param encoding: file encoding
    :param default: forwarded unchanged to read_jsonline_lazy
        (presumably the fallback used when the file does not exist -- confirm there)
    :return: generator yielding one chunk at a time, as produced by get_chunk
    """
    file_generator = read_jsonline_lazy(filename, encoding, default)
    # Delegate with `yield from` rather than a manual re-yield loop, so that
    # close()/throw() on this generator are forwarded to the chunk generator.
    yield from get_chunk(file_generator, chunk_size)
| 214 | + |
| 215 | + |
def get_jsonline_chunk(filename, chunk_size, encoding=_ENCODING_UTF8, default=None):
    """
    eagerly read a jsonline file and collect all of its chunks
    :param filename: source jsonline file
    :param chunk_size: chunk size, passed through to get_chunk
    :param encoding: file encoding
    :param default: forwarded unchanged to read_jsonline_lazy
        (presumably the fallback used when the file does not exist -- confirm there)
    :return: list holding every chunk produced by get_chunk
    """
    items = read_jsonline_lazy(filename, encoding, default)
    chunks = get_chunk(items, chunk_size)
    # list() drains the chunk iterable, so all chunks are held in memory at once.
    return list(chunks)
| 227 | + |
| 228 | + |
201 | 229 | def write_jsonline(filename, items, encoding=_ENCODING_UTF8, serialize_method=None):
|
202 | 230 | """
|
203 | 231 | write items to file with json line format
|
|
0 commit comments