Skip to content

Commit d85ccc6

Browse files
implement get_jsonline_chunk_lazy and get_jsonline_chunk
1 parent cb63d4a commit d85ccc6

File tree

2 files changed

+36
-0
lines changed

2 files changed

+36
-0
lines changed

pysenal/io/file.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from collections import Iterable
88
import configparser
99
from ..utils.logger import get_logger
10+
from ..utils.utils import get_chunk
1011

1112
_ENCODING_UTF8 = 'utf-8'
1213

@@ -198,6 +199,33 @@ def read_jsonline_lazy(filename, encoding=_ENCODING_UTF8, default=None):
198199
file.close()
199200

200201

202+
def get_jsonline_chunk_lazy(filename, chunk_size, encoding=_ENCODING_UTF8, default=None):
    """
    lazily yield the items of a jsonline file, one chunk at a time
    :param filename: path of the jsonline file to read
    :param chunk_size: chunk size, passed through to get_chunk
    :param encoding: encoding used to read the file
    :param default: forwarded to read_jsonline_lazy as the value to use when the file does not exist
    :return: generator producing the chunks built by get_chunk
    """
    # Items are pulled lazily from the file; grouping is delegated to get_chunk.
    items = read_jsonline_lazy(filename, encoding, default)
    yield from get_chunk(items, chunk_size)
214+
215+
216+
def get_jsonline_chunk(filename, chunk_size, encoding=_ENCODING_UTF8, default=None):
    """
    read a jsonline file and return all of its chunks at once
    :param filename: path of the jsonline file to read
    :param chunk_size: chunk size, passed through to get_chunk
    :param encoding: encoding used to read the file
    :param default: forwarded to read_jsonline_lazy as the value to use when the file does not exist
    :return: list holding every chunk produced by get_chunk
    """
    items = read_jsonline_lazy(filename, encoding, default)
    chunks = get_chunk(items, chunk_size)
    # Materialize the chunk stream so the caller receives a plain list.
    return list(chunks)
227+
228+
201229
def write_jsonline(filename, items, encoding=_ENCODING_UTF8, serialize_method=None):
202230
"""
203231
write items to file with json line format

tests/io/test_file.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# -*- coding: UTF-8 -*-
22
import tempfile
33
import pytest
4+
import types
45
from decimal import Decimal
56
from pysenal.io.file import *
67
from pysenal.utils import json_serialize
@@ -71,6 +72,13 @@ def test_read_jsonline(example_json, fake_filename):
7172
assert read_jsonline(TEST_DATA_DIR + 'a.jsonl') == example_json
7273

7374

75+
def test_read_jsonline_chunk(example_json):
    """Check the eager and lazy chunk readers against the a.jsonl fixture file."""
    path = TEST_DATA_DIR + 'a.jsonl'
    expected = [example_json]

    # Eager variant returns the chunks directly.
    assert get_jsonline_chunk(path, 2) == expected

    # Lazy variant must be a real generator that yields the same chunks.
    lazy_chunks = get_jsonline_chunk_lazy(path, 2)
    assert isinstance(lazy_chunks, types.GeneratorType)
    assert list(lazy_chunks) == expected
80+
81+
7482
def test_write_lines(example_lines):
7583
dirname = tempfile.gettempdir() + '/'
7684
filename = dirname + 'a.txt'

0 commit comments

Comments
 (0)