From 49f174b27a06d6adb73155d2da5c82f844732c92 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Thu, 18 Aug 2022 00:07:24 -0500 Subject: [PATCH 01/16] Add itertoolz.flat --- toolz/itertoolz.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/toolz/itertoolz.py b/toolz/itertoolz.py index 5049e5eb..68c86e7d 100644 --- a/toolz/itertoolz.py +++ b/toolz/itertoolz.py @@ -13,7 +13,7 @@ 'first', 'second', 'nth', 'last', 'get', 'concat', 'concatv', 'mapcat', 'cons', 'interpose', 'frequencies', 'reduceby', 'iterate', 'sliding_window', 'partition', 'partition_all', 'count', 'pluck', - 'join', 'tail', 'diff', 'topk', 'peek', 'peekn', 'random_sample') + 'join', 'tail', 'diff', 'topk', 'peek', 'peekn', 'random_sample', 'flat') def remove(predicate, seq): @@ -1055,3 +1055,12 @@ def random_sample(prob, seq, random_state=None): random_state = Random(random_state) return filter(lambda _: random_state.random() < prob, seq) + + +def flat(level, seq): + """ Flatten a sequence by n levels """ + for item in seq: + if level == 0 or not hasattr(item, '__iter__'): + yield item + else: + yield from flat(level - 1, item) \ No newline at end of file From 97ef2b9ea7ab2b96b577deba5d3c64195637e124 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Thu, 18 Aug 2022 01:10:19 -0500 Subject: [PATCH 02/16] Add level check. 
--- toolz/itertoolz.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/toolz/itertoolz.py b/toolz/itertoolz.py index 68c86e7d..d6426d40 100644 --- a/toolz/itertoolz.py +++ b/toolz/itertoolz.py @@ -1058,7 +1058,9 @@ def random_sample(prob, seq, random_state=None): def flat(level, seq): - """ Flatten a sequence by n levels """ + """ Flatten a possible nested sequence by n levels """ + if level < 0: + raise ValueError("level must be >= 0") for item in seq: if level == 0 or not hasattr(item, '__iter__'): yield item From a6bf579d912231d28037aaba6a31bdf418939662 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Thu, 18 Aug 2022 01:11:40 -0500 Subject: [PATCH 03/16] Add tests. --- toolz/tests/test_itertoolz.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/toolz/tests/test_itertoolz.py b/toolz/tests/test_itertoolz.py index 25e7d39a..ff436b18 100644 --- a/toolz/tests/test_itertoolz.py +++ b/toolz/tests/test_itertoolz.py @@ -13,7 +13,7 @@ reduceby, iterate, accumulate, sliding_window, count, partition, partition_all, take_nth, pluck, join, - diff, topk, peek, peekn, random_sample) + diff, topk, peek, peekn, random_sample, flat) from operator import add, mul @@ -547,3 +547,14 @@ def test_random_sample(): assert mk_rsample(b"a") == mk_rsample(u"a") assert raises(TypeError, lambda: mk_rsample([])) + + +def test_flat(): + seq = [1, 2, 3, 4] + assert list(flat(0, seq)) == seq + assert list(flat(1, seq)) == seq + + seq = [1, [2, [3]]] + assert list(flat(0, seq)) == seq + assert list(flat(1, seq)) == [1, 2, [3]] + assert list(flat(2, seq)) == [1, 2, 3] From 15856c97b81363e11dc6e0638de316549d63b35d Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Thu, 18 Aug 2022 01:18:32 -0500 Subject: [PATCH 04/16] Add flat to toolz.curried --- toolz/curried/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/toolz/curried/__init__.py b/toolz/curried/__init__.py index 356eddbd..cf8852a1 100644 --- a/toolz/curried/__init__.py +++ 
b/toolz/curried/__init__.py @@ -65,6 +65,7 @@ drop = toolz.curry(toolz.drop) excepts = toolz.curry(toolz.excepts) filter = toolz.curry(toolz.filter) +flat = toolz.curry(toolz.flat) get = toolz.curry(toolz.get) get_in = toolz.curry(toolz.get_in) groupby = toolz.curry(toolz.groupby) From 3dd7d9ba7e948c61464a19959925a17150048f3e Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Thu, 18 Aug 2022 01:20:25 -0500 Subject: [PATCH 05/16] Add newline. --- toolz/itertoolz.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toolz/itertoolz.py b/toolz/itertoolz.py index d6426d40..1a5e72e7 100644 --- a/toolz/itertoolz.py +++ b/toolz/itertoolz.py @@ -1065,4 +1065,4 @@ def flat(level, seq): if level == 0 or not hasattr(item, '__iter__'): yield item else: - yield from flat(level - 1, item) \ No newline at end of file + yield from flat(level - 1, item) From d5a4caf3cd9bb6943105a16cfdd52c96ffc38787 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Thu, 18 Aug 2022 01:22:22 -0500 Subject: [PATCH 06/16] Break line. --- toolz/itertoolz.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/toolz/itertoolz.py b/toolz/itertoolz.py index 1a5e72e7..c4be14ff 100644 --- a/toolz/itertoolz.py +++ b/toolz/itertoolz.py @@ -13,7 +13,8 @@ 'first', 'second', 'nth', 'last', 'get', 'concat', 'concatv', 'mapcat', 'cons', 'interpose', 'frequencies', 'reduceby', 'iterate', 'sliding_window', 'partition', 'partition_all', 'count', 'pluck', - 'join', 'tail', 'diff', 'topk', 'peek', 'peekn', 'random_sample', 'flat') + 'join', 'tail', 'diff', 'topk', 'peek', 'peekn', 'random_sample', + 'flat') def remove(predicate, seq): From 0e3a5f32c6127f2816a80a921ca4682b3953c3a1 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Thu, 18 Aug 2022 01:25:20 -0500 Subject: [PATCH 07/16] Add test for negative levels. 
--- toolz/tests/test_itertoolz.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/toolz/tests/test_itertoolz.py b/toolz/tests/test_itertoolz.py index ff436b18..d534f497 100644 --- a/toolz/tests/test_itertoolz.py +++ b/toolz/tests/test_itertoolz.py @@ -1,3 +1,4 @@ +import pytest import itertools from itertools import starmap from toolz.utils import raises @@ -558,3 +559,6 @@ def test_flat(): assert list(flat(0, seq)) == seq assert list(flat(1, seq)) == [1, 2, [3]] assert list(flat(2, seq)) == [1, 2, 3] + + with pytest.raises(ValueError): + list(flat(-1, seq)) From d54b37d51fe71d794d415c55e71d85a25f6850e0 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Fri, 19 Aug 2022 13:59:31 -0500 Subject: [PATCH 08/16] Update implementation. flat -> flatten Allow -1 to for unregulated recursion Add UDF descend function to decide if itertable invokes recursive call. Add default descend function --- toolz/itertoolz.py | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/toolz/itertoolz.py b/toolz/itertoolz.py index 015d561c..a42c2b8d 100644 --- a/toolz/itertoolz.py +++ b/toolz/itertoolz.py @@ -4,7 +4,7 @@ import operator from functools import partial from itertools import filterfalse, zip_longest -from collections.abc import Sequence +from collections.abc import Sequence, Mapping from toolz.utils import no_default @@ -14,7 +14,7 @@ 'mapcat', 'cons', 'interpose', 'frequencies', 'reduceby', 'iterate', 'sliding_window', 'partition', 'partition_all', 'count', 'pluck', 'join', 'tail', 'diff', 'topk', 'peek', 'peekn', 'random_sample', - 'flat') + 'flatten') def remove(predicate, seq): @@ -1054,12 +1054,25 @@ def random_sample(prob, seq, random_state=None): return filter(lambda _: random_state.random() < prob, seq) -def flat(level, seq): +def _default_descend(x): + return not isinstance(x, (str, bytes, Mapping)) + +def flatten(level, seq, descend=_default_descend): """ Flatten a possible nested sequence by n levels """ - if level < 0: 
- raise ValueError("level must be >= 0") - for item in seq: - if level == 0 or not hasattr(item, '__iter__'): - yield item - else: - yield from flat(level - 1, item) + if level < -1: + raise ValueError("Level must be >= -1") + if not callable(descend): + raise ValueError("descend must be a callable boolean function") + + def flat(level, seq): + if level == 0: + yield from seq + return + + for item in seq: + if isiterable(item) and descend(item): + yield from flat(level - 1, item) + else: + yield item + + yield from flat(level, seq) From f2abc35b626f08c19db5a1bc18475e612d6b3324 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Fri, 19 Aug 2022 13:59:55 -0500 Subject: [PATCH 09/16] Update function name. --- toolz/curried/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toolz/curried/__init__.py b/toolz/curried/__init__.py index cf8852a1..e8634b24 100644 --- a/toolz/curried/__init__.py +++ b/toolz/curried/__init__.py @@ -65,7 +65,7 @@ drop = toolz.curry(toolz.drop) excepts = toolz.curry(toolz.excepts) filter = toolz.curry(toolz.filter) -flat = toolz.curry(toolz.flat) +flatten = toolz.curry(toolz.flatten) get = toolz.curry(toolz.get) get_in = toolz.curry(toolz.get_in) groupby = toolz.curry(toolz.groupby) From 44bd2c564e08ed0d753702674be23e81df147f5f Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Fri, 19 Aug 2022 14:00:17 -0500 Subject: [PATCH 10/16] Update tests for flatten. 
--- toolz/tests/test_itertoolz.py | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/toolz/tests/test_itertoolz.py b/toolz/tests/test_itertoolz.py index d534f497..d4f5c875 100644 --- a/toolz/tests/test_itertoolz.py +++ b/toolz/tests/test_itertoolz.py @@ -14,7 +14,7 @@ reduceby, iterate, accumulate, sliding_window, count, partition, partition_all, take_nth, pluck, join, - diff, topk, peek, peekn, random_sample, flat) + diff, topk, peek, peekn, random_sample, flatten) from operator import add, mul @@ -552,13 +552,27 @@ def test_random_sample(): def test_flat(): seq = [1, 2, 3, 4] - assert list(flat(0, seq)) == seq - assert list(flat(1, seq)) == seq + assert list(flatten(0, seq)) == seq + assert list(flatten(1, seq)) == seq seq = [1, [2, [3]]] - assert list(flat(0, seq)) == seq - assert list(flat(1, seq)) == [1, 2, [3]] - assert list(flat(2, seq)) == [1, 2, 3] - - with pytest.raises(ValueError): - list(flat(-1, seq)) + assert list(flatten(0, seq)) == seq + assert list(flatten(1, seq)) == [1, 2, [3]] + assert list(flatten(2, seq)) == [1, 2, 3] + + # Test mappings + seq = [{'a': 1}, [1, 2, 3]] + assert list(flatten(0, seq)) == seq + assert list(flatten(1, seq)) == [{'a': 1}, 1, 2, 3] + + # Test stringsj + seq = ["asgf", b"abcd"] + assert list(flatten(-1, seq)) == seq + + # Test custom descend function + def descend(x): + if isinstance(x, str): + return len(x) != 1 + return False + seq = ["asdf", [1, 2, 3]] + assert list(flatten(1, seq, descend=descend)) == ["a", "s", "d", "f", [1, 2, 3]] From 1c01b0a6e9fa9040606b1584b83bf1c025705779 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Fri, 19 Aug 2022 14:32:00 -0500 Subject: [PATCH 11/16] Add newline for linter. 
--- toolz/itertoolz.py | 1 + 1 file changed, 1 insertion(+) diff --git a/toolz/itertoolz.py b/toolz/itertoolz.py index a42c2b8d..c9e1228f 100644 --- a/toolz/itertoolz.py +++ b/toolz/itertoolz.py @@ -1057,6 +1057,7 @@ def random_sample(prob, seq, random_state=None): def _default_descend(x): return not isinstance(x, (str, bytes, Mapping)) + def flatten(level, seq, descend=_default_descend): """ Flatten a possible nested sequence by n levels """ if level < -1: From f2a91e210e6d2b6e32e7b8bdbd57efb2b8864b2b Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Fri, 19 Aug 2022 14:41:02 -0500 Subject: [PATCH 12/16] Remove extra space. --- toolz/itertoolz.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toolz/itertoolz.py b/toolz/itertoolz.py index c9e1228f..2bd1c1fd 100644 --- a/toolz/itertoolz.py +++ b/toolz/itertoolz.py @@ -1063,7 +1063,7 @@ def flatten(level, seq, descend=_default_descend): if level < -1: raise ValueError("Level must be >= -1") if not callable(descend): - raise ValueError("descend must be a callable boolean function") + raise ValueError("descend must be a callable boolean function") def flat(level, seq): if level == 0: From a926a1380cbafab7328f58439316d623f6608b5c Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Fri, 19 Aug 2022 14:41:31 -0500 Subject: [PATCH 13/16] Test argument validation. 
--- toolz/tests/test_itertoolz.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/toolz/tests/test_itertoolz.py b/toolz/tests/test_itertoolz.py index d4f5c875..7786a893 100644 --- a/toolz/tests/test_itertoolz.py +++ b/toolz/tests/test_itertoolz.py @@ -576,3 +576,9 @@ def descend(x): return False seq = ["asdf", [1, 2, 3]] assert list(flatten(1, seq, descend=descend)) == ["a", "s", "d", "f", [1, 2, 3]] + + with pytest.raises(ValueError): + list(flatten(0, [1, 2], descend=True)) + + with pytest.raises(ValueError): + list(flatten(-2, [1, 2])) From 24bbff38143451528de3d88202d7c557b87e1062 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Tue, 23 Aug 2022 09:31:03 -0500 Subject: [PATCH 14/16] Add docstring for flatten. --- toolz/itertoolz.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/toolz/itertoolz.py b/toolz/itertoolz.py index 2bd1c1fd..635038f5 100644 --- a/toolz/itertoolz.py +++ b/toolz/itertoolz.py @@ -485,6 +485,7 @@ def concat(seqs): See also: itertools.chain.from_iterable equivalent + flatten """ return itertools.chain.from_iterable(seqs) @@ -1059,7 +1060,34 @@ def _default_descend(x): def flatten(level, seq, descend=_default_descend): - """ Flatten a possible nested sequence by n levels """ + """ Flatten a possibly nested sequence + + The flattening is depth limited. A level 0 flattening will + yield the input sequence unchanged. A level -1 flattening + will flatten all possible levels of nesting. + + >>> list(flatten(0, [1, [2], [[3]]])) # flatten 0 levels + [1, [2], [[3]]] + >>> list(flatten(1, [1, [2], [[3]]])) # flatten 1 level + [1, 2, [3]] + >>> list(flatten(2, [1, [2], [[3]]])) + [1, 2, 3] + >>> list(flatten(-1, [1, [[[[2]]]]])) # flatten all levels + [1, 2] + + An optional ``descend`` function can be provided by the user + to determine which iterable objects to recurse into. This function + should return a boolean with True meaning it is permissible to descend + another level of recursion. 
The recursion limit of the Python interpreter + is the ultimate bounding factor on depth. By default, stings, bytes, + and mappings are exempted. + + >>> list(flatten(-1, ['abc', [{'a': 2}, [b'123']]])) + ['abc', {'a': 2}, b'123'] + + See also: + concat + """ if level < -1: raise ValueError("Level must be >= -1") if not callable(descend): From 0d58686fd7f29d76c1f81769621b6c4bbc5f7912 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Tue, 23 Aug 2022 09:34:23 -0500 Subject: [PATCH 15/16] Mention Javascript inspiration. --- toolz/itertoolz.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/toolz/itertoolz.py b/toolz/itertoolz.py index 635038f5..dd4d92e6 100644 --- a/toolz/itertoolz.py +++ b/toolz/itertoolz.py @@ -1062,7 +1062,8 @@ def _default_descend(x): def flatten(level, seq, descend=_default_descend): """ Flatten a possibly nested sequence - The flattening is depth limited. A level 0 flattening will + Inspired by Javascript's Array.flat(), this is a recursive, + depth limited flattening generator. A level 0 flattening will yield the input sequence unchanged. A level -1 flattening will flatten all possible levels of nesting. From e1f364d5bd50708b94c2c8cab419c85bc829acb1 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Tue, 23 Aug 2022 09:36:27 -0500 Subject: [PATCH 16/16] Remove trailing whitespace. --- toolz/itertoolz.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toolz/itertoolz.py b/toolz/itertoolz.py index dd4d92e6..7bcec912 100644 --- a/toolz/itertoolz.py +++ b/toolz/itertoolz.py @@ -1080,7 +1080,7 @@ def flatten(level, seq, descend=_default_descend): to determine which iterable objects to recurse into. This function should return a boolean with True meaning it is permissible to descend another level of recursion. The recursion limit of the Python interpreter - is the ultimate bounding factor on depth. By default, stings, bytes, + is the ultimate bounding factor on depth. 
By default, strings, bytes, and mappings are exempted. >>> list(flatten(-1, ['abc', [{'a': 2}, [b'123']]]))