Skip to content

Commit 4bf478b

Browse files
author
Lőrinc
committed
Add tests for humongous encodings
1 parent 1b9faf2 commit 4bf478b

File tree

1 file changed

+13
-0
lines changed

1 file changed

+13
-0
lines changed

tests/test_encoding.py

+13
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,19 @@
1111
from .test_helpers import ENCODING_FACTORIES, MAX_EXAMPLES
1212

1313

14+
@pytest.mark.parametrize("make_enc", ENCODING_FACTORIES)
def test_extremely_big_encoding(make_enc: Callable[[], tiktoken.Encoding]):
    """Check that very long repetitive inputs survive an encode/decode round trip.

    For every seed string in the list below, a string made of 1,000,000
    repetitions of that seed is encoded and decoded again, and the result
    must equal the input. The same check is repeated with a single leading
    space prepended to the input.

    Args:
        make_enc: Zero-argument factory returning the encoding under test.
    """
    enc = make_enc()
    for c in ["^", "0", "a", " ", "\n", "'s"]:
        print(f"Validating `{c}`")

        # Build the input from the current seed so that each iteration
        # exercises a different repeated pattern, matching the message above.
        big_value = c * 1_000_000
        assert big_value == enc.decode(enc.encode(big_value))

        # Same input with a leading space prepended.
        big_value = " " + big_value
        assert big_value == enc.decode(enc.encode(big_value))
25+
26+
1427
def test_simple():
1528
enc = tiktoken.get_encoding("gpt2")
1629
assert enc.encode("hello world") == [31373, 995]

0 commit comments

Comments
 (0)