Skip to content

Commit 97602a4

Browse files
authored
Fix proc output stream using incremental decoder
1 parent b23af0a commit 97602a4

File tree

1 file changed

+20
-4
lines changed

1 file changed

+20
-4
lines changed

invoke/runners.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1+
import codecs
12
import errno
23
import locale
34
import os
5+
import signal
46
import struct
57
import sys
68
import threading
79
import time
8-
import signal
910
from subprocess import Popen, PIPE
1011
from types import TracebackType
1112
from typing import (
@@ -692,8 +693,9 @@ def read_proc_output(self, reader: Callable) -> Generator[str, None, None]:
692693
:returns:
693694
A generator yielding strings.
694695
695-
Specifically, each resulting string is the result of decoding
696-
`read_chunk_size` bytes read from the subprocess' out/err stream.
696+
Specifically, each resulting string is the result of incrementally
697+
decoding up to `read_chunk_size` bytes from the subprocess' out/err
698+
stream. The decoder ensures that multi-byte character boundaries are respected.
697699
698700
.. versionadded:: 1.0
699701
"""
@@ -703,11 +705,18 @@ def read_proc_output(self, reader: Callable) -> Generator[str, None, None]:
703705
# process is done running" because sometimes that signal will appear
704706
# before we've actually read all the data in the stream (i.e.: a race
705707
# condition).
708+
decoder_cls = codecs.getincrementaldecoder(self.encoding)
709+
decoder = decoder_cls("replace")
706710
while True:
707711
data = reader(self.read_chunk_size)
708712
if not data:
709713
break
710-
yield self.decode(data)
714+
# The incremental decoder will deal with partial characters.
715+
yield decoder.decode(data)
716+
pending_buf, _ = decoder.getstate()
717+
if pending_buf:
718+
# Emit the final chunk of data
719+
yield decoder.decode(b"", True)
711720

712721
def write_our_output(self, stream: IO, string: str) -> None:
713722
"""
@@ -1020,6 +1029,13 @@ def decode(self, data: bytes) -> str:
10201029
"""
10211030
Decode some ``data`` bytes, returning Unicode.
10221031
1032+
.. warning::
1033+
This function should not be used for streaming data. When data is
1034+
streamed in chunks, one chunk can end with only part of a
1035+
multi-byte character. This function will return a replacement
1036+
character for the incomplete byte sequence.
1037+
Use a ``codecs.IncrementalDecoder`` instead.
1038+
10231039
.. versionadded:: 1.0
10241040
"""
10251041
# NOTE: yes, this is a 1-liner. The point is to make it much harder to

0 commit comments

Comments
 (0)