1+ import codecs
12import errno
23import locale
34import os
5+ import signal
46import struct
57import sys
68import threading
79import time
8- import signal
910from subprocess import Popen , PIPE
1011from types import TracebackType
1112from typing import (
@@ -692,8 +693,9 @@ def read_proc_output(self, reader: Callable) -> Generator[str, None, None]:
692693 :returns:
693694 A generator yielding strings.
694695
695- Specifically, each resulting string is the result of decoding
696- `read_chunk_size` bytes read from the subprocess' out/err stream.
696+ Specifically, each resulting string is the result of incrementally
697+ decoding up to `read_chunk_size` bytes from the subprocess' out/err
698+ stream. A multi-byte character split across two reads is buffered by the decoder and emitted once complete.
697699
698700 .. versionadded:: 1.0
699701 """
@@ -703,11 +705,18 @@ def read_proc_output(self, reader: Callable) -> Generator[str, None, None]:
703705 # process is done running" because sometimes that signal will appear
704706 # before we've actually read all the data in the stream (i.e.: a race
705707 # condition).
708+ decoder_cls = codecs .getincrementaldecoder (self .encoding )
709+ decoder = decoder_cls ("replace" )
706710 while True :
707711 data = reader (self .read_chunk_size )
708712 if not data :
709713 break
710- yield self .decode (data )
714+ # The incremental decoder will deal with partial characters.
715+ yield decoder .decode (data )
716+ pending_buf , _ = decoder .getstate ()
717+ if pending_buf :
718+ # Flush bytes of an incomplete trailing sequence still buffered in the decoder
719+ yield decoder .decode (b"" , True )
711720
712721 def write_our_output (self , stream : IO , string : str ) -> None :
713722 """
@@ -1020,6 +1029,13 @@ def decode(self, data: bytes) -> str:
10201029 """
10211030 Decode some ``data`` bytes, returning Unicode.
10221031
1032+ .. warning::
1033+ This function should not be used for streaming data. When data is
1034+ streamed in chunks, a chunk can end partway through the encoded
1035+ bytes of a multi-byte character. This function will return a
1036+ replacement character for the incomplete byte sequence.
1037+ Use a ``codecs.IncrementalDecoder`` instead.
1038+
10231039 .. versionadded:: 1.0
10241040 """
10251041 # NOTE: yes, this is a 1-liner. The point is to make it much harder to
0 commit comments