1+ import  codecs 
12import  errno 
23import  locale 
34import  os 
5+ import  signal 
46import  struct 
57import  sys 
68import  threading 
79import  time 
8- import  signal 
910from  subprocess  import  Popen , PIPE 
1011from  types  import  TracebackType 
1112from  typing  import  (
@@ -692,8 +693,9 @@ def read_proc_output(self, reader: Callable) -> Generator[str, None, None]:
692693        :returns: 
693694            A generator yielding strings. 
694695
695-             Specifically, each resulting string is the result of decoding 
696-             `read_chunk_size` bytes read from the subprocess' out/err stream. 
696+             Specifically, each resulting string is the result of incrementally 
697+             decoding up to `read_chunk_size` bytes from the subprocess' out/err 
698+             stream. A multi-byte character split across reads is decoded whole. 
697699
698700        .. versionadded:: 1.0 
699701        """ 
@@ -703,11 +705,18 @@ def read_proc_output(self, reader: Callable) -> Generator[str, None, None]:
703705        # process is done running" because sometimes that signal will appear 
704706        # before we've actually read all the data in the stream (i.e.: a race 
705707        # condition). 
708+         decoder_cls  =  codecs .getincrementaldecoder (self .encoding )
709+         decoder  =  decoder_cls ("replace" )
706710        while  True :
707711            data  =  reader (self .read_chunk_size )
708712            if  not  data :
709713                break 
710-             yield  self .decode (data )
714+             # The incremental decoder will deal with partial characters. 
715+             yield  decoder .decode (data )
716+         pending_buf , _  =  decoder .getstate ()
717+         if  pending_buf :
718+             # Emit the final chunk of data 
719+             yield  decoder .decode (b"" , True )
711720
712721    def  write_our_output (self , stream : IO , string : str ) ->  None :
713722        """ 
@@ -1020,6 +1029,13 @@ def decode(self, data: bytes) -> str:
10201029        """ 
10211030        Decode some ``data`` bytes, returning Unicode. 
10221031
1032+         .. warning:: 
1033+             This function should not be used for streaming data. When data is 
1034+             streamed in chunks, a chunk can end partway through a multi-byte 
1035+             character. This function will return a replacement 
1036+             character for the incomplete byte sequence. 
1037+             Use a ``codecs.IncrementalDecoder`` instead. 
1038+ 
10231039        .. versionadded:: 1.0 
10241040        """ 
10251041        # NOTE: yes, this is a 1-liner. The point is to make it much harder to 
0 commit comments