1
+ import codecs
1
2
import errno
2
3
import locale
3
4
import os
5
+ import signal
4
6
import struct
5
7
import sys
6
8
import threading
7
9
import time
8
- import signal
9
10
from subprocess import Popen , PIPE
10
11
from types import TracebackType
11
12
from typing import (
@@ -692,8 +693,9 @@ def read_proc_output(self, reader: Callable) -> Generator[str, None, None]:
692
693
:returns:
693
694
A generator yielding strings.
694
695
695
- Specifically, each resulting string is the result of decoding
696
- `read_chunk_size` bytes read from the subprocess' out/err stream.
696
+ Specifically, each resulting string is the result of incrementally
697
+ decoding up to `read_chunk_size` bytes from the subprocess' out/err
698
+ stream. The decoder ensures that encoding boundaries are respected.
697
699
698
700
.. versionadded:: 1.0
699
701
"""
@@ -703,11 +705,18 @@ def read_proc_output(self, reader: Callable) -> Generator[str, None, None]:
703
705
# process is done running" because sometimes that signal will appear
704
706
# before we've actually read all the data in the stream (i.e.: a race
705
707
# condition).
708
+ decoder_cls = codecs .getincrementaldecoder (self .encoding )
709
+ decoder = decoder_cls ("replace" )
706
710
while True :
707
711
data = reader (self .read_chunk_size )
708
712
if not data :
709
713
break
710
- yield self .decode (data )
714
+ # The incremental decoder will deal with partial characters.
715
+ yield decoder .decode (data )
716
+ pending_buf , _ = decoder .getstate ()
717
+ if pending_buf :
718
+ # Emit the final chunk of data
719
+ yield decoder .decode (b"" , True )
711
720
712
721
def write_our_output (self , stream : IO , string : str ) -> None :
713
722
"""
@@ -1020,6 +1029,13 @@ def decode(self, data: bytes) -> str:
1020
1029
"""
1021
1030
Decode some ``data`` bytes, returning Unicode.
1022
1031
1032
+ .. warning::
1033
+ This function should not be used for streaming data. When data is
1034
+ streamed in chunks, one chunk can end with only parts of a
1035
+ multi-byte codepoint. This function will return a replacement
1036
+ character for the incomplete byte sequence.
1037
+ Use a ``codecs.IncrementalDecoder`` instead.
1038
+
1023
1039
.. versionadded:: 1.0
1024
1040
"""
1025
1041
# NOTE: yes, this is a 1-liner. The point is to make it much harder to
0 commit comments