25
25
import codecs
26
26
import datetime
27
27
from email import message_from_file
28
- from email .header import Header , decode_header
28
+ from email .header import decode_header , make_header
29
29
from email .utils import parsedate_tz , mktime_tz
30
30
from fnmatch import fnmatch
31
- from functools import reduce
32
31
import logging
33
- import operator
34
32
import re
35
33
import sys
36
34
39
37
from django .contrib .auth .models import User
40
38
from django .utils .log import AdminEmailHandler
41
39
from django .utils import six
42
- from django .utils .six .moves import map
43
40
44
41
from patchwork .models import (Patch , Project , Person , Comment , State ,
45
42
DelegationRule , get_default_initial_patch_state )
@@ -63,19 +60,84 @@ def normalise_space(str):
63
60
return whitespace_re .sub (' ' , str ).strip ()
64
61
65
62
63
+ def sanitise_header (header_contents , header_name = None ):
64
+ """Clean an individual mail header.
65
+
66
+ Given a header with header_contents, optionally labelled
67
+ header_name, decode it with decode_header, sanitise it to make
68
+ sure it decodes correctly and contains no invalid characters,
69
+ then encode the result with make_header()
70
+ """
71
+
72
+ # We have some Py2/Py3 issues here.
73
+ #
74
+ # Firstly, the email parser (before we get here)
75
+ # Python 3: headers with weird chars are email.header.Header
76
+ # class, others as str
77
+ # Python 2: every header is a str
78
+ #
79
+ # Secondly, the behaviour of decode_header:
80
+ # Python 3: weird headers are labelled as unknown-8bit
81
+ # Python 2: weird headers are not labelled differently
82
+ #
83
+ # Lastly, making matters worse, in Python2, unknown-8bit doesn't
84
+ # seem to be supported as an input to make_header, so not only do
85
+ # we have to detect dodgy headers, we have to fix them ourselves.
86
+ #
87
+ # We solve this by catching any Unicode errors, and then manually
88
+ # handling any interesting headers.
89
+
90
+ value = decode_header (header_contents )
91
+ try :
92
+ header = make_header (value ,
93
+ header_name = header_name ,
94
+ continuation_ws = '\t ' )
95
+ except UnicodeDecodeError :
96
+ # At least one of the parts cannot be encoded as ascii.
97
+ # Find out which one and fix it somehow.
98
+ #
99
+ # We get here under Py2 when there are non-7-bit chars in header,
100
+ # or under Py2 or Py3 where decoding with the coding hint fails.
101
+
102
+ new_value = []
103
+
104
+ for (part , coding ) in value :
105
+ # We have random bytes that aren't properly coded.
106
+ # If we had a coding hint, it failed to help.
107
+ if six .PY3 :
108
+ # python3 - force coding to unknown-8bit
109
+ new_value += [(part , 'unknown-8bit' )]
110
+ else :
111
+ # python2 - no support in make_header for unknown-8bit
112
+ # We should do unknown-8bit coding ourselves.
113
+ # For now, we're just going to replace any dubious
114
+ # chars with ?.
115
+ #
116
+ # TODO: replace it with a proper QP unknown-8bit codec.
117
+ new_value += [(part .decode ('ascii' , errors = 'replace' )
118
+ .encode ('ascii' , errors = 'replace' ),
119
+ None )]
120
+
121
+ header = make_header (new_value ,
122
+ header_name = header_name ,
123
+ continuation_ws = '\t ' )
124
+
125
+ return header
126
+
127
+
66
128
def clean_header (header ):
67
129
"""Decode (possibly non-ascii) headers."""
68
- def decode (fragment ):
69
- (frag_str , frag_encoding ) = fragment
70
- if frag_encoding :
71
- return frag_str .decode (frag_encoding )
72
- elif isinstance (frag_str , six .binary_type ): # python 2
73
- return frag_str .decode ()
74
- return frag_str
75
130
76
- fragments = list ( map ( decode , decode_header ( header )) )
131
+ sane_header = sanitise_header ( header )
77
132
78
- return normalise_space (u' ' .join (fragments ))
133
+ # on Py2, we want to do unicode(), on Py3, str().
134
+ # That gets us the decoded, un-wrapped header.
135
+ if six .PY2 :
136
+ header_str = unicode (sane_header )
137
+ else :
138
+ header_str = str (sane_header )
139
+
140
+ return normalise_space (header_str )
79
141
80
142
81
143
def find_project_by_id (list_id ):
@@ -168,10 +230,13 @@ def mail_date(mail):
168
230
169
231
170
232
def mail_headers (mail ):
171
- return reduce (operator .__concat__ ,
172
- ['%s: %s\n ' % (k , Header (v , header_name = k ,
173
- continuation_ws = '\t ' ).encode ())
174
- for (k , v ) in list (mail .items ())])
233
+ headers = [(key , sanitise_header (value , header_name = key ))
234
+ for key , value in mail .items ()]
235
+
236
+ strings = [('%s: %s' % (key , header .encode ()))
237
+ for (key , header ) in headers ]
238
+
239
+ return '\n ' .join (strings )
175
240
176
241
177
242
def find_pull_request (content ):
0 commit comments