Skip to content

Commit 7fb169a

Browse files
author
Stephen Reichling
authored
Merge pull request #10 from ActiveState/jeremyp/cve-23336
Address CVE-2021-23336 for urlparse
2 parents fc70db0 + 369bf3f commit 7fb169a

File tree

4 files changed

+85
-18
lines changed

4 files changed

+85
-18
lines changed

Lib/cgi.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,8 @@ def nolog(*allargs):
121121
# 0 ==> unlimited input
122122
maxlen = 0
123123

124-
def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
124+
def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0,
125+
separator='&'):
125126
"""Parse a query in the environment or from a file (default stdin)
126127
127128
Arguments, all optional:
@@ -140,6 +141,9 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
140141
strict_parsing: flag indicating what to do with parsing errors.
141142
If false (the default), errors are silently ignored.
142143
If true, errors raise a ValueError exception.
144+
145+
separator: str. The symbol to use for separating the query arguments.
146+
Defaults to &.
143147
"""
144148
if fp is None:
145149
fp = sys.stdin
@@ -148,7 +152,7 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
148152
if environ['REQUEST_METHOD'] == 'POST':
149153
ctype, pdict = parse_header(environ['CONTENT_TYPE'])
150154
if ctype == 'multipart/form-data':
151-
return parse_multipart(fp, pdict)
155+
return parse_multipart(fp, pdict, separator=separator)
152156
elif ctype == 'application/x-www-form-urlencoded':
153157
clength = int(environ['CONTENT_LENGTH'])
154158
if maxlen and clength > maxlen:
@@ -171,7 +175,7 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
171175
else:
172176
qs = ""
173177
environ['QUERY_STRING'] = qs # XXX Shouldn't, really
174-
return urlparse.parse_qs(qs, keep_blank_values, strict_parsing)
178+
return urlparse.parse_qs(qs, keep_blank_values, strict_parsing, separator=separator)
175179

176180

177181
# parse query string function called from urlparse,
@@ -191,7 +195,7 @@ def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
191195
return urlparse.parse_qsl(qs, keep_blank_values, strict_parsing,
192196
max_num_fields)
193197

194-
def parse_multipart(fp, pdict):
198+
def parse_multipart(fp, pdict, separator='&'):
195199
"""Parse multipart input.
196200
197201
Arguments:
@@ -395,7 +399,7 @@ class FieldStorage:
395399

396400
def __init__(self, fp=None, headers=None, outerboundary="",
397401
environ=os.environ, keep_blank_values=0, strict_parsing=0,
398-
max_num_fields=None):
402+
max_num_fields=None, separator='&'):
399403
"""Constructor. Read multipart/* until last part.
400404
401405
Arguments, all optional:
@@ -430,6 +434,7 @@ def __init__(self, fp=None, headers=None, outerboundary="",
430434
self.keep_blank_values = keep_blank_values
431435
self.strict_parsing = strict_parsing
432436
self.max_num_fields = max_num_fields
437+
self.separator = separator
433438
if 'REQUEST_METHOD' in environ:
434439
method = environ['REQUEST_METHOD'].upper()
435440
self.qs_on_post = None
@@ -613,7 +618,8 @@ def read_urlencoded(self):
613618
if self.qs_on_post:
614619
qs += '&' + self.qs_on_post
615620
query = urlparse.parse_qsl(qs, self.keep_blank_values,
616-
self.strict_parsing, self.max_num_fields)
621+
self.strict_parsing, self.max_num_fields,
622+
self.separator)
617623
self.list = [MiniFieldStorage(key, value) for key, value in query]
618624
self.skip_lines()
619625

@@ -629,7 +635,8 @@ def read_multi(self, environ, keep_blank_values, strict_parsing):
629635
query = urlparse.parse_qsl(self.qs_on_post,
630636
self.keep_blank_values,
631637
self.strict_parsing,
632-
self.max_num_fields)
638+
self.max_num_fields,
639+
self.separator)
633640
self.list.extend(MiniFieldStorage(key, value)
634641
for key, value in query)
635642
FieldStorageClass = None
@@ -649,7 +656,7 @@ def read_multi(self, environ, keep_blank_values, strict_parsing):
649656
headers = rfc822.Message(self.fp)
650657
part = klass(self.fp, headers, ib,
651658
environ, keep_blank_values, strict_parsing,
652-
max_num_fields)
659+
max_num_fields, self.separator)
653660

654661
if max_num_fields is not None:
655662
max_num_fields -= 1

Lib/test/test_cgi.py

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -61,12 +61,9 @@ def do_test(buf, method):
6161
("", ValueError("bad query field: ''")),
6262
("&", ValueError("bad query field: ''")),
6363
("&&", ValueError("bad query field: ''")),
64-
(";", ValueError("bad query field: ''")),
65-
(";&;", ValueError("bad query field: ''")),
6664
# Should the next few really be valid?
6765
("=", {}),
6866
("=&=", {}),
69-
("=;=", {}),
7067
# This rest seem to make sense
7168
("=a", {'': ['a']}),
7269
("&=a", ValueError("bad query field: ''")),
@@ -81,8 +78,6 @@ def do_test(buf, method):
8178
("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
8279
("a=a+b&a=b+a", {'a': ['a b', 'b a']}),
8380
("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
84-
("x=1;y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
85-
("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
8681
("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env",
8782
{'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'],
8883
'cuyer': ['r'],
@@ -104,6 +99,18 @@ def do_test(buf, method):
10499
})
105100
]
106101

102+
# Query strings that use ';' as the field separator.  Each entry pairs the raw
# query with either the mapping it should parse into or a ValueError instance
# carrying the args the parser is expected to raise with.
parse_semicolon_test_cases = [
    # Plain key=value fields split on ';'.
    (
        "x=1;y=2.0",
        {'x': ['1'], 'y': ['2.0']},
    ),
    (
        "x=1;y=2.0;z=2-3.%2b0",
        {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']},
    ),
    # Empty or '='-less fields are expected to be reported as bad fields.
    (
        ";",
        ValueError("bad query field: ''"),
    ),
    (
        ";;",
        ValueError("bad query field: ''"),
    ),
    (
        "=;a",
        ValueError("bad query field: 'a'"),
    ),
    (
        ";b=a",
        ValueError("bad query field: ''"),
    ),
    (
        "b;=a",
        ValueError("bad query field: 'b'"),
    ),
    # '+' decodes to a space; a repeated key collects every value.
    (
        "a=a+b;b=b+c",
        {'a': ['a b'], 'b': ['b c']},
    ),
    (
        "a=a+b;a=b+a",
        {'a': ['a b', 'b a']},
    ),
]
113+
107114
def first_elts(list):
108115
return map(lambda x:x[0], list)
109116

@@ -177,6 +184,23 @@ def test_strict(self):
177184
self.assertItemsEqual(sd.items(),
178185
first_second_elts(expect.items()))
179186

187+
def test_separator(self):
    """Check that FieldStorage honours ';' as a custom field separator.

    Every entry of parse_semicolon_test_cases is parsed from QUERY_STRING
    with strict_parsing enabled.  An entry whose expectation is a
    ValueError must raise a ValueError of the same type and args; any
    other entry must parse without error into the expected mapping.
    """
    for orig, expect in parse_semicolon_test_cases:
        env = {'QUERY_STRING': orig}
        msg = "Error parsing %r" % orig

        if isinstance(expect, ValueError):
            # Decide the branch from the expectation, not from what the
            # parser happened to do: a query that should be rejected but
            # parses anyway is then a clean assertion failure instead of
            # an AttributeError on the exception object.
            with self.assertRaises(ValueError) as cm:
                cgi.FieldStorage(separator=';', environ=env,
                                 strict_parsing=True)
            self.assertEqual(type(cm.exception), type(expect), msg)
            self.assertEqual(cm.exception.args, expect.args, msg)
            continue

        # An unexpected ValueError propagates from here with its own
        # message, which names the offending field.
        fs = cgi.FieldStorage(separator=';', environ=env,
                              strict_parsing=True)
        for key, expect_val in expect.items():
            self.assertIn(key, fs, msg)
            # getvalue() yields a bare string for a single value and a
            # list when the key was repeated.
            if len(expect_val) > 1:
                self.assertEqual(fs.getvalue(key), expect_val, msg)
            else:
                self.assertEqual(fs.getvalue(key), expect_val[0], msg)
203+
180204
def test_weird_formcontentdict(self):
181205
# Test the weird FormContentDict classes
182206
env = {'QUERY_STRING': "x=1&y=2.0&z=2-3.%2b0&1=1abc"}

Lib/test/test_urlparse.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,13 @@
2424
("&a=b", [('a', 'b')]),
2525
("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
2626
("a=1&a=2", [('a', '1'), ('a', '2')]),
27+
(";a=b", [(';a', 'b')]),
28+
("a=a+b;b=b+c", [('a', 'a b;b=b c')]),
29+
(b";a=b", [(b';a', b'b')]),
30+
(b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]),
31+
]
32+
33+
parse_qsl_semicolon_cases = [
2734
(";", []),
2835
(";;", []),
2936
(";a=b", [('a', 'b')]),
@@ -57,6 +64,13 @@
5764
(b"&a=b", {b'a': [b'b']}),
5865
(b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
5966
(b"a=1&a=2", {b'a': [b'1', b'2']}),
67+
(";a=b", {';a': ['b']}),
68+
("a=a+b;b=b+c", {'a': ['a b;b=b c']}),
69+
(b";a=b", {b';a': [b'b']}),
70+
(b"a=a+b;b=b+c", {b'a': [b'a b;b=b c']}),
71+
]
72+
73+
parse_qs_semicolon_cases = [
6074
(";", {}),
6175
(";;", {}),
6276
(";a=b", {'a': ['b']}),
@@ -141,6 +155,16 @@ def test_qs(self):
141155
self.assertEqual(result, expect_without_blanks,
142156
"Error parsing %r" % orig)
143157

158+
def test_parse_qsl_separator(self):
    """parse_qsl() must split fields on ';' when given separator=';'."""
    for query, wanted in parse_qsl_semicolon_cases:
        parsed = urlparse.parse_qsl(query, separator=';')
        failure_note = "Error parsing %r" % query
        self.assertEqual(parsed, wanted, failure_note)
162+
163+
def test_parse_qs_separator(self):
    """parse_qs() must split fields on ';' when given separator=';'."""
    for query, wanted in parse_qs_semicolon_cases:
        parsed = urlparse.parse_qs(query, separator=';')
        failure_note = "Error parsing %r" % query
        self.assertEqual(parsed, wanted, failure_note)
167+
144168
def test_roundtrips(self):
145169
testcases = [
146170
('file:///tmp/junk.txt',

Lib/urlparse.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,8 @@ def unquote(s):
382382
append(item)
383383
return ''.join(res)
384384

385-
def parse_qs(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
385+
def parse_qs(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None,
386+
separator='&'):
386387
"""Parse a query given as a string argument.
387388
388389
Arguments:
@@ -402,17 +403,21 @@ def parse_qs(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
402403
403404
max_num_fields: int. If set, then throws a ValueError if there
404405
are more than n fields read by parse_qsl().
406+
407+
separator: str. The symbol to use for separating the query arguments.
408+
Defaults to &.
405409
"""
406410
dict = {}
407411
for name, value in parse_qsl(qs, keep_blank_values, strict_parsing,
408-
max_num_fields):
412+
max_num_fields, separator):
409413
if name in dict:
410414
dict[name].append(value)
411415
else:
412416
dict[name] = [value]
413417
return dict
414418

415-
def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
419+
def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None,
420+
separator='&'):
416421
"""Parse a query given as a string argument.
417422
418423
Arguments:
@@ -432,17 +437,24 @@ def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
432437
max_num_fields: int. If set, then throws a ValueError if there
433438
are more than n fields read by parse_qsl().
434439
440+
separator: str. The symbol to use for separating the query arguments.
441+
Defaults to &.
442+
435443
Returns a list, as G-d intended.
436444
"""
445+
if not separator or (not isinstance(separator, str)
446+
and not isinstance(separator, bytes)):
447+
raise ValueError("Separator must be of type string or bytes.")
448+
437449
# If max_num_fields is defined then check that the number of fields
438450
# is less than max_num_fields. This prevents a memory exhaustion DOS
439451
# attack via post bodies with many fields.
440452
if max_num_fields is not None:
441-
num_fields = 1 + qs.count('&') + qs.count(';')
453+
num_fields = 1 + qs.count(separator)
442454
if max_num_fields < num_fields:
443455
raise ValueError('Max number of fields exceeded')
444456

445-
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
457+
pairs = [s for s in qs.split(separator)]
446458
r = []
447459
for name_value in pairs:
448460
if not name_value and not strict_parsing:

0 commit comments

Comments
 (0)