Skip to content

Commit 71e3ba0

Browse files
committed
more thorough unicode tainting
1 parent 12dd576 commit 71e3ba0

File tree

11 files changed

+148
-84
lines changed

11 files changed

+148
-84
lines changed

Include/unicodeobject.h

+13-2
Original file line numberDiff line numberDiff line change
@@ -472,9 +472,10 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromUnicodeT(
472472
const Py_UNICODE *u, Py_ssize_t size, PyObject *taint);
473473

474474
/* Similar to PyUnicode_FromUnicode(), but u points to Latin-1 encoded bytes */
475-
PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
475+
PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSizeT(
476476
const char *u, /* char buffer */
477-
Py_ssize_t size /* size of buffer */
477+
Py_ssize_t size, /* size of buffer */
478+
PyObject *taint
478479
);
479480

480481
/* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
@@ -679,6 +680,7 @@ PyAPI_FUNC(int) PyUnicode_SetDefaultEncoding(
679680
PyAPI_FUNC(PyObject*) PyUnicode_Decode(
680681
const char *s, /* encoded string */
681682
Py_ssize_t size, /* size of buffer */
683+
PyObject *taint,
682684
const char *encoding, /* encoding */
683685
const char *errors /* error handling */
684686
);
@@ -689,6 +691,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_Decode(
689691
PyAPI_FUNC(PyObject*) PyUnicode_Encode(
690692
const Py_UNICODE *s, /* Unicode char buffer */
691693
Py_ssize_t size, /* number of Py_UNICODE chars to encode */
694+
PyObject *taint,
692695
const char *encoding, /* encoding */
693696
const char *errors /* error handling */
694697
);
@@ -721,19 +724,22 @@ PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
721724
PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
722725
const char *string, /* UTF-7 encoded string */
723726
Py_ssize_t length, /* size of string */
727+
PyObject *taint,
724728
const char *errors /* error handling */
725729
);
726730

727731
PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
728732
const char *string, /* UTF-7 encoded string */
729733
Py_ssize_t length, /* size of string */
734+
PyObject *taint,
730735
const char *errors, /* error handling */
731736
Py_ssize_t *consumed /* bytes consumed */
732737
);
733738

734739
PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
735740
const Py_UNICODE *data, /* Unicode char buffer */
736741
Py_ssize_t length, /* number of Py_UNICODE chars to encode */
742+
PyObject *taint,
737743
int encodeSetO, /* force the encoder to encode characters in
738744
Set O, as described in RFC2152 */
739745
int encodeWhiteSpace, /* force the encoder to encode space, tab,
@@ -974,6 +980,7 @@ PyObject *_PyUnicode_DecodeUnicodeInternal(
974980
PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
975981
const char *string, /* Latin-1 encoded string */
976982
Py_ssize_t length, /* size of string */
983+
PyObject *taint,
977984
const char *errors /* error handling */
978985
);
979986

@@ -997,6 +1004,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
9971004
PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
9981005
const char *string, /* ASCII encoded string */
9991006
Py_ssize_t length, /* size of string */
1007+
PyObject *taint,
10001008
const char *errors /* error handling */
10011009
);
10021010

@@ -1036,6 +1044,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
10361044
PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
10371045
const char *string, /* Encoded string */
10381046
Py_ssize_t length, /* size of string */
1047+
PyObject *taint,
10391048
PyObject *mapping, /* character mapping
10401049
(char ordinal -> unicode ordinal) */
10411050
const char *errors /* error handling */
@@ -1050,6 +1059,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
10501059
PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
10511060
const Py_UNICODE *data, /* Unicode char buffer */
10521061
Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
1062+
PyObject *taint,
10531063
PyObject *mapping, /* character mapping
10541064
(unicode ordinal -> char ordinal) */
10551065
const char *errors /* error handling */
@@ -1071,6 +1081,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
10711081
PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
10721082
const Py_UNICODE *data, /* Unicode char buffer */
10731083
Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
1084+
PyObject *taint,
10741085
PyObject *table, /* Translate table */
10751086
const char *errors /* error handling */
10761087
);

Modules/_codecsmodule.c

+6-4
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ utf_7_decode(PyObject *self,
243243
return NULL;
244244
consumed = pbuf.len;
245245

246-
decoded = PyUnicode_DecodeUTF7Stateful(pbuf.buf, pbuf.len, errors,
246+
decoded = PyUnicode_DecodeUTF7Stateful(pbuf.buf, pbuf.len, 0, errors,
247247
final ? NULL : &consumed);
248248
PyBuffer_Release(&pbuf);
249249
if (decoded == NULL)
@@ -526,7 +526,7 @@ latin_1_decode(PyObject *self,
526526
&pbuf, &errors))
527527
return NULL;
528528

529-
unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, errors);
529+
unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, 0, errors);
530530
PyBuffer_Release(&pbuf);
531531
return codec_tuple(unicode, pbuf.len);
532532
}
@@ -543,7 +543,7 @@ ascii_decode(PyObject *self,
543543
&pbuf, &errors))
544544
return NULL;
545545

546-
unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, errors);
546+
unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, 0, errors);
547547
PyBuffer_Release(&pbuf);
548548
return codec_tuple(unicode, pbuf.len);
549549
}
@@ -563,7 +563,7 @@ charmap_decode(PyObject *self,
563563
if (mapping == Py_None)
564564
mapping = NULL;
565565

566-
unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, mapping, errors);
566+
unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, 0, mapping, errors);
567567
PyBuffer_Release(&pbuf);
568568
return codec_tuple(unicode, pbuf.len);
569569
}
@@ -672,6 +672,7 @@ utf_7_encode(PyObject *self,
672672
return NULL;
673673
v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
674674
PyUnicode_GET_SIZE(str),
675+
PyUnicode_TAINT(str),
675676
0,
676677
0,
677678
errors),
@@ -968,6 +969,7 @@ charmap_encode(PyObject *self,
968969
v = codec_tuple(PyUnicode_EncodeCharmap(
969970
PyUnicode_AS_UNICODE(str),
970971
PyUnicode_GET_SIZE(str),
972+
PyUnicode_TAINT(str),
971973
mapping,
972974
errors),
973975
PyUnicode_GET_SIZE(str));

Modules/posixmodule.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -2035,7 +2035,7 @@ posix_getcwdu(PyObject *self, PyObject *noargs)
20352035
Py_END_ALLOW_THREADS
20362036
if (res == NULL)
20372037
return posix_error();
2038-
return PyUnicode_Decode(buf, strlen(buf), Py_FileSystemDefaultEncoding,"strict");
2038+
return PyUnicode_Decode(buf, strlen(buf), 0, Py_FileSystemDefaultEncoding,"strict");
20392039
}
20402040
#endif
20412041
#endif

Objects/bytearrayobject.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -3094,7 +3094,7 @@ bytes_reduce(PyByteArrayObject *self)
30943094
PyObject *latin1, *dict;
30953095
if (self->ob_bytes)
30963096
latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
3097-
Py_SIZE(self), NULL);
3097+
Py_SIZE(self), 0, NULL);
30983098
else
30993099
latin1 = PyUnicode_FromString("");
31003100

Objects/stringlib/string_format.h

+14-3
Original file line numberDiff line numberDiff line change
@@ -133,12 +133,23 @@ output_extend(OutputString *output, Py_ssize_t count)
133133
1 for success.
134134
*/
135135
static int
136-
output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count)
136+
output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count, PyObject *taint)
137137
{
138138
if ((count > output->end - output->ptr) && !output_extend(output, count))
139139
return 0;
140140
memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));
141141
output->ptr += count;
142+
143+
if (STRINGLIB_TAINT(output->obj) == 0 || STRINGLIB_TAINT(output->obj) == Py_None) {
144+
STRINGLIB_TAINT(output->obj) = taint;
145+
} else if (taint && taint != Py_None) {
146+
PyObject *t = PyObject_CallMethodObjArgs(STRINGLIB_TAINT(output->obj), PyString_FromString("merge"), taint, 0);
147+
if (!t)
148+
return 0;
149+
Py_XDECREF(STRINGLIB_TAINT(output->obj));
150+
STRINGLIB_TAINT(output->obj) = t;
151+
}
152+
142153
return 1;
143154
}
144155

@@ -546,7 +557,7 @@ render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
546557
#endif
547558

548559
ok = output_data(output,
549-
STRINGLIB_STR(result), STRINGLIB_LEN(result));
560+
STRINGLIB_STR(result), STRINGLIB_LEN(result), STRINGLIB_TAINT(result));
550561
done:
551562
Py_XDECREF(format_spec_object);
552563
Py_XDECREF(result);
@@ -878,7 +889,7 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs,
878889
while ((result = MarkupIterator_next(&iter, &literal, &field_name,
879890
&format_spec, &conversion,
880891
&format_spec_needs_expanding)) == 2) {
881-
if (!output_data(output, literal.ptr, literal.end - literal.ptr))
892+
if (!output_data(output, literal.ptr, literal.end - literal.ptr, 0))
882893
return 0;
883894
if (field_name.ptr != field_name.end)
884895
if (!output_markup(&field_name, &format_spec,

Objects/stringlib/stringdefs.h

+1
Original file line numberDiff line numberDiff line change
@@ -24,5 +24,6 @@
2424
#define STRINGLIB_CMP memcmp
2525
#define STRINGLIB_TOSTR PyObject_Str
2626
#define STRINGLIB_GROUPING _PyString_InsertThousandsGrouping
27+
/* Taint slot of a byte string object; usable as an lvalue.  The argument is
   parenthesized so that the cast applies to the whole expression passed in. */
#define STRINGLIB_TAINT(o) (((PyStringObject *)(o))->ob_taint)
2728

2829
#endif /* !STRINGLIB_STRINGDEFS_H */

Objects/stringlib/unicodedefs.h

+1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#define STRINGLIB_RESIZE PyUnicode_Resize
2323
#define STRINGLIB_CHECK PyUnicode_Check
2424
#define STRINGLIB_GROUPING _PyUnicode_InsertThousandsGrouping
25+
/* Taint slot of a unicode object; usable as an lvalue.  The argument is
   parenthesized so that the cast applies to the whole expression passed in. */
#define STRINGLIB_TAINT(o) (((PyUnicodeObject *)(o))->ob_taint)
2526

2627
#if PY_VERSION_HEX < 0x03000000
2728
#define STRINGLIB_TOSTR PyObject_Unicode

Objects/stringobject.c

+17-1
Original file line numberDiff line numberDiff line change
@@ -4966,6 +4966,22 @@ PyString_Format(PyObject *format, PyObject *args)
49664966
goto unicode;
49674967
}
49684968
#endif
4969+
/* Carry the taint of the formatted argument (temp) over to the result.
   If the result already has a real taint, it is replaced by the value
   returned from its "merge" method called with temp's taint; otherwise
   temp's taint is stored directly with a reference of its own. */
if (PyString_TAINT(temp) && PyString_TAINT(temp) != Py_None) {
    PyObject *old_taint = PyString_TAINT(result);

    if (old_taint && old_taint != Py_None) {
        PyObject *taint;
        PyObject *method = PyString_FromString("merge");

        if (!method) {
            Py_XDECREF(temp);
            goto error;
        }
        /* The argument list must be terminated by a NULL pointer. */
        taint = PyObject_CallMethodObjArgs(old_taint, method,
                                           PyString_TAINT(temp), NULL);
        Py_DECREF(method);
        /* Check the merge result, not temp: temp is known to be non-NULL
           here, so testing it would let a failed merge store NULL and leave
           the exception pending. */
        if (!taint) {
            Py_XDECREF(temp);
            goto error;
        }
        ((PyStringObject *) result)->ob_taint = taint;
        Py_DECREF(old_taint);
    } else {
        Py_INCREF(PyString_TAINT(temp));
        ((PyStringObject *) result)->ob_taint = PyString_TAINT(temp);
        Py_XDECREF(old_taint);
    }
}
4984+
49694985
/* Fall through */
49704986
case 'r':
49714987
if (c == 'r')
@@ -5205,7 +5221,7 @@ PyString_Format(PyObject *format, PyObject *args)
52055221
goto error;
52065222
fmtcnt = PyString_GET_SIZE(format) - \
52075223
(fmt - PyString_AS_STRING(format));
5208-
format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
5224+
format = PyUnicode_Decode(fmt, fmtcnt, 0, NULL, NULL);
52095225
if (format == NULL)
52105226
goto error;
52115227
v = PyUnicode_Format(format, args);

0 commit comments

Comments
 (0)