@@ -4972,10 +4972,35 @@ function ( string $utf8_literal ) {
4972
4972
$ this ->assertSame ( "'👪' " , $ quote ( '👪 ' ) );
4973
4973
$ this ->assertSame ( "'Ʈềʂᴛӏń𝒈 𝙨𝑜ɱê Ū𝐓Ϝ-8 𝒄𝒽ȃᵲ𝛼çṱ𝘦ᴦ𐑈.' " , $ quote ( 'Ʈềʂᴛӏń𝒈 𝙨𝑜ɱê Ū𝐓Ϝ-8 𝒄𝒽ȃᵲ𝛼çṱ𝘦ᴦ𐑈. ' ) );
4974
4974
4975
- // Invalid UTF-8 sequences may fail to be preserved.
4976
- // The following 2-byte sequence with a single quote as the last byte
4977
- // is not a valid UTF-8 sequence. The single quote gets escaped.
4978
- // At the moment, this is the intended behavior.
4975
+ // Invalid UTF-8 sequences will be left unchanged:
4976
+
4977
+ // Invalid UTF-8: Incomplete 2-byte sequence.
4978
+ $ this ->assertSame (
4979
+ "' " . chr ( 0xC0 ) . "' " ,
4980
+ $ quote ( chr ( 0xC0 ) )
4981
+ );
4982
+
4983
+ // Invalid UTF-8: An encoded lone surrogate (U+D800), i.e. half of a surrogate pair.
4984
+ $ this ->assertSame (
4985
+ "' " . chr ( 0xED ) . chr ( 0xA0 ) . chr ( 0x80 ) . "' " ,
4986
+ $ quote ( chr ( 0xED ) . chr ( 0xA0 ) . chr ( 0x80 ) )
4987
+ );
4988
+
4989
+ // Invalid UTF-8: Overlong encoding of ASCII NULL.
4990
+ $ this ->assertSame (
4991
+ "' " . chr ( 0xE0 ) . chr ( 0x80 ) . chr ( 0x80 ) . "' " ,
4992
+ $ quote ( chr ( 0xE0 ) . chr ( 0x80 ) . chr ( 0x80 ) )
4993
+ );
4994
+
4995
+ // Invalid UTF-8: A 2-byte sequence prefix, followed by an ASCII NULL.
4996
+ // The NULL is escaped, leaving the C0 prefix an incomplete sequence.
4997
+ $ this ->assertSame (
4998
+ "' " . chr ( 0xC0 ) . "{$ backslash }0 " . "' " ,
4999
+ $ quote ( chr ( 0xC0 ) . chr ( 0 ) )
5000
+ );
5001
+
5002
+ // Invalid UTF-8: A 2-byte sequence prefix, followed by a single quote.
5003
+ // The single quote is escaped, leaving the C0 prefix an incomplete sequence.
4979
5004
$ this ->assertSame (
4980
5005
"' " . chr ( 0xC0 ) . chr ( 39 ) . chr ( 39 ) . "' " ,
4981
5006
$ quote ( chr ( 0xC0 ) . chr ( 39 ) )
0 commit comments