@@ -4972,10 +4972,33 @@ function ( string $utf8_literal ) {
4972
4972
$ this ->assertSame ( "'👪' " , $ quote ( '👪 ' ) );
4973
4973
$ this ->assertSame ( "'Ʈềʂᴛӏń𝒈 𝙨𝑜ɱê Ū𝐓Ϝ-8 𝒄𝒽ȃᵲ𝛼çṱ𝘦ᴦ𐑈.' " , $ quote ( 'Ʈềʂᴛӏń𝒈 𝙨𝑜ɱê Ū𝐓Ϝ-8 𝒄𝒽ȃᵲ𝛼çṱ𝘦ᴦ𐑈. ' ) );
4974
4974
4975
- // Invalid UTF-8 sequences may fail to be preserved.
4976
- // The following 2-byte sequence with a single quote as the last byte
4977
- // is not a valid UTF-8 sequence. The single quote gets escaped.
4978
- // At the moment, this is the intended behavior.
4975
+ // Invalid UTF-8: An incomplete 2-byte sequence is left unchanged.
4976
+ $ this ->assertSame (
4977
+ "' " . chr ( 0xC0 ) . "' " ,
4978
+ $ quote ( chr ( 0xC0 ) )
4979
+ );
4980
+
4981
+ // Invalid UTF-8: A lone high surrogate (U+D800) encoded as 3 bytes is left unchanged.
4982
+ $ this ->assertSame (
4983
+ "' " . chr ( 0xED ) . chr ( 0xA0 ) . chr ( 0x80 ) . "' " ,
4984
+ $ quote ( chr ( 0xED ) . chr ( 0xA0 ) . chr ( 0x80 ) )
4985
+ );
4986
+
4987
+ // Invalid UTF-8: Overlong encoding of ASCII NULL is left unchanged.
4988
+ $ this ->assertSame (
4989
+ "' " . chr ( 0xE0 ) . chr ( 0x80 ) . chr ( 0x80 ) . "' " ,
4990
+ $ quote ( chr ( 0xE0 ) . chr ( 0x80 ) . chr ( 0x80 ) )
4991
+ );
4992
+
4993
+ // Invalid UTF-8: A 2-byte sequence prefix, followed by an ASCII NULL.
4994
+ // The NULL is escaped, leaving the C0 prefix an incomplete sequence.
4995
+ $ this ->assertSame (
4996
+ "' " . chr ( 0xC0 ) . "{$ backslash }0 " . "' " ,
4997
+ $ quote ( chr ( 0xC0 ) . chr ( 0 ) )
4998
+ );
4999
+
5000
+ // Invalid UTF-8: A 2-byte sequence prefix, followed by a single quote.
5001
+ // The single quote is escaped, leaving the C0 prefix an incomplete sequence.
4979
5002
$ this ->assertSame (
4980
5003
"' " . chr ( 0xC0 ) . chr ( 39 ) . chr ( 39 ) . "' " ,
4981
5004
$ quote ( chr ( 0xC0 ) . chr ( 39 ) )
0 commit comments