Skip to content

Commit a6b20c5

Browse files
committed
Improve invalid UTF-8 test cases and their docs
1 parent 5196a05 commit a6b20c5

File tree

1 file changed

+29
-4
lines changed

1 file changed

+29
-4
lines changed

tests/WP_SQLite_Driver_Tests.php

+29-4
Original file line numberDiff line numberDiff line change
@@ -4972,10 +4972,35 @@ function ( string $utf8_literal ) {
49724972
$this->assertSame( "'👪'", $quote( '👪' ) );
49734973
$this->assertSame( "'Ʈềʂᴛӏń𝒈 𝙨𝑜ɱê Ū𝐓Ϝ-8 𝒄𝒽ȃᵲ𝛼çṱ𝘦ᴦ𐑈.'", $quote( 'Ʈềʂᴛӏń𝒈 𝙨𝑜ɱê Ū𝐓Ϝ-8 𝒄𝒽ȃᵲ𝛼çṱ𝘦ᴦ𐑈.' ) );
49744974

4975-
// Invalid UTF-8 sequences may fail to be preserved.
4976-
// The following 2-byte sequence with a single quote as the last byte
4977-
// is not a valid UTF-8 sequence. The single quote gets escaped.
4978-
// At the moment, this is the intended behavior.
4975+
// Invalid UTF-8 sequences will be left unchanged:
4976+
4977+
// Invalid UTF-8: Incomplete 2-byte sequence.
4978+
$this->assertSame(
4979+
"'" . chr( 0xC0 ) . "'",
4980+
$quote( chr( 0xC0 ) )
4981+
);
4982+
4983+
// Invalid UTF-8: A lone UTF-16 surrogate (U+D800) encoded as a 3-byte sequence.
4984+
$this->assertSame(
4985+
"'" . chr( 0xED ) . chr( 0xA0 ) . chr( 0x80 ) . "'",
4986+
$quote( chr( 0xED ) . chr( 0xA0 ) . chr( 0x80 ) )
4987+
);
4988+
4989+
// Invalid UTF-8: Overlong encoding of ASCII NULL.
4990+
$this->assertSame(
4991+
"'" . chr( 0xE0 ) . chr( 0x80 ) . chr( 0x80 ) . "'",
4992+
$quote( chr( 0xE0 ) . chr( 0x80 ) . chr( 0x80 ) )
4993+
);
4994+
4995+
// Invalid UTF-8: A 2-byte sequence prefix, followed by an ASCII NULL.
4996+
// The NULL is escaped, leaving the C0 prefix an incomplete sequence.
4997+
$this->assertSame(
4998+
"'" . chr( 0xC0 ) . "{$backslash}0" . "'",
4999+
$quote( chr( 0xC0 ) . chr( 0 ) )
5000+
);
5001+
5002+
// Invalid UTF-8: A 2-byte sequence prefix, followed by a single quote.
5003+
// The single quote is escaped, leaving the C0 prefix an incomplete sequence.
49795004
$this->assertSame(
49805005
"'" . chr( 0xC0 ) . chr( 39 ) . chr( 39 ) . "'",
49815006
$quote( chr( 0xC0 ) . chr( 39 ) )

0 commit comments

Comments
 (0)