@@ -63,14 +63,12 @@ pub fn parse_escaped_string<'a>(
6363 let mut numbers = vec ! [ 0 ; UNICODE_LEN ] ;
6464 data. read_exact ( numbers. as_mut_slice ( ) ) ?;
6565 * idx += 4 ;
66- let hex = decode_hex_escape ( numbers, idx) ?;
66+ let hex = decode_hex_escape ( numbers. clone ( ) , idx) ?;
6767
6868 let c = match hex {
69- n @ 0xDC00 ..=0xDFFF => {
70- return Err ( Error :: Syntax (
71- ParseErrorCode :: InvalidLoneLeadingSurrogateInHexEscape ( n) ,
72- * idx,
73- ) ) ;
69+ 0xDC00 ..=0xDFFF => {
70+ encode_invalid_unicode ( numbers, str_buf) ;
71+ return Ok ( data) ;
7472 }
7573
7674 // Non-BMP characters are encoded as a sequence of two hex
@@ -79,37 +77,24 @@ pub fn parse_escaped_string<'a>(
7977 // whereas deserializing a byte string accepts lone surrogates.
8078 n1 @ 0xD800 ..=0xDBFF => {
8179 if data. len ( ) < 2 {
82- return Err ( Error :: Syntax (
83- ParseErrorCode :: UnexpectedEndOfHexEscape ,
84- * idx,
85- ) ) ;
80+ encode_invalid_unicode ( numbers, str_buf) ;
81+ return Ok ( data) ;
8682 }
87- let next_byte = data[ 0 ] ;
88- if next_byte == b'\\' {
89- * idx += 1 ;
90- data = & data[ 1 ..] ;
83+ if data[ 0 ] == b'\\' && data[ 1 ] == b'u' {
84+ * idx += 2 ;
85+ data = & data[ 2 ..] ;
9186 } else {
92- return Err ( Error :: Syntax (
93- ParseErrorCode :: UnexpectedEndOfHexEscape ,
94- * idx,
95- ) ) ;
87+ encode_invalid_unicode ( numbers, str_buf) ;
88+ return Ok ( data) ;
9689 }
97- let next_byte = data[ 0 ] ;
98- if next_byte == b'u' {
99- * idx += 1 ;
100- data = & data[ 1 ..] ;
101- } else {
102- return parse_escaped_string ( data, idx, str_buf) ;
103- }
104- let mut numbers = vec ! [ 0 ; UNICODE_LEN ] ;
105- data. read_exact ( numbers. as_mut_slice ( ) ) ?;
90+ let mut lower_numbers = vec ! [ 0 ; UNICODE_LEN ] ;
91+ data. read_exact ( lower_numbers. as_mut_slice ( ) ) ?;
10692 * idx += 4 ;
107- let n2 = decode_hex_escape ( numbers , idx) ?;
93+ let n2 = decode_hex_escape ( lower_numbers . clone ( ) , idx) ?;
10894 if !( 0xDC00 ..=0xDFFF ) . contains ( & n2) {
109- return Err ( Error :: Syntax (
110- ParseErrorCode :: InvalidSurrogateInHexEscape ( n2) ,
111- * idx,
112- ) ) ;
95+ encode_invalid_unicode ( numbers, str_buf) ;
96+ encode_invalid_unicode ( lower_numbers, str_buf) ;
97+ return Ok ( data) ;
11398 }
11499
115100 let n = ( ( ( n1 - 0xD800 ) as u32 ) << 10 | ( n2 - 0xDC00 ) as u32 ) + 0x1_0000 ;
@@ -127,6 +112,17 @@ pub fn parse_escaped_string<'a>(
127112 Ok ( data)
128113}
129114
/// Appends the literal text `\u` followed by the bytes of `numbers` to `str_buf`.
///
/// This writes a hex escape back out verbatim (backslash, `u`, then the
/// digits exactly as given) rather than a decoded character. It is meant for
/// escapes that cannot be turned into a Unicode scalar value, such as an
/// unpaired UTF-16 surrogate. RFC 8259 section 8.2 notes that the JSON grammar
/// permits such sequences:
/// <https://datatracker.ietf.org/doc/html/rfc8259#section-8.2>
///
/// `numbers` may be anything viewable as a byte slice (`Vec<u8>`, `&[u8]`,
/// `[u8; N]`, ...), so a caller that still needs its buffer can pass a borrow
/// instead of cloning it. Each byte is widened to the `char` with the same
/// code point; no validation that the bytes are hex digits is done here.
#[inline]
fn encode_invalid_unicode(numbers: impl AsRef<[u8]>, str_buf: &mut String) {
    let digits = numbers.as_ref();
    // Two bytes for `\u` plus at least one byte per digit; bytes >= 0x80 take
    // two UTF-8 bytes each, so this is a lower bound, not an exact size.
    str_buf.reserve(digits.len() + 2);
    str_buf.push_str("\\u");
    str_buf.extend(digits.iter().copied().map(char::from));
}
125+
130126#[ inline]
131127fn decode_hex_val ( val : u8 ) -> Option < u16 > {
132128 let n = HEX [ val as usize ] as u16 ;
0 commit comments