22
33use memchr;
44use std:: borrow:: Cow ;
5- use std:: collections:: HashMap ;
65use std:: ops:: Range ;
76
87#[ cfg( test) ]
@@ -66,31 +65,15 @@ impl std::error::Error for EscapeError {}
6665/// Escapes a `&[u8]` and replaces all xml special characters (<, >, &, ', ") with their
6766/// corresponding xml escaped value.
6867pub fn escape ( raw : & [ u8 ] ) -> Cow < [ u8 ] > {
69- #[ inline]
70- fn to_escape ( b : u8 ) -> bool {
71- match b {
72- b'<' | b'>' | b'\'' | b'&' | b'"' => true ,
73- _ => false ,
74- }
75- }
76-
77- _escape ( raw, to_escape)
68+ _escape ( raw, |ch| matches ! ( ch, b'<' | b'>' | b'&' | b'\'' | b'\"' ) )
7869}
7970
8071/// Should only be used for escaping text content. In xml text content, it is allowed
8172/// (though not recommended) to leave the quote special characters " and ' unescaped.
8273/// This function escapes a `&[u8]` and replaces xml special characters (<, >, &) with
8374/// their corresponding xml escaped value, but does not escape quote characters.
8475pub fn partial_escape ( raw : & [ u8 ] ) -> Cow < [ u8 ] > {
85- #[ inline]
86- fn to_escape ( b : u8 ) -> bool {
87- match b {
88- b'<' | b'>' | b'&' => true ,
89- _ => false ,
90- }
91- }
92-
93- _escape ( raw, to_escape)
76+ _escape ( raw, |ch| matches ! ( ch, b'<' | b'>' | b'&' ) )
9477}
9578
9679/// Escapes a `&[u8]` and replaces a subset of xml special characters (<, >, &, ', ") with their
@@ -130,32 +113,16 @@ fn _escape<F: Fn(u8) -> bool>(raw: &[u8], escape_chars: F) -> Cow<[u8]> {
130113/// Unescapes a `&[u8]` and replaces all xml escaped characters ('&...;') with their corresponding
131114/// value
132115pub fn unescape ( raw : & [ u8 ] ) -> Result < Cow < [ u8 ] > , EscapeError > {
133- do_unescape ( raw, None )
134- }
135-
136- /// Unescape a `&[u8]` and replaces all xml escaped characters ('&...;') into their corresponding
137- /// value, using a dictionnary of custom entities.
138- ///
139- /// # Pre-condition
140- ///
141- /// The keys and values of `custom_entities`, if any, must be valid UTF-8.
142- pub fn unescape_with < ' a > (
143- raw : & ' a [ u8 ] ,
144- custom_entities : & HashMap < Vec < u8 > , Vec < u8 > > ,
145- ) -> Result < Cow < ' a , [ u8 ] > , EscapeError > {
146- do_unescape ( raw, Some ( custom_entities) )
116+ unescape_with ( raw, |_| None )
147117}
148118
149119/// Unescapes a `&[u8]` and replaces all xml escaped characters ('&...;') with their corresponding
150- /// value, using an optional dictionary of custom entities.
120+ /// value, using a resolver function for custom entities.
151121///
152122/// # Pre-condition
153123///
154- /// The keys and values of `custom_entities`, if any, must be valid UTF-8.
155- pub fn do_unescape < ' a > (
156- raw : & ' a [ u8 ] ,
157- custom_entities : Option < & HashMap < Vec < u8 > , Vec < u8 > > > ,
158- ) -> Result < Cow < ' a , [ u8 ] > , EscapeError > {
124+ /// The implementation of `resolve_entity` is expected to operate over UTF-8 inputs.
125+ pub fn unescape_with < ' a > ( raw : & ' a [ u8 ] , resolve_entity : impl Fn ( & [ u8 ] ) -> Option < & str > ) -> Result < Cow < ' a , [ u8 ] > , EscapeError > {
159126 let mut unescaped = None ;
160127 let mut last_end = 0 ;
161128 let mut iter = memchr:: memchr2_iter ( b'&' , b';' , raw) ;
@@ -171,12 +138,14 @@ pub fn do_unescape<'a>(
171138
172139 // search for character correctness
173140 let pat = & raw [ start + 1 ..end] ;
174- if let Some ( s) = named_entity ( pat) {
175- unescaped. extend_from_slice ( s. as_bytes ( ) ) ;
176- } else if pat. starts_with ( b"#" ) {
177- push_utf8 ( unescaped, parse_number ( & pat[ 1 ..] , start..end) ?) ;
178- } else if let Some ( value) = custom_entities. and_then ( |hm| hm. get ( pat) ) {
179- unescaped. extend_from_slice ( & value) ;
141+ if pat. starts_with ( b"#" ) {
142+ let entity = & pat[ 1 ..] ; // starts after the #
143+ let codepoint = parse_number ( entity, start..end) ?;
144+ push_utf8 ( unescaped, codepoint) ;
145+ } else if let Some ( value) = named_entity ( pat) {
146+ unescaped. extend_from_slice ( value. as_bytes ( ) ) ;
147+ } else if let Some ( value) = resolve_entity ( pat) {
148+ unescaped. extend_from_slice ( value. as_bytes ( ) ) ;
180149 } else {
181150 return Err ( EscapeError :: UnrecognizedSymbol (
182151 start + 1 ..end,
@@ -1740,18 +1709,20 @@ fn test_unescape() {
17401709
17411710#[ test]
17421711fn test_unescape_with ( ) {
1743- let custom_entities = vec ! [ ( b"foo" . to_vec( ) , b"BAR" . to_vec( ) ) ]
1744- . into_iter ( )
1745- . collect ( ) ;
1746- assert_eq ! ( & * unescape_with( b"test" , & custom_entities) . unwrap( ) , b"test" ) ;
1712+ let custom_entities = |ent : & [ u8 ] | match ent {
1713+ b"foo" => Some ( "BAR" ) ,
1714+ _ => None ,
1715+ } ;
1716+
1717+ assert_eq ! ( & * unescape_with( b"test" , custom_entities) . unwrap( ) , b"test" ) ;
17471718 assert_eq ! (
1748- & * unescape_with( b"<test>" , & custom_entities) . unwrap( ) ,
1719+ & * unescape_with( b"<test>" , custom_entities) . unwrap( ) ,
17491720 b"<test>"
17501721 ) ;
1751- assert_eq ! ( & * unescape_with( b"0" , & custom_entities) . unwrap( ) , b"0" ) ;
1752- assert_eq ! ( & * unescape_with( b"0" , & custom_entities) . unwrap( ) , b"0" ) ;
1753- assert_eq ! ( & * unescape_with( b"&foo;" , & custom_entities) . unwrap( ) , b"BAR" ) ;
1754- assert ! ( unescape_with( b"&fop;" , & custom_entities) . is_err( ) ) ;
1722+ assert_eq ! ( & * unescape_with( b"0" , custom_entities) . unwrap( ) , b"0" ) ;
1723+ assert_eq ! ( & * unescape_with( b"0" , custom_entities) . unwrap( ) , b"0" ) ;
1724+ assert_eq ! ( & * unescape_with( b"&foo;" , custom_entities) . unwrap( ) , b"BAR" ) ;
1725+ assert ! ( unescape_with( b"&fop;" , custom_entities) . is_err( ) ) ;
17551726}
17561727
17571728#[ test]
0 commit comments