22
33use memchr;
44use std:: borrow:: Cow ;
5- use std:: collections:: HashMap ;
65use std:: ops:: Range ;
76
87#[ cfg( test) ]
@@ -66,31 +65,15 @@ impl std::error::Error for EscapeError {}
6665/// Escapes a `&[u8]` and replaces all xml special characters (<, >, &, ', ") with their
6766/// corresponding xml escaped value.
6867pub fn escape ( raw : & [ u8 ] ) -> Cow < [ u8 ] > {
69- #[ inline]
70- fn to_escape ( b : u8 ) -> bool {
71- match b {
72- b'<' | b'>' | b'\'' | b'&' | b'"' => true ,
73- _ => false ,
74- }
75- }
76-
77- _escape ( raw, to_escape)
68+ _escape ( raw, |ch| matches ! ( ch, b'<' | b'>' | b'&' | b'\'' | b'\"' ) )
7869}
7970
8071/// Should only be used for escaping text content. In xml text content, it is allowed
8172/// (though not recommended) to leave the quote special characters " and ' unescaped.
8273/// This function escapes a `&[u8]` and replaces xml special characters (<, >, &) with
8374/// their corresponding xml escaped value, but does not escape quote characters.
8475pub fn partial_escape ( raw : & [ u8 ] ) -> Cow < [ u8 ] > {
85- #[ inline]
86- fn to_escape ( b : u8 ) -> bool {
87- match b {
88- b'<' | b'>' | b'&' => true ,
89- _ => false ,
90- }
91- }
92-
93- _escape ( raw, to_escape)
76+ _escape ( raw, |ch| matches ! ( ch, b'<' | b'>' | b'&' ) )
9477}
9578
9679/// Escapes a `&[u8]` and replaces a subset of xml special characters (<, >, &, ', ") with their
@@ -130,32 +113,23 @@ fn _escape<F: Fn(u8) -> bool>(raw: &[u8], escape_chars: F) -> Cow<[u8]> {
130113/// Unescape a `&[u8]` and replaces all xml escaped characters ('&...;') into their corresponding
131114/// value
132115pub fn unescape ( raw : & [ u8 ] ) -> Result < Cow < [ u8 ] > , EscapeError > {
133- do_unescape ( raw, None )
134- }
135-
136- /// Unescape a `&[u8]` and replaces all xml escaped characters ('&...;') into their corresponding
137- /// value, using a dictionnary of custom entities.
138- ///
139- /// # Pre-condition
140- ///
141- /// The keys and values of `custom_entities`, if any, must be valid UTF-8.
142- pub fn unescape_with < ' a > (
143- raw : & ' a [ u8 ] ,
144- custom_entities : & HashMap < Vec < u8 > , Vec < u8 > > ,
145- ) -> Result < Cow < ' a , [ u8 ] > , EscapeError > {
146- do_unescape ( raw, Some ( custom_entities) )
116+ unescape_with ( raw, |_| None )
147117}
148118
149119/// Unescape a `&[u8]` and replaces all xml escaped characters ('&...;') into their corresponding
150- /// value, using an optional dictionary of custom entities.
120+ /// value, using a resolver function for custom entities.
151121///
152122/// # Pre-condition
153123///
154- /// The keys and values of `custom_entities`, if any, must be valid UTF-8.
155- pub fn do_unescape < ' a > (
156- raw : & ' a [ u8 ] ,
157- custom_entities : Option < & HashMap < Vec < u8 > , Vec < u8 > > > ,
158- ) -> Result < Cow < ' a , [ u8 ] > , EscapeError > {
124+ /// The implementation of `resolve_entity` is expected to operate over UTF-8 inputs.
125+ pub fn unescape_with < ' input , ' entity , F > (
126+ raw : & ' input [ u8 ] ,
127+ resolve_entity : F ,
128+ ) -> Result < Cow < ' input , [ u8 ] > , EscapeError >
129+ where
130+ // the lifetime of the output comes from a capture or is `'static`
131+ F : Fn ( & [ u8 ] ) -> Option < & ' entity str > ,
132+ {
159133 let mut unescaped = None ;
160134 let mut last_end = 0 ;
161135 let mut iter = memchr:: memchr2_iter ( b'&' , b';' , raw) ;
@@ -171,12 +145,14 @@ pub fn do_unescape<'a>(
171145
172146 // search for character correctness
173147 let pat = & raw [ start + 1 ..end] ;
174- if let Some ( s) = named_entity ( pat) {
175- unescaped. extend_from_slice ( s. as_bytes ( ) ) ;
176- } else if pat. starts_with ( b"#" ) {
177- push_utf8 ( unescaped, parse_number ( & pat[ 1 ..] , start..end) ?) ;
178- } else if let Some ( value) = custom_entities. and_then ( |hm| hm. get ( pat) ) {
179- unescaped. extend_from_slice ( & value) ;
148+ if pat. starts_with ( b"#" ) {
149+ let entity = & pat[ 1 ..] ; // starts after the #
150+ let codepoint = parse_number ( entity, start..end) ?;
151+ push_utf8 ( unescaped, codepoint) ;
152+ } else if let Some ( value) = named_entity ( pat) {
153+ unescaped. extend_from_slice ( value. as_bytes ( ) ) ;
154+ } else if let Some ( value) = resolve_entity ( pat) {
155+ unescaped. extend_from_slice ( value. as_bytes ( ) ) ;
180156 } else {
181157 return Err ( EscapeError :: UnrecognizedSymbol (
182158 start + 1 ..end,
@@ -1740,18 +1716,20 @@ fn test_unescape() {
17401716
17411717#[ test]
17421718fn test_unescape_with ( ) {
1743- let custom_entities = vec ! [ ( b"foo" . to_vec( ) , b"BAR" . to_vec( ) ) ]
1744- . into_iter ( )
1745- . collect ( ) ;
1746- assert_eq ! ( & * unescape_with( b"test" , & custom_entities) . unwrap( ) , b"test" ) ;
1719+ let custom_entities = |ent : & [ u8 ] | match ent {
1720+ b"foo" => Some ( "BAR" ) ,
1721+ _ => None ,
1722+ } ;
1723+
1724+ assert_eq ! ( & * unescape_with( b"test" , custom_entities) . unwrap( ) , b"test" ) ;
17471725 assert_eq ! (
1748- & * unescape_with( b"<test>" , & custom_entities) . unwrap( ) ,
1726+ & * unescape_with( b"<test>" , custom_entities) . unwrap( ) ,
17491727 b"<test>"
17501728 ) ;
1751- assert_eq ! ( & * unescape_with( b"0" , & custom_entities) . unwrap( ) , b"0" ) ;
1752- assert_eq ! ( & * unescape_with( b"0" , & custom_entities) . unwrap( ) , b"0" ) ;
1753- assert_eq ! ( & * unescape_with( b"&foo;" , & custom_entities) . unwrap( ) , b"BAR" ) ;
1754- assert ! ( unescape_with( b"&fop;" , & custom_entities) . is_err( ) ) ;
1729+ assert_eq ! ( & * unescape_with( b"0" , custom_entities) . unwrap( ) , b"0" ) ;
1730+ assert_eq ! ( & * unescape_with( b"0" , custom_entities) . unwrap( ) , b"0" ) ;
1731+ assert_eq ! ( & * unescape_with( b"&foo;" , custom_entities) . unwrap( ) , b"BAR" ) ;
1732+ assert ! ( unescape_with( b"&fop;" , custom_entities) . is_err( ) ) ;
17551733}
17561734
17571735#[ test]
0 commit comments