22
33use memchr;
44use std:: borrow:: Cow ;
5- use std:: collections:: HashMap ;
65use std:: ops:: Range ;
76
87#[ cfg( test) ]
@@ -66,31 +65,15 @@ impl std::error::Error for EscapeError {}
6665/// Escapes a `&[u8]` and replaces all xml special characters (<, >, &, ', ") with their
6766/// corresponding xml escaped value.
6867pub fn escape ( raw : & [ u8 ] ) -> Cow < [ u8 ] > {
69- #[ inline]
70- fn to_escape ( b : u8 ) -> bool {
71- match b {
72- b'<' | b'>' | b'\'' | b'&' | b'"' => true ,
73- _ => false ,
74- }
75- }
76-
77- _escape ( raw, to_escape)
68+ _escape ( raw, |ch| matches ! ( ch, b'<' | b'>' | b'&' | b'\'' | b'\"' ) )
7869}
7970
8071/// Should only be used for escaping text content. In xml text content, it is allowed
8172/// (though not recommended) to leave the quote special characters " and ' unescaped.
8273/// This function escapes a `&[u8]` and replaces xml special characters (<, >, &) with
8374/// their corresponding xml escaped value, but does not escape quote characters.
8475pub fn partial_escape ( raw : & [ u8 ] ) -> Cow < [ u8 ] > {
85- #[ inline]
86- fn to_escape ( b : u8 ) -> bool {
87- match b {
88- b'<' | b'>' | b'&' => true ,
89- _ => false ,
90- }
91- }
92-
93- _escape ( raw, to_escape)
76+ _escape ( raw, |ch| matches ! ( ch, b'<' | b'>' | b'&' ) )
9477}
9578
9679/// Escapes a `&[u8]` and replaces a subset of xml special characters (<, >, &, ', ") with their
@@ -130,32 +113,22 @@ fn _escape<F: Fn(u8) -> bool>(raw: &[u8], escape_chars: F) -> Cow<[u8]> {
130113/// Unescape a `&[u8]` and replaces all xml escaped characters ('&...;') into their corresponding
131114/// value
132115pub fn unescape ( raw : & [ u8 ] ) -> Result < Cow < [ u8 ] > , EscapeError > {
133- do_unescape ( raw, None )
116+ unescape_with ( raw, |_| None )
134117}
135118
136119/// Unescape a `&[u8]` and replaces all xml escaped characters ('&...;') into their corresponding
137- /// value, using a dictionnary of custom entities.
120+ /// value, using a dictionary of custom entities.
138121///
139122/// # Pre-condition
140123///
141- /// The keys and values of `custom_entities`, if any, must be valid UTF-8.
142- pub fn unescape_with < ' a > (
124+ /// The implementation of `lookup_custom_entity` is expected to operate over UTF-8 inputs .
125+ pub fn unescape_with < ' a , ' b > (
143126 raw : & ' a [ u8 ] ,
144- custom_entities : & HashMap < Vec < u8 > , Vec < u8 > > ,
145- ) -> Result < Cow < ' a , [ u8 ] > , EscapeError > {
146- do_unescape ( raw, Some ( custom_entities) )
147- }
148-
149- /// Unescape a `&[u8]` and replaces all xml escaped characters ('&...;') into their corresponding
150- /// value, using an optional dictionary of custom entities.
151- ///
152- /// # Pre-condition
153- ///
154- /// The keys and values of `custom_entities`, if any, must be valid UTF-8.
155- pub fn do_unescape < ' a > (
156- raw : & ' a [ u8 ] ,
157- custom_entities : Option < & HashMap < Vec < u8 > , Vec < u8 > > > ,
158- ) -> Result < Cow < ' a , [ u8 ] > , EscapeError > {
127+ lookup_custom_entity : impl Fn ( & ' b [ u8 ] ) -> Option < & ' b str > ,
128+ ) -> Result < Cow < ' a , [ u8 ] > , EscapeError >
129+ where
130+ ' a : ' b ,
131+ {
159132 let mut unescaped = None ;
160133 let mut last_end = 0 ;
161134 let mut iter = memchr:: memchr2_iter ( b'&' , b';' , raw) ;
@@ -171,12 +144,14 @@ pub fn do_unescape<'a>(
171144
172145 // search for character correctness
173146 let pat = & raw [ start + 1 ..end] ;
174- if let Some ( s) = named_entity ( pat) {
175- unescaped. extend_from_slice ( s. as_bytes ( ) ) ;
176- } else if pat. starts_with ( b"#" ) {
177- push_utf8 ( unescaped, parse_number ( & pat[ 1 ..] , start..end) ?) ;
178- } else if let Some ( value) = custom_entities. and_then ( |hm| hm. get ( pat) ) {
179- unescaped. extend_from_slice ( & value) ;
147+ if pat. starts_with ( b"#" ) {
148+ let entity = & pat[ 1 ..] ; // starts after the #
149+ let codepoint = parse_number ( entity, start..end) ?;
150+ push_utf8 ( unescaped, codepoint) ;
151+ } else if let Some ( value) = named_entity ( pat) {
152+ unescaped. extend_from_slice ( value. as_bytes ( ) ) ;
153+ } else if let Some ( value) = lookup_custom_entity ( pat) {
154+ unescaped. extend_from_slice ( value. as_bytes ( ) ) ;
180155 } else {
181156 return Err ( EscapeError :: UnrecognizedSymbol (
182157 start + 1 ..end,
@@ -1740,18 +1715,20 @@ fn test_unescape() {
17401715
17411716#[ test]
17421717fn test_unescape_with ( ) {
1743- let custom_entities = vec ! [ ( b"foo" . to_vec( ) , b"BAR" . to_vec( ) ) ]
1744- . into_iter ( )
1745- . collect ( ) ;
1746- assert_eq ! ( & * unescape_with( b"test" , & custom_entities) . unwrap( ) , b"test" ) ;
1718+ let custom_entities = |ent : & [ u8 ] | match ent {
1719+ b"foo" => Some ( "BAR" ) ,
1720+ _ => None ,
1721+ } ;
1722+
1723+ assert_eq ! ( & * unescape_with( b"test" , custom_entities) . unwrap( ) , b"test" ) ;
17471724 assert_eq ! (
1748- & * unescape_with( b"<test>" , & custom_entities) . unwrap( ) ,
1725+ & * unescape_with( b"<test>" , custom_entities) . unwrap( ) ,
17491726 b"<test>"
17501727 ) ;
1751- assert_eq ! ( & * unescape_with( b"0" , & custom_entities) . unwrap( ) , b"0" ) ;
1752- assert_eq ! ( & * unescape_with( b"0" , & custom_entities) . unwrap( ) , b"0" ) ;
1753- assert_eq ! ( & * unescape_with( b"&foo;" , & custom_entities) . unwrap( ) , b"BAR" ) ;
1754- assert ! ( unescape_with( b"&fop;" , & custom_entities) . is_err( ) ) ;
1728+ assert_eq ! ( & * unescape_with( b"0" , custom_entities) . unwrap( ) , b"0" ) ;
1729+ assert_eq ! ( & * unescape_with( b"0" , custom_entities) . unwrap( ) , b"0" ) ;
1730+ assert_eq ! ( & * unescape_with( b"&foo;" , custom_entities) . unwrap( ) , b"BAR" ) ;
1731+ assert ! ( unescape_with( b"&fop;" , custom_entities) . is_err( ) ) ;
17551732}
17561733
17571734#[ test]
0 commit comments