Skip to content

Commit 2ee283e

Browse files
dralleyMingun
authored andcommitted
More correctly normalize attribute values
1 parent 7e83b32 commit 2ee283e

File tree

8 files changed

+420
-32
lines changed

8 files changed

+420
-32
lines changed

Changelog.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,19 @@
2222
- `Deserializer::buffering_with_resolver`
2323
- [#878]: Add ability to serialize structs in `$value` fields. The struct name will
2424
be used as a tag name. Previously only enums was allowed there.
25+
- [#371]: Improved compliance with the XML attribute value normalization process by adding
26+
- `Attribute::normalized_value()`
27+
- `Attribute::normalized_value_with()`
28+
- `Attribute::decoded_and_normalized_value()`
29+
- `Attribute::decoded_and_normalized_value_with()`
30+
31+
which ought to be used in place of deprecated
32+
- `Attribute::unescape_value()`
33+
- `Attribute::unescape_value_with()`
34+
- `Attribute::decode_and_unescape_value()`
35+
- `Attribute::decode_and_unescape_value_with()`
36+
37+
Deprecated functions now behaves the same as newly added.
2538

2639
### Bug Fixes
2740

benches/macrobenches.rs

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,13 @@ static INPUTS: &[(&str, &str)] = &[
4444
("players.xml", PLAYERS),
4545
];
4646

47-
// TODO: use fully normalized attribute values
4847
fn parse_document_from_str(doc: &str) -> XmlResult<()> {
4948
let mut r = Reader::from_str(doc);
5049
loop {
5150
match black_box(r.read_event()?) {
5251
Event::Start(e) | Event::Empty(e) => {
5352
for attr in e.attributes() {
54-
black_box(attr?.decode_and_unescape_value(r.decoder())?);
53+
black_box(attr?.decoded_and_normalized_value(r.decoder())?);
5554
}
5655
}
5756
Event::Text(e) => {
@@ -68,15 +67,14 @@ fn parse_document_from_str(doc: &str) -> XmlResult<()> {
6867
Ok(())
6968
}
7069

71-
// TODO: use fully normalized attribute values
7270
fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> {
7371
let mut r = Reader::from_reader(doc);
7472
let mut buf = Vec::new();
7573
loop {
7674
match black_box(r.read_event_into(&mut buf)?) {
7775
Event::Start(e) | Event::Empty(e) => {
7876
for attr in e.attributes() {
79-
black_box(attr?.decode_and_unescape_value(r.decoder())?);
77+
black_box(attr?.decoded_and_normalized_value(r.decoder())?);
8078
}
8179
}
8280
Event::Text(e) => {
@@ -94,15 +92,14 @@ fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> {
9492
Ok(())
9593
}
9694

97-
// TODO: use fully normalized attribute values
9895
fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> {
9996
let mut r = NsReader::from_str(doc);
10097
loop {
10198
match black_box(r.read_resolved_event()?) {
10299
(resolved_ns, Event::Start(e) | Event::Empty(e)) => {
103100
black_box(resolved_ns);
104101
for attr in e.attributes() {
105-
black_box(attr?.decode_and_unescape_value(r.decoder())?);
102+
black_box(attr?.decoded_and_normalized_value(r.decoder())?);
106103
}
107104
}
108105
(resolved_ns, Event::Text(e)) => {
@@ -121,7 +118,6 @@ fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> {
121118
Ok(())
122119
}
123120

124-
// TODO: use fully normalized attribute values
125121
fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> {
126122
let mut r = NsReader::from_reader(doc);
127123
let mut buf = Vec::new();
@@ -130,7 +126,7 @@ fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> {
130126
(resolved_ns, Event::Start(e) | Event::Empty(e)) => {
131127
black_box(resolved_ns);
132128
for attr in e.attributes() {
133-
black_box(attr?.decode_and_unescape_value(r.decoder())?);
129+
black_box(attr?.decoded_and_normalized_value(r.decoder())?);
134130
}
135131
}
136132
(resolved_ns, Event::Text(e)) => {

benches/microbenches.rs

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,50 @@ fn attributes(c: &mut Criterion) {
243243
assert_eq!(count, 150);
244244
})
245245
});
246+
247+
group.finish();
248+
}
249+
250+
/// Benchmarks normalizing attribute values
251+
fn attribute_value_normalization(c: &mut Criterion) {
252+
let mut group = c.benchmark_group("attribute_value_normalization");
253+
254+
group.bench_function("noop_short", |b| {
255+
b.iter(|| {
256+
black_box(unescape("foobar")).unwrap();
257+
})
258+
});
259+
260+
group.bench_function("noop_long", |b| {
261+
b.iter(|| {
262+
black_box(unescape("just a bit of text without any entities")).unwrap();
263+
})
264+
});
265+
266+
group.bench_function("replacement_chars", |b| {
267+
b.iter(|| {
268+
black_box(unescape("just a bit\n of text without\tany entities")).unwrap();
269+
})
270+
});
271+
272+
group.bench_function("char_reference", |b| {
273+
b.iter(|| {
274+
let text = "prefix &#34;some stuff&#34;,&#x22;more stuff&#x22;";
275+
black_box(unescape(text)).unwrap();
276+
let text = "&#38;&#60;";
277+
black_box(unescape(text)).unwrap();
278+
})
279+
});
280+
281+
group.bench_function("entity_reference", |b| {
282+
b.iter(|| {
283+
let text = "age &gt; 72 &amp;&amp; age &lt; 21";
284+
black_box(unescape(text)).unwrap();
285+
let text = "&quot;what&apos;s that?&quot;";
286+
black_box(unescape(text)).unwrap();
287+
})
288+
});
289+
246290
group.finish();
247291
}
248292

@@ -355,6 +399,7 @@ criterion_group!(
355399
read_resolved_event_into,
356400
one_event,
357401
attributes,
402+
attribute_value_normalization,
358403
escaping,
359404
unescaping,
360405
);

examples/custom_entities.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
154154
let label = attrs.next().unwrap()?;
155155
assert_eq!(label.key, QName(b"label"));
156156
assert_eq!(
157-
label.decode_and_unescape_value_with(reader.decoder(), |ent| reader.get_entity(ent))?,
157+
label
158+
.decoded_and_normalized_value_with(reader.decoder(), 9, |e| reader.get_entity(e))?,
158159
"Message: hello world"
159160
);
160161

@@ -185,7 +186,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
185186
let attr = attrs.next().unwrap()?;
186187
assert_eq!(attr.key, QName(b"attr"));
187188
assert_eq!(
188-
attr.decode_and_unescape_value_with(reader.decoder(), |ent| reader.get_entity(ent))?,
189+
attr.decoded_and_normalized_value_with(reader.decoder(), 9, |e| reader.get_entity(e))?,
189190
"Message: hello world"
190191
);
191192

examples/read_nodes.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,8 @@ impl Translation {
7070
for attr_result in element.attributes() {
7171
let a = attr_result?;
7272
match a.key.as_ref() {
73-
b"Language" => lang = a.decode_and_unescape_value(reader.decoder())?,
74-
b"Tag" => tag = a.decode_and_unescape_value(reader.decoder())?,
73+
b"Language" => lang = a.decoded_and_normalized_value(reader.decoder())?,
74+
b"Tag" => tag = a.decoded_and_normalized_value(reader.decoder())?,
7575
_ => (),
7676
}
7777
}
@@ -141,7 +141,7 @@ fn main() -> Result<(), AppError> {
141141
Ok::<Cow<'_, str>, Infallible>(std::borrow::Cow::from(""))
142142
})
143143
.unwrap().to_string();
144-
let value = a.decode_and_unescape_value(reader.decoder()).or_else(|err| {
144+
let value = a.decoded_and_normalized_value(reader.decoder()).or_else(|err| {
145145
dbg!("unable to read key in DefaultSettings attribute {:?}, utf8 error {:?}", &a, err);
146146
Ok::<Cow<'_, str>, Infallible>(std::borrow::Cow::from(""))
147147
}).unwrap().to_string();

fuzz/fuzz_targets/fuzz_target_1.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ where
3434
debug_format!(e.name());
3535
for a in e.attributes() {
3636
debug_format!(a);
37-
if a.ok().map_or(false, |a| a.unescape_value().is_err()) {
37+
if a.ok().map_or(false, |a| a.normalized_value().is_err()) {
3838
break;
3939
}
4040
}

0 commit comments

Comments
 (0)