|
1 | 1 | #[cfg(feature = "encoding")] |
2 | 2 | use encoding_rs::UTF_8; |
3 | 3 |
|
4 | | -#[cfg(feature = "encoding")] |
5 | | -use crate::encoding::detect_encoding; |
6 | | -use crate::encoding::Decoder; |
| 4 | +use crate::encoding::{self, Decoder}; |
7 | 5 | use crate::errors::{Error, Result}; |
8 | 6 | use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event}; |
9 | 7 | #[cfg(feature = "encoding")] |
@@ -68,23 +66,31 @@ impl Parser { |
68 | 66 | /// |
69 | 67 | /// [`Text`]: Event::Text |
70 | 68 | pub fn read_text<'b>(&mut self, bytes: &'b [u8], first: bool) -> Result<Event<'b>> { |
71 | | - #[cfg(feature = "encoding")] |
72 | | - if first && self.encoding.can_be_refined() { |
73 | | - if let Some(encoding) = detect_encoding(bytes) { |
74 | | - self.encoding = EncodingRef::BomDetected(encoding); |
75 | | - } |
76 | | - } |
| 69 | + let mut content = bytes; |
77 | 70 |
|
78 | | - let content = if self.trim_text_end { |
| 71 | + if self.trim_text_end { |
79 | 72 | // Skip the ending '<' |
80 | 73 | let len = bytes |
81 | 74 | .iter() |
82 | 75 | .rposition(|&b| !is_whitespace(b)) |
83 | 76 | .map_or_else(|| bytes.len(), |p| p + 1); |
84 | | - &bytes[..len] |
85 | | - } else { |
86 | | - bytes |
87 | | - }; |
| 77 | + content = &bytes[..len]; |
| 78 | + } |
| 79 | + |
| 80 | + if first { |
| 81 | + #[cfg(feature = "encoding")] |
| 82 | + if self.encoding.can_be_refined() { |
| 83 | + if let Some(encoding) = encoding::detect_encoding(bytes) { |
| 84 | + self.encoding = EncodingRef::BomDetected(encoding); |
| 85 | + content = encoding::remove_bom(content, encoding); |
| 86 | + } |
| 87 | + } |
| 88 | + #[cfg(not(feature = "encoding"))] |
| 89 | + if bytes.starts_with(encoding::UTF8_BOM) { |
| 90 | + content = &bytes[encoding::UTF8_BOM.len()..]; |
| 91 | + } |
| 92 | + } |
| 93 | + |
88 | 94 | Ok(Event::Text(BytesText::wrap(content, self.decoder()))) |
89 | 95 | } |
90 | 96 |
|
|
0 commit comments