@@ -361,7 +361,7 @@ macro_rules! read_until_close {
361361 } ,
362362 // `<?` - processing instruction
363363 Ok ( Some ( b'?' ) ) => match $reader
364- . read_pi ( $buf, & mut $self. state. offset)
364+ . read :: < PiParser > ( $buf, & mut $self. state. offset)
365365 $( . $await) ?
366366 {
367367 Ok ( bytes) => $self. state. emit_question_mark( bytes) ,
@@ -374,7 +374,7 @@ macro_rules! read_until_close {
374374 } ,
375375 // `<...` - opening or self-closed tag
376376 Ok ( Some ( _) ) => match $reader
377- . read_element ( $buf, & mut $self. state. offset)
377+ . read :: < ElementParser > ( $buf, & mut $self. state. offset)
378378 $( . $await) ?
379379 {
380380 Ok ( bytes) => $self. state. emit_start( bytes) ,
@@ -763,6 +763,25 @@ impl<R> Reader<R> {
763763
764764////////////////////////////////////////////////////////////////////////////////////////////////////
765765
766+ /// Decouples reading data from a data source from parsing the XML structure in that data.
767+ ///
768+ /// This trait is implemented for every parser that processes a piece of the XML grammar.
769+ pub trait Parser : Default {
770+ /// Processes new data and tries to find the end of the construct being parsed.
771+ ///
772+ /// Returns the position of the end of the construct in `bytes` if the search
773+ /// succeeded, and `None` otherwise.
774+ ///
775+ /// # Parameters
776+ /// - `bytes`: a slice in which to search for the end of the construct.
777+ /// Should contain text in an ASCII-compatible encoding
778+ fn feed ( & mut self , bytes : & [ u8 ] ) -> Option < usize > ;
779+
780+ /// Returns the parse error produced by this parser when the end of input is
781+ /// reached before the end of the construct being parsed was found.
782+ fn eof_error ( ) -> SyntaxError ;
783+ }
784+
766785/// Represents an input for a reader that can return borrowed data.
767786///
768787/// There are two implementors of this trait: generic one that read data from
@@ -821,20 +840,20 @@ trait XmlSource<'r, B> {
821840
822841 /// Read input until processing instruction is finished.
823842 ///
824- /// This method expect that `<?` already was read.
843+ /// This method expects that the start sequence of the parsed construct has already been read.
825844 ///
826- /// Returns a slice of data read up to end of processing instruction (`>`),
827- /// which does not include into result (`?` at the end included) .
845+ /// Returns a slice of data read up to the end of a chunk; the end itself is not
846+ /// included in the result.
828847 ///
829- /// If input (`Self`) is exhausted and nothing was read, returns `None `.
848+ /// If input (`Self`) is exhausted and nothing was read, returns a `SyntaxError`.
830849 ///
831850 /// # Parameters
832851 /// - `buf`: Buffer that could be filled from an input (`Self`) and
833852 /// from which [events] could borrow their data
834853 /// - `position`: Will be increased by amount of bytes consumed
835854 ///
836855 /// [events]: crate::events::Event
837- fn read_pi ( & mut self , buf : B , position : & mut usize ) -> Result < & ' r [ u8 ] > ;
856+ fn read < P : Parser > ( & mut self , buf : B , position : & mut usize ) -> Result < & ' r [ u8 ] > ;
838857
839858 /// Read input until comment or CDATA is finished.
840859 ///
@@ -853,30 +872,6 @@ trait XmlSource<'r, B> {
853872 /// [events]: crate::events::Event
854873 fn read_bang_element ( & mut self , buf : B , position : & mut usize ) -> Result < ( BangType , & ' r [ u8 ] ) > ;
855874
856- /// Read input until XML element is closed by approaching a `>` symbol.
857- /// Returns a buffer that contains a data between `<` and `>` or
858- /// [`SyntaxError::UnclosedTag`] if end-of-input was reached before reading `>`.
859- ///
860- /// Derived from `read_until`, but modified to handle XML attributes
861- /// using a minimal state machine.
862- ///
863- /// Attribute values are [defined] as follows:
864- /// ```plain
865- /// AttValue := '"' (([^<&"]) | Reference)* '"'
866- /// | "'" (([^<&']) | Reference)* "'"
867- /// ```
868- /// (`Reference` is something like `&quot;`, but we don't care about
869- /// escaped characters at this level)
870- ///
871- /// # Parameters
872- /// - `buf`: Buffer that could be filled from an input (`Self`) and
873- /// from which [events] could borrow their data
874- /// - `position`: Will be increased by amount of bytes consumed
875- ///
876- /// [defined]: https://www.w3.org/TR/xml11/#NT-AttValue
877- /// [events]: crate::events::Event
878- fn read_element ( & mut self , buf : B , position : & mut usize ) -> Result < & ' r [ u8 ] > ;
879-
880875 /// Consume and discard all the whitespace until the next non-whitespace
881876 /// character or EOF.
882877 ///
@@ -1510,6 +1505,7 @@ mod test {
15101505 mod read_element {
15111506 use super :: * ;
15121507 use crate :: errors:: { Error , SyntaxError } ;
1508+ use crate :: reader:: ElementParser ;
15131509 use crate :: utils:: Bytes ;
15141510 use pretty_assertions:: assert_eq;
15151511
@@ -1521,7 +1517,7 @@ mod test {
15211517 let mut input = b"" . as_ref( ) ;
15221518 // ^= 1
15231519
1524- match $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? {
1520+ match $source( & mut input) . read :: < ElementParser > ( buf, & mut position) $( . $await) ? {
15251521 Err ( Error :: Syntax ( SyntaxError :: UnclosedTag ) ) => { }
15261522 x => panic!(
15271523 "Expected `Err(Syntax(UnclosedTag))`, but got `{:?}`" ,
@@ -1543,7 +1539,7 @@ mod test {
15431539 // ^= 2
15441540
15451541 assert_eq!(
1546- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1542+ Bytes ( $source( & mut input) . read :: < ElementParser > ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
15471543 Bytes ( b"" )
15481544 ) ;
15491545 assert_eq!( position, 2 ) ;
@@ -1557,7 +1553,7 @@ mod test {
15571553 // ^= 5
15581554
15591555 assert_eq!(
1560- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1556+ Bytes ( $source( & mut input) . read :: < ElementParser > ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
15611557 Bytes ( b"tag" )
15621558 ) ;
15631559 assert_eq!( position, 5 ) ;
@@ -1571,7 +1567,7 @@ mod test {
15711567 // ^= 3
15721568
15731569 assert_eq!(
1574- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1570+ Bytes ( $source( & mut input) . read :: < ElementParser > ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
15751571 Bytes ( b":" )
15761572 ) ;
15771573 assert_eq!( position, 3 ) ;
@@ -1585,7 +1581,7 @@ mod test {
15851581 // ^= 6
15861582
15871583 assert_eq!(
1588- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1584+ Bytes ( $source( & mut input) . read :: < ElementParser > ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
15891585 Bytes ( b":tag" )
15901586 ) ;
15911587 assert_eq!( position, 6 ) ;
@@ -1599,7 +1595,7 @@ mod test {
15991595 // ^= 39
16001596
16011597 assert_eq!(
1602- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1598+ Bytes ( $source( & mut input) . read :: < ElementParser > ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
16031599 Bytes ( br#"tag attr-1=">" attr2 = '>' 3attr"# )
16041600 ) ;
16051601 assert_eq!( position, 39 ) ;
@@ -1618,7 +1614,7 @@ mod test {
16181614 // ^= 3
16191615
16201616 assert_eq!(
1621- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1617+ Bytes ( $source( & mut input) . read :: < ElementParser > ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
16221618 Bytes ( b"/" )
16231619 ) ;
16241620 assert_eq!( position, 3 ) ;
@@ -1632,7 +1628,7 @@ mod test {
16321628 // ^= 6
16331629
16341630 assert_eq!(
1635- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1631+ Bytes ( $source( & mut input) . read :: < ElementParser > ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
16361632 Bytes ( b"tag/" )
16371633 ) ;
16381634 assert_eq!( position, 6 ) ;
@@ -1646,7 +1642,7 @@ mod test {
16461642 // ^= 4
16471643
16481644 assert_eq!(
1649- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1645+ Bytes ( $source( & mut input) . read :: < ElementParser > ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
16501646 Bytes ( b":/" )
16511647 ) ;
16521648 assert_eq!( position, 4 ) ;
@@ -1660,7 +1656,7 @@ mod test {
16601656 // ^= 7
16611657
16621658 assert_eq!(
1663- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1659+ Bytes ( $source( & mut input) . read :: < ElementParser > ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
16641660 Bytes ( b":tag/" )
16651661 ) ;
16661662 assert_eq!( position, 7 ) ;
@@ -1674,7 +1670,7 @@ mod test {
16741670 // ^= 42
16751671
16761672 assert_eq!(
1677- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1673+ Bytes ( $source( & mut input) . read :: < ElementParser > ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
16781674 Bytes ( br#"tag attr-1="/>" attr2 = '/>' 3attr/"# )
16791675 ) ;
16801676 assert_eq!( position, 42 ) ;
0 commit comments