@@ -67,6 +67,15 @@ impl From<ProtobufFloatParseError> for LexerError {
6767 }
6868}
6969
/// The raw bytes for a single char or escape sequence in a string literal.
///
/// Up to four bytes are stored inline; only the first `len` bytes of `buf`
/// are meaningful. The raw bytes are available via the `IntoIterator`
/// implementation.
#[derive(Debug, Clone, Copy)]
pub struct DecodedBytes {
    // A single char can be up to 4 bytes when encoded in UTF-8.
    buf: [u8; 4],
    // Number of leading bytes of `buf` that are in use.
    len: u8,
}
78+
7079#[ derive( Copy , Clone ) ]
7180pub struct Lexer < ' a > {
7281 language : ParserLanguage ,
@@ -440,24 +449,24 @@ impl<'a> Lexer<'a> {
440449 // octEscape = '\' octalDigit octalDigit octalDigit
441450 // charEscape = '\' ( "a" | "b" | "f" | "n" | "r" | "t" | "v" | '\' | "'" | '"' )
442451 // quote = "'" | '"'
443- pub fn next_byte_value ( & mut self ) -> LexerResult < u8 > {
452+ pub fn next_str_lit_bytes ( & mut self ) -> LexerResult < DecodedBytes > {
444453 match self . next_char ( ) ? {
445454 '\\' => {
446455 match self . next_char ( ) ? {
447- '\'' => Ok ( b'\'' ) ,
448- '"' => Ok ( b'"' ) ,
449- '\\' => Ok ( b'\\' ) ,
450- 'a' => Ok ( b'\x07' ) ,
451- 'b' => Ok ( b'\x08' ) ,
452- 'f' => Ok ( b'\x0c' ) ,
453- 'n' => Ok ( b'\n' ) ,
454- 'r' => Ok ( b'\r' ) ,
455- 't' => Ok ( b'\t' ) ,
456- 'v' => Ok ( b'\x0b' ) ,
456+ '\'' => Ok ( b'\'' . into ( ) ) ,
457+ '"' => Ok ( b'"' . into ( ) ) ,
458+ '\\' => Ok ( b'\\' . into ( ) ) ,
459+ 'a' => Ok ( b'\x07' . into ( ) ) ,
460+ 'b' => Ok ( b'\x08' . into ( ) ) ,
461+ 'f' => Ok ( b'\x0c' . into ( ) ) ,
462+ 'n' => Ok ( b'\n' . into ( ) ) ,
463+ 'r' => Ok ( b'\r' . into ( ) ) ,
464+ 't' => Ok ( b'\t' . into ( ) ) ,
465+ 'v' => Ok ( b'\x0b' . into ( ) ) ,
457466 'x' => {
458467 let d1 = self . next_hex_digit ( ) ? as u8 ;
459468 let d2 = self . next_hex_digit ( ) ? as u8 ;
460- Ok ( ( ( d1 << 4 ) | d2) as u8 )
469+ Ok ( ( ( ( d1 << 4 ) | d2) as u8 ) . into ( ) )
461470 }
462471 d if d >= '0' && d <= '7' => {
463472 let mut r = d as u8 - b'0' ;
@@ -467,16 +476,14 @@ impl<'a> Lexer<'a> {
467476 Ok ( d) => r = ( r << 3 ) + d as u8 ,
468477 }
469478 }
470- Ok ( r)
479+ Ok ( r. into ( ) )
471480 }
472481 // https://github.com/google/protobuf/issues/4562
473- // TODO: overflow
474- c => Ok ( c as u8 ) ,
482+ c => Ok ( c. into ( ) ) ,
475483 }
476484 }
477485 '\n' | '\0' => Err ( LexerError :: IncorrectInput ) ,
478- // TODO: check overflow
479- c => Ok ( c as u8 ) ,
486+ c => Ok ( c. into ( ) ) ,
480487 }
481488 }
482489
@@ -530,7 +537,7 @@ impl<'a> Lexer<'a> {
530537 } ;
531538 first = false ;
532539 while self . lookahead_char ( ) != Some ( q) {
533- self . next_byte_value ( ) ?;
540+ self . next_str_lit_bytes ( ) ?;
534541 }
535542 self . next_char_expect_eq ( q) ?;
536543
@@ -663,6 +670,37 @@ impl<'a> Lexer<'a> {
663670 }
664671}
665672
673+ impl From < u8 > for DecodedBytes {
674+ fn from ( value : u8 ) -> Self {
675+ DecodedBytes {
676+ buf : [ value, 0 , 0 , 0 ] ,
677+ len : 1 ,
678+ }
679+ }
680+ }
681+
682+ impl From < char > for DecodedBytes {
683+ fn from ( value : char ) -> Self {
684+ let mut this = DecodedBytes {
685+ buf : [ 0 ; 4 ] ,
686+ len : 0 ,
687+ } ;
688+ let len = value. encode_utf8 ( & mut this. buf ) . len ( ) ;
689+ this. len = len as _ ;
690+ this
691+ }
692+ }
693+
694+ // means that we work with `Vec::extend`.
695+ impl IntoIterator for DecodedBytes {
696+ type Item = u8 ;
697+ type IntoIter = std:: iter:: Take < std:: array:: IntoIter < u8 , 4 > > ;
698+
699+ fn into_iter ( self ) -> Self :: IntoIter {
700+ self . buf . into_iter ( ) . take ( self . len as _ )
701+ }
702+ }
703+
666704#[ cfg( test) ]
667705mod test {
668706 use super :: * ;
0 commit comments