@@ -8,17 +8,17 @@ use std::thread;
8
8
use fancy_regex:: Regex ;
9
9
use pyo3:: exceptions;
10
10
use pyo3:: prelude:: * ;
11
+ use pyo3:: pyclass;
11
12
use pyo3:: PyResult ;
12
13
use pyo3:: types:: { PyBytes , PyList , PyTuple } ;
13
14
use rustc_hash:: FxHashMap as HashMap ;
14
15
15
16
type Rank = u32 ;
16
17
17
- fn _byte_pair_merge < T > (
18
- piece : & [ u8 ] ,
18
+ fn _byte_pair_merge (
19
19
ranks : & HashMap < Vec < u8 > , Rank > ,
20
- f : impl Fn ( std :: ops :: Range < usize > ) -> T ,
21
- ) -> Vec < T > {
20
+ piece : & [ u8 ] ,
21
+ ) -> Vec < ( usize , Rank ) > {
22
22
// This is a vector of (start, rank).
23
23
// The rank is of the byte pair starting at position start.
24
24
// The rank of the last item in the vector is not a valid value.
@@ -93,25 +93,24 @@ fn _byte_pair_merge<T>(
93
93
break ;
94
94
}
95
95
}
96
- let mut out: Vec < T > = Vec :: with_capacity ( parts. len ( ) - 1 ) ;
97
- for i in 0 ..parts. len ( ) - 1 {
98
- out. push ( f ( parts[ i] . 0 ..parts[ i + 1 ] . 0 ) ) ;
99
- }
100
- out
96
+
97
+ parts
101
98
}
102
99
103
100
pub fn byte_pair_encode ( piece : & [ u8 ] , ranks : & HashMap < Vec < u8 > , Rank > ) -> Vec < Rank > {
104
- if piece. len ( ) == 1 {
105
- return vec ! [ ranks[ piece] ] ;
106
- }
107
- _byte_pair_merge ( piece, ranks, |p| ranks[ & piece[ p. start ..p. end ] ] )
101
+ assert ! ( piece. len( ) > 1 ) ;
102
+ _byte_pair_merge ( & ranks, & piece)
103
+ . windows ( 2 )
104
+ . map ( |part| ranks[ & piece[ part[ 0 ] . 0 ..part[ 1 ] . 0 ] ] )
105
+ . collect ( )
108
106
}
109
107
110
108
pub fn byte_pair_split < ' a > ( piece : & ' a [ u8 ] , ranks : & HashMap < Vec < u8 > , Rank > ) -> Vec < & ' a [ u8 ] > {
111
- if piece. len ( ) == 1 {
112
- return vec ! [ piece] ;
113
- }
114
- _byte_pair_merge ( piece, ranks, |p| & piece[ p. start ..p. end ] )
109
+ assert ! ( piece. len( ) > 1 ) ;
110
+ _byte_pair_merge ( & ranks, & piece)
111
+ . windows ( 2 )
112
+ . map ( |part| & piece[ part[ 0 ] . 0 ..part[ 1 ] . 0 ] )
113
+ . collect ( )
115
114
}
116
115
117
116
// Various performance notes:
0 commit comments