1 file changed: +5 -5 lines changed
Columns: original file line number | diff line number | diff line change
@@ -936,7 +936,11 @@ def _create_vocab_sentencepiece(self):
936936 scores : list [float ] = [- 10000.0 ] * vocab_size
937937 toktypes : list [int ] = [SentencePieceTokenTypes .UNUSED ] * vocab_size
938938
939- for token_id in range (vocab_size ):
939+ for token_id in range (tokenizer .vocab_size ()):
940+ if token_id >= vocab_size :
941+ logger .warning (f'ignore tokens from { token_id } : id is out of range, max={ vocab_size - 1 } ' )
942+ break
943+
940944 piece = tokenizer .IdToPiece (token_id )
941945 text = piece .encode ("utf-8" )
942946 score = tokenizer .GetScore (token_id )
@@ -951,10 +955,6 @@ def _create_vocab_sentencepiece(self):
951955 elif tokenizer .IsByte (token_id ):
952956 toktype = SentencePieceTokenTypes .BYTE
953957
954- if token_id >= vocab_size :
955- logger .warning (f'ignore tokens from { token_id } : id is out of range, max={ vocab_size - 1 } ' )
956- break
957-
958958 tokens [token_id ] = text
959959 scores [token_id ] = score
960960 toktypes [token_id ] = toktype
You can’t perform that action at this time.
0 commit comments