File tree 3 files changed +28
-17
lines changed
universal-sentence-encoder/src
3 files changed +28
-17
lines changed Original file line number Diff line number Diff line change 18
18
import * as tfconv from '@tensorflow/tfjs-converter' ;
19
19
import * as tf from '@tensorflow/tfjs-core' ;
20
20
21
- import { loadTokenizer , loadVocabulary , Tokenizer } from './tokenizer' ;
21
+ import { loadTokenizer as loadTokenizerInternal , loadVocabulary , Tokenizer } from './tokenizer' ;
22
22
import { loadQnA } from './use_qna' ;
23
23
24
24
export { version } from './version' ;
@@ -47,12 +47,11 @@ export class UniversalSentenceEncoder {
47
47
private tokenizer : Tokenizer ;
48
48
49
49
async loadModel ( modelUrl ?: string ) {
50
- return modelUrl
51
- ? tfconv . loadGraphModel ( modelUrl )
52
- : tfconv . loadGraphModel (
53
- 'https://tfhub.dev/tensorflow/tfjs-model/universal-sentence-encoder-lite/1/default/1' ,
54
- { fromTFHub : true }
55
- ) ;
50
+ return modelUrl ?
51
+ tfconv . loadGraphModel ( modelUrl ) :
52
+ tfconv . loadGraphModel (
53
+ 'https://tfhub.dev/tensorflow/tfjs-model/universal-sentence-encoder-lite/1/default/1' ,
54
+ { fromTFHub : true } ) ;
56
55
}
57
56
58
57
async load ( config : LoadConfig = { } ) {
@@ -102,6 +101,14 @@ export class UniversalSentenceEncoder {
102
101
}
103
102
}
104
103
104
+ /**
105
+ * Load the Tokenizer for use independently from the UniversalSentenceEncoder.
106
+ *
107
+ * @param pathToVocabulary (optional) Provide a path to the vocabulary file.
108
+ */
109
+ export async function loadTokenizer ( pathToVocabulary ?: string ) {
110
+ return loadTokenizerInternal ( pathToVocabulary || BASE_PATH + '/vocab.json' ) ;
111
+ }
112
+
105
113
export { Tokenizer } ;
106
- export { loadTokenizer } ;
107
114
export { loadQnA } ;
Original file line number Diff line number Diff line change @@ -54,8 +54,8 @@ export class Tokenizer {
54
54
trie : Trie ;
55
55
56
56
constructor (
57
- private vocabulary : Vocabulary ,
58
- private reservedSymbolsCount = RESERVED_SYMBOLS_COUNT ) {
57
+ private readonly vocabulary : Vocabulary ,
58
+ private readonly reservedSymbolsCount = RESERVED_SYMBOLS_COUNT ) {
59
59
this . trie = new Trie ( ) ;
60
60
61
61
for ( let i = this . reservedSymbolsCount ; i < this . vocabulary . length ; i ++ ) {
@@ -121,7 +121,7 @@ export class Tokenizer {
121
121
}
122
122
123
123
// Merge consecutive unks.
124
- const merged = [ ] ;
124
+ const merged : number [ ] = [ ] ;
125
125
let isPreviousUnk = false ;
126
126
for ( let i = 0 ; i < results . length ; i ++ ) {
127
127
const id = results [ i ] ;
@@ -139,9 +139,9 @@ export class Tokenizer {
139
139
/**
140
140
* Load the Tokenizer for use independently from the UniversalSentenceEncoder.
141
141
*
142
- * @param pathToVocabulary (optional) Provide a path to the vocabulary file.
142
+ * @param pathToVocabulary Provide a path to the vocabulary file.
143
143
*/
144
- export async function loadTokenizer ( pathToVocabulary ? : string ) {
144
+ export async function loadTokenizer ( pathToVocabulary : string ) {
145
145
const vocabulary = await loadVocabulary ( pathToVocabulary ) ;
146
146
const tokenizer = new Tokenizer ( vocabulary ) ;
147
147
return tokenizer ;
Original file line number Diff line number Diff line change @@ -21,7 +21,7 @@ import {stringToChars} from '../util';
21
21
type OutputNode = [ string [ ] , number , number ] ;
22
22
23
23
class TrieNode {
24
- public parent : TrieNode ;
24
+ public parent : TrieNode | null ;
25
25
public end : boolean ;
26
26
public children : { [ firstSymbol : string ] : TrieNode } ;
27
27
public word : OutputNode ;
@@ -74,12 +74,16 @@ export class Trie {
74
74
const output : OutputNode [ ] = [ ] ;
75
75
let node = this . root . children [ ss [ 0 ] ] ;
76
76
77
- for ( let i = 0 ; i < ss . length && node ; i ++ ) {
78
- if ( node . end ) { output . push ( node . word ) ; }
77
+ for ( let i = 0 ; i < ss . length && node ; i ++ ) {
78
+ if ( node . end ) {
79
+ output . push ( node . word ) ;
80
+ }
79
81
node = node . children [ ss [ i + 1 ] ] ;
80
82
}
81
83
82
- if ( ! output . length ) { output . push ( [ [ ss [ 0 ] ] , 0 , 0 ] ) ; }
84
+ if ( ! output . length ) {
85
+ output . push ( [ [ ss [ 0 ] ] , 0 , 0 ] ) ;
86
+ }
83
87
84
88
return output ;
85
89
}
You can’t perform that action at this time.
0 commit comments