Commit f3f0028

up stream the changes from g3 (#1128)
1 parent c7914da commit f3f0028

File tree: 3 files changed, +28 −17 lines


universal-sentence-encoder/src/index.ts

Lines changed: 15 additions & 8 deletions
@@ -18,7 +18,7 @@
 import * as tfconv from '@tensorflow/tfjs-converter';
 import * as tf from '@tensorflow/tfjs-core';
 
-import {loadTokenizer, loadVocabulary, Tokenizer} from './tokenizer';
+import {loadTokenizer as loadTokenizerInternal, loadVocabulary, Tokenizer} from './tokenizer';
 import {loadQnA} from './use_qna';
 
 export {version} from './version';
@@ -47,12 +47,11 @@ export class UniversalSentenceEncoder
   private tokenizer: Tokenizer;
 
   async loadModel(modelUrl?: string) {
-    return modelUrl
-        ? tfconv.loadGraphModel(modelUrl)
-        : tfconv.loadGraphModel(
-              'https://tfhub.dev/tensorflow/tfjs-model/universal-sentence-encoder-lite/1/default/1',
-              {fromTFHub: true}
-          );
+    return modelUrl ?
+        tfconv.loadGraphModel(modelUrl) :
+        tfconv.loadGraphModel(
+            'https://tfhub.dev/tensorflow/tfjs-model/universal-sentence-encoder-lite/1/default/1',
+            {fromTFHub: true});
   }
 
   async load(config: LoadConfig = {}) {
@@ -102,6 +101,14 @@ export class UniversalSentenceEncoder
   }
 }
 
+/**
+ * Load the Tokenizer for use independently from the UniversalSentenceEncoder.
+ *
+ * @param pathToVocabulary (optional) Provide a path to the vocabulary file.
+ */
+export async function loadTokenizer(pathToVocabulary?: string) {
+  return loadTokenizerInternal(pathToVocabulary || BASE_PATH + '/vocab.json');
+}
+
 export {Tokenizer};
-export {loadTokenizer};
 export {loadQnA};
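
For context on the new top-level export: it lets callers tokenize text without loading the graph model, falling back to the packaged vocabulary when no path is given. A minimal usage sketch, assuming the published package name '@tensorflow-models/universal-sentence-encoder' and a Tokenizer.encode(text) method as found in the tokenizer source; this is not an official example from the commit.

// Sketch only: the package name and encode() call are assumptions, not part of this diff.
import {loadTokenizer} from '@tensorflow-models/universal-sentence-encoder';

async function tokenizeExample() {
  // With no argument, the new wrapper falls back to BASE_PATH + '/vocab.json'.
  const tokenizer = await loadTokenizer();
  const ids = tokenizer.encode('Hello, how are you?');
  console.log(ids);  // vocabulary ids produced by the SentencePiece-style tokenizer
}

tokenizeExample();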

universal-sentence-encoder/src/tokenizer/index.ts

Lines changed: 5 additions & 5 deletions
@@ -54,8 +54,8 @@ export class Tokenizer
   trie: Trie;
 
   constructor(
-      private vocabulary: Vocabulary,
-      private reservedSymbolsCount = RESERVED_SYMBOLS_COUNT) {
+      private readonly vocabulary: Vocabulary,
+      private readonly reservedSymbolsCount = RESERVED_SYMBOLS_COUNT) {
     this.trie = new Trie();
 
     for (let i = this.reservedSymbolsCount; i < this.vocabulary.length; i++) {
@@ -121,7 +121,7 @@ export class Tokenizer
     }
 
     // Merge consecutive unks.
-    const merged = [];
+    const merged: number[] = [];
     let isPreviousUnk = false;
     for (let i = 0; i < results.length; i++) {
      const id = results[i];
@@ -139,9 +139,9 @@
 /**
  * Load the Tokenizer for use independently from the UniversalSentenceEncoder.
  *
- * @param pathToVocabulary (optional) Provide a path to the vocabulary file.
+ * @param pathToVocabulary Provide a path to the vocabulary file.
  */
-export async function loadTokenizer(pathToVocabulary?: string) {
+export async function loadTokenizer(pathToVocabulary: string) {
   const vocabulary = await loadVocabulary(pathToVocabulary);
   const tokenizer = new Tokenizer(vocabulary);
   return tokenizer;
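
The constructor change above adopts TypeScript parameter properties with readonly, and the explicit number[] annotation pins the element type of an initially empty array. A minimal sketch of the readonly parameter-property pattern in isolation, with a placeholder Vocabulary type (the repo's actual type is not shown in this diff):

// Sketch of 'private readonly' parameter properties, as adopted in the Tokenizer constructor.
// The Vocabulary alias here is a placeholder, not the definition from './tokenizer'.
type Vocabulary = Array<[string, number]>;

class Example {
  // Declares and assigns this.vocabulary in one step; readonly forbids later reassignment.
  constructor(private readonly vocabulary: Vocabulary) {}

  size(): number {
    // Reading the field is fine; `this.vocabulary = []` would be a compile-time error.
    return this.vocabulary.length;
  }
}

console.log(new Example([['hello', -1.5]]).size());  // 1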

universal-sentence-encoder/src/tokenizer/trie.ts

Lines changed: 8 additions & 4 deletions
@@ -21,7 +21,7 @@ import {stringToChars} from '../util';
 type OutputNode = [string[], number, number];
 
 class TrieNode {
-  public parent: TrieNode;
+  public parent: TrieNode|null;
   public end: boolean;
   public children: {[firstSymbol: string]: TrieNode};
   public word: OutputNode;
@@ -74,12 +74,16 @@ export class Trie
     const output: OutputNode[] = [];
     let node = this.root.children[ss[0]];
 
-    for (let i = 0; i < ss.length && node; i++){
-      if (node.end){ output.push(node.word); }
+    for (let i = 0; i < ss.length && node; i++) {
+      if (node.end) {
+        output.push(node.word);
+      }
       node = node.children[ss[i + 1]];
     }
 
-    if (!output.length){ output.push([[ss[0]], 0, 0]); }
+    if (!output.length) {
+      output.push([[ss[0]], 0, 0]);
+    }
 
     return output;
   }
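
One plausible reason for the parent: TrieNode|null change in the first hunk is that the root node has no parent, so under strictNullChecks the field must admit null explicitly; the diff itself does not state the motivation. A minimal standalone sketch (not the repo's TrieNode):

// Standalone sketch: illustrates why a nullable parent type is needed for the root node.
class Node {
  parent: Node|null = null;            // root keeps null; children point back at their parent
  children: {[symbol: string]: Node} = {};
}

const root = new Node();
const child = new Node();
child.parent = root;
root.children['a'] = child;
console.log(root.parent === null);     // true; compiles only because the type admits null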
