tensorflow
diff --git a/‎universal-sentence-encoder/.gitignore‎
Lines changed: 3 additions & 0 deletions b/‎universal-sentence-encoder/.gitignore‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎universal-sentence-encoder/README.md‎
Lines changed: 29 additions & 0 deletions b/‎universal-sentence-encoder/README.md‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎universal-sentence-encoder/package.json‎
Lines changed: 40 additions & 0 deletions b/‎universal-sentence-encoder/package.json‎
Lines changed: 40 additions & 0 deletions
diff --git a/‎universal-sentence-encoder/rollup.config.js‎
Lines changed: 52 additions & 0 deletions b/‎universal-sentence-encoder/rollup.config.js‎
Lines changed: 52 additions & 0 deletions
diff --git a/‎universal-sentence-encoder/run_tests.ts‎
Lines changed: 21 additions & 0 deletions b/‎universal-sentence-encoder/run_tests.ts‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎universal-sentence-encoder/src/index.ts‎
Lines changed: 89 additions & 0 deletions b/‎universal-sentence-encoder/src/index.ts‎
Lines changed: 89 additions & 0 deletions
diff --git a/‎universal-sentence-encoder/src/test_util.ts‎
Lines changed: 42 additions & 0 deletions b/‎universal-sentence-encoder/src/test_util.ts‎
Lines changed: 42 additions & 0 deletions
@@ -0,0 +1,3 @@
+node_modules/
+.cache/
+dist/
@@ -0,0 +1,29 @@
+# Universal Sentence Encoder lite
+
+The Universal Sentence Encoder ([Cer et al., 2018](https://arxiv.org/pdf/1803.11175.pdf)) is a model that encodes text into 512-dimensional embeddings. These embeddings can then be used as inputs to natural language processing tasks such as [sentiment classification](https://en.wikipedia.org/wiki/Sentiment_analysis) and [textual similarity](https://en.wikipedia.org/wiki/Semantic_similarity) analysis.
+
+This module is a TensorFlow.js [`FrozenModel`](https://js.tensorflow.org/api/latest/#loadFrozenModel) converted from the Universal Sentence Encoder lite ([module on TFHub](https://tfhub.dev/google/universal-sentence-encoder-lite/2)), a lightweight version of the original. The lite model is based on the Transformer ([Vaswani et al., 2017](https://arxiv.org/pdf/1706.03762.pdf)) architecture, and uses an 8k word piece [vocabulary](https://storage.googleapis.com/tfjs-models/savedmodel/universal_sentence_encoder/vocab.json).
+
+## Usage
+
+```js
+
+import * as use from '@tensorflow-models/universal-sentence-encoder';
+
+// Load the model.
+const model = await use.load();
+
+// Embed an array of sentences.
+const sentences = [
+  'Hello.',
+  'How are you?'
+];
+
+const embeddings = await model.embed(sentences);
+
+// `embeddings` is a 2D tensor consisting of the 512-dimensional embeddings for each sentence.
+// So in this example `embeddings` has the shape [2, 512].
+const verbose = true;
+embeddings.print(verbose);
+
+```
@@ -0,0 +1,40 @@
+{
+  "name": "@tensorflow-models/universal-sentence-encoder",
+  "version": "0.0.1",
+  "description": "Universal Sentence Encoder lite in TensorFlow.js",
+  "main": "dist/index.js",
+  "jsnext:main": "dist/universal-sentence-encoder.esm.js",
+  "module": "dist/universal-sentence-encoder.esm.js",
+  "unpkg": "dist/universal-sentence-encoder.min.js",
+  "jsdelivr": "dist/universal-sentence-encoder.min.js",
+  "types": "dist/index.d.ts",
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/tensorflow/tfjs-models.git"
+  },
+  "peerDependencies": {
+    "@tensorflow/tfjs": "^0.14.2"
+  },
+  "devDependencies": {
+    "@tensorflow/tfjs": "^0.14.2",
+    "@types/jasmine": "~2.5.53",
+    "jasmine": "^3.3.1",
+    "jasmine-core": "^3.3.0",
+    "rimraf": "~2.6.2",
+    "rollup": "~0.58.2",
+    "rollup-plugin-node-resolve": "~3.3.0",
+    "rollup-plugin-typescript2": "~0.13.0",
+    "rollup-plugin-uglify": "~3.0.0",
+    "ts-node": "~5.0.0",
+    "tslint": "~5.8.0",
+    "typescript": "2.9.2"
+  },
+  "scripts": {
+    "build": "rimraf dist && tsc",
+    "publish-local": "yarn build && yalc push",
+    "test": "ts-node run_tests.ts",
+    "publish-npm": "yarn build && rollup -c && npm publish",
+    "lint": "tslint -p . -t verbose"
+  },
+  "license": "Apache-2.0"
+}
@@ -0,0 +1,52 @@
+/**
+ * @license
+ * Copyright 2019 Google LLC. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+
+import node from 'rollup-plugin-node-resolve';
+import typescript from 'rollup-plugin-typescript2';
+import uglify from 'rollup-plugin-uglify';
+
+const PREAMBLE =
+    `// @tensorflow/tfjs-models Copyright ${(new Date).getFullYear()} Google`;
+
+function minify() {
+  return uglify({output: {preamble: PREAMBLE}});
+}
+
+function config({plugins = [], output = {}}) {
+  return {
+    input: 'src/index.ts',
+    plugins: [
+      typescript({tsconfigOverride: {compilerOptions: {module: 'ES2015'}}}),
+      node(), ...plugins
+    ],
+    output: {banner: PREAMBLE, globals: {'@tensorflow/tfjs': 'tf'}, ...output},
+    external: ['@tensorflow/tfjs']
+  };
+}
+
+export default [
+  config(
+      {output: {format: 'umd', name: 'universal-sentence-encoder', file: 'dist/universal-sentence-encoder.js'}}),
+  config({
+    plugins: [minify()],
+    output: {format: 'umd', name: 'universal-sentence-encoder', file: 'dist/universal-sentence-encoder.min.js'}
+  }),
+  config({
+    plugins: [minify()],
+    output: {format: 'es', file: 'dist/universal-sentence-encoder.esm.js'}
+  })
+];
@@ -0,0 +1,21 @@
+/**
+ * @license
+ * Copyright 2019 Google LLC. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+
+import * as jasmine_util from '@tensorflow/tfjs-core/dist/jasmine_util';
+import {runTests} from '../test_util';
+
+// Invoke the runner imported from ../test_util, passing it the tfjs-core
+// jasmine utilities module.
+runTests(jasmine_util);
@@ -0,0 +1,89 @@
+/**
+ * @license
+ * Copyright 2019 Google LLC. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+
+import * as tf from '@tensorflow/tfjs';
+
+import {Tokenizer} from './tokenizer';
+
+// URL prefix (with trailing slash) for every remote asset fetched by this
+// module: the model graph, the weights manifest, and the vocabulary JSON.
+const BASE_PATH =
+    'https://storage.googleapis.com/tfjs-models/savedmodel/universal_sentence_encoder/';
+
+/**
+ * Creates a `UniversalSentenceEncoder`, waits for its `load()` method to
+ * finish, and resolves with that instance.
+ */
+export async function load() {
+  const encoder = new UniversalSentenceEncoder();
+  await encoder.load();
+  return encoder;
+}
+
+/**
+ * Pairs the Universal Sentence Encoder frozen model with the tokenizer built
+ * from its vocabulary. `load()` must complete before `embed()` is called,
+ * because `embed()` reads both the model and the tokenizer.
+ */
+export class UniversalSentenceEncoder {
+  private model: tf.FrozenModel;
+  private tokenizer: Tokenizer;
+
+  /** Loads the frozen model graph and weights manifest from BASE_PATH. */
+  async loadModel() {
+    return tf.loadFrozenModel(
+        `${BASE_PATH}tensorflowjs_model.pb`,
+        `${BASE_PATH}weights_manifest.json`);
+  }
+
+  /** Fetches `vocab.json` from BASE_PATH and resolves with its parsed JSON. */
+  async loadVocabulary() {
+    const vocabulary = await fetch(`${BASE_PATH}vocab.json`);
+    return vocabulary.json();
+  }
+
+  /**
+   * Fetches the model and the vocabulary concurrently, then stores the model
+   * and a `Tokenizer` constructed from the vocabulary on this instance.
+   */
+  async load() {
+    const [model, vocabulary] =
+        await Promise.all([this.loadModel(), this.loadVocabulary()]);
+
+    this.model = model;
+    this.tokenizer = new Tokenizer(vocabulary);
+  }
+
+  /**
+   *
+   * Returns a 2D Tensor of shape [input.length, 512] that contains the
+   * Universal Sentence Encoder embeddings for each input.
+   *
+   * The temporary input tensors are disposed whether or not model execution
+   * succeeds; the caller owns the returned tensor.
+   *
+   * @param inputs A string or an array of strings to embed.
+   */
+  async embed(inputs: string[]|string): Promise<tf.Tensor2D> {
+    if (typeof inputs === 'string') {
+      inputs = [inputs];
+    }
+
+    const encodings = inputs.map(d => this.tokenizer.encode(d));
+
+    // One [inputIndex, tokenIndex] pair per encoded token, in input order.
+    // Built in a single pass instead of re-copying the array with `concat`
+    // for every input.
+    const flattenedIndicesArr: Array<[number, number]> = [];
+    encodings.forEach((encoding, i) => {
+      for (let index = 0; index < encoding.length; index++) {
+        flattenedIndicesArr.push([i, index]);
+      }
+    });
+
+    const indices = tf.tensor2d(
+        flattenedIndicesArr, [flattenedIndicesArr.length, 2], 'int32');
+    const values = tf.tensor1d(tf.util.flatten(encodings) as number[], 'int32');
+
+    try {
+      const embeddings = await this.model.executeAsync({indices, values});
+      return embeddings as tf.Tensor2D;
+    } finally {
+      // Release the inputs even when execution rejects, so a failed call does
+      // not leak tensor memory.
+      indices.dispose();
+      values.dispose();
+    }
+  }
+}
+
+export {Tokenizer};
@@ -0,0 +1,42 @@
+/**
+ * @license
+ * Copyright 2019 Google LLC. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+
+// Stub vocabulary exported for tests (going by this file's name). Each entry
+// is a [string, number] pair — presumably [token piece, score] in the format
+// the Tokenizer constructor expects; TODO confirm against src/tokenizer.
+// NOTE(review): 'i' is listed twice below with the same value. Confirm the
+// duplicate is intentional before removing it, since dropping an entry would
+// shift the position of every entry after it.
+export const stubbedTokenizerVocab = [
+  ['�', 0],
+  ['<s>', 0],
+  ['</s>', 0],
+  ['extra_token_id_1', 0],
+  ['extra_token_id_2', 0],
+  ['extra_token_id_3', 0],
+  ['▁', -2],
+  ['▁a', -1],
+  ['▁ç', -2],
+  ['a', -3],
+  ['.', -1],
+  ['▁I', -1],
+  ['▁like', -1],
+  ['▁it', -1],
+  ['I', -2],
+  ['like', -2],
+  ['it', -2],
+  ['l', -3],
+  ['i', -3],
+  ['k', -3],
+  ['e', -3],
+  ['i', -3],
+  ['t', -3]
+];
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+node_modules/`
	`2`	`+.cache/`
	`3`	`+dist/`