Add simple sentence clustering demo for the Universal Sentence Encoder. (#145)

annxingyuan · web-flow · commit cf6ef097b4e2 · 2019-02-07T10:58:56.000-05:00
diff --git a/universal-sentence-encoder/.gitignore b/universal-sentence-encoder/.gitignore
@@ -1,3 +1,4 @@
 node_modules/
 .cache/
-dist/
+dist/
+.DS_Store
diff --git a/universal-sentence-encoder/README.md b/universal-sentence-encoder/README.md
@@ -1,8 +1,22 @@
 # Universal Sentence Encoder lite
 
-The Universal Sentence Encoder ([Cer et al., 2018](https://arxiv.org/pdf/1803.11175.pdf)) is a model that encodes text into 512-dimensional embeddings. These embeddings can then be used as inputs to natural language processing tasks such as [sentiment classification](https://en.wikipedia.org/wiki/Sentiment_analysis) and [textual similarity](https://en.wikipedia.org/wiki/Semantic_similarity) analysis.
+The Universal Sentence Encoder ([Cer et al., 2018](https://arxiv.org/pdf/1803.11175.pdf)) (USE) is a model that encodes text into 512-dimensional embeddings. These embeddings can then be used as inputs to natural language processing tasks such as [sentiment classification](https://en.wikipedia.org/wiki/Sentiment_analysis) and [textual similarity](https://en.wikipedia.org/wiki/Semantic_similarity) analysis.
 
-This module is a TensorFlow.js [`FrozenModel`](https://js.tensorflow.org/api/latest/#loadFrozenModel) converted from the Universal Sentence Encoder lite ([module on TFHub](https://tfhub.dev/google/universal-sentence-encoder-lite/2)), a lightweight version of the original. The lite model is based on the Transformer ([Vaswani et al, 2017](https://arxiv.org/pdf/1706.03762.pdf)) architecture, and uses an 8k word piece [vocabulary](https://storage.googleapis.com/tfjs-models/savedmodel/universal_sentence_encoder/vocab.json).
+This module is a TensorFlow.js [`FrozenModel`](https://js.tensorflow.org/api/latest/#loadFrozenModel) converted from the USE lite ([module on TFHub](https://tfhub.dev/google/universal-sentence-encoder-lite/2)), a lightweight version of the original. The lite model is based on the Transformer ([Vaswani et al, 2017](https://arxiv.org/pdf/1706.03762.pdf)) architecture, and uses an 8k word piece [vocabulary](https://storage.googleapis.com/tfjs-models/savedmodel/universal_sentence_encoder/vocab.json).
+
+In [this demo](./demo/index.js) we embed six sentences with the USE, and render their self-similarity scores in a matrix (redder means more similar):
+
+![selfsimilarity](./images/self_similarity.jpg)
+
+*The matrix shows that USE embeddings can be used to cluster sentences by similarity.*
+
+The sentences (taken from the [TensorFlow Hub USE lite colab](https://colab.sandbox.google.com/github/tensorflow/hub/blob/master/examples/colab/semantic_similarity_with_tf_hub_universal_encoder_lite.ipynb#scrollTo=_GSCW5QIBKVe)):
+1. I like my phone.
+2. Your cellphone looks great.
+3. How old are you?
+4. What is your age?
+5. An apple a day, keeps the doctors away.
+6. Eating strawberries is healthy.
 
 ## Usage
 
diff --git a/universal-sentence-encoder/demo/.babelrc b/universal-sentence-encoder/demo/.babelrc
@@ -0,0 +1,18 @@
+{
+  "presets": [
+    [
+      "env",
+      {
+        "esmodules": false,
+        "targets": {
+          "browsers": [
+            "> 3%"
+          ]
+        }
+      }
+    ]
+  ],
+  "plugins": [
+    "transform-runtime"
+  ]
+}
diff --git a/universal-sentence-encoder/demo/README.md b/universal-sentence-encoder/demo/README.md
@@ -0,0 +1,71 @@
+# Universal Sentence Encoder Demo
+
+## Contents
+
+The demo shows how to use embeddings produced by the Universal Sentence Encoder.
+
+## Setup
+
+cd into the demo folder:
+
+```sh
+cd universal-sentence-encoder/demo
+```
+
+Install dependencies and prepare the build directory:
+
+```sh
+yarn
+```
+
+To watch files for changes, and launch a dev server:
+
+```sh
+yarn watch
+```
+
+## If you are developing universal-sentence-encoder locally, and want to test the changes in the demo
+
+Install yalc:
+```sh
+npm i -g yalc
+```
+
+cd into the universal-sentence-encoder folder:
+```sh
+cd universal-sentence-encoder
+```
+
+Install dependencies:
+```sh
+yarn
+```
+
+Publish universal-sentence-encoder locally:
+```sh
+yalc push
+```
+
+cd into the demo folder and install dependencies:
+
+```sh
+cd demo
+yarn
+```
+
+Link the local universal-sentence-encoder to the demo:
+```sh
+yalc link @tensorflow-models/universal-sentence-encoder
+```
+
+Start the dev demo server:
+```sh
+yarn watch
+```
+
+To get future updates from the universal-sentence-encoder source code:
+```
+# cd up into the universal-sentence-encoder directory
+cd ../
+yarn build && yalc push
+```
diff --git a/universal-sentence-encoder/demo/index.html b/universal-sentence-encoder/demo/index.html
@@ -0,0 +1,107 @@
+<!-- Copyright 2019 Google LLC. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================-->
+<!DOCTYPE html>
+<html>
+
+<head>
+    <title>TensorFlow.js Universal Sentence Encoder lite demo</title>
+    <style>
+      h1 {
+        margin-bottom: 35px;
+      }
+
+      #main {
+        padding-top: 30px;
+        font-family: Helvetica, sans-serif;
+        max-width: 960px;
+        min-width: 600px;
+        width: 60vw;
+        margin-left: auto;
+        margin-right: auto;
+      }
+
+      #sentences-container {
+        flex: 1 1 auto;
+      }
+
+      #sentences-container > div {
+        margin-bottom: 10px;
+      }
+
+      #container {
+        display: flex;
+        flex-direction: row;
+      }
+
+      #self-similarity-matrix {
+        position: relative;
+      }
+
+      .labels {
+        position: absolute;
+      }
+
+      .x-axis {
+        bottom: 100%;
+        width: 100%;
+        height: 20px;
+      }
+
+      .x-axis > div {
+        transform: translateX(-50%);
+      }
+
+      .y-axis {
+        right: 100%;
+        height: 100%;
+        width: 20px;
+      }
+
+      .y-axis > div {
+        transform: translateY(-50%);
+      }
+
+      .labels > div {
+        position: absolute;
+      }
+
+      #description {
+        margin-bottom: 50px;
+        line-height: 1.6;
+      }
+   </style>
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+</head>
+
+<body>
+  <div id='main'>
+    <h1>Universal Sentence Encoder lite demo</h1>
+    <div id="description">This demo is taken from the <a target="_blank" href="https://colab.sandbox.google.com/github/tensorflow/hub/blob/master/examples/colab/semantic_similarity_with_tf_hub_universal_encoder_lite.ipynb#scrollTo=_GSCW5QIBKVe">TensorFlow Hub Universal Sentence Encoder lite colab</a>. It shows the model's ability to group sentences by semantic similarity using their embeddings. The matrix on the right shows self-similarity scores (dot products) between the embeddings for the sentences on the left. The redder the cell, the higher the similarity score.</div>
+    <div id="loading">
+        Loading the model...
+    </div>
+    <div id="container">
+      <div id="sentences-container"></div>
+      <div id="self-similarity-matrix">
+        <div class="labels y-axis"></div>
+        <div class="labels x-axis"></div>
+        <canvas></canvas>
+      </div>
+    </div>
+  </div>
+  <script src="index.js"></script>
+</body>
+
+</html>
diff --git a/universal-sentence-encoder/demo/index.js b/universal-sentence-encoder/demo/index.js
@@ -0,0 +1,82 @@
+/**
+ * @license
+ * Copyright 2019 Google LLC. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+
+import * as use from '@tensorflow-models/universal-sentence-encoder';
+import {interpolateReds} from 'd3-scale-chromatic';
+
+const sentences = [
+  'I like my phone.', 'Your cellphone looks great.', 'How old are you?',
+  'What is your age?', 'An apple a day, keeps the doctors away.',
+  'Eating strawberries is healthy.'
+];
+
+const init = async () => {
+  const model = await use.load();
+
+  document.querySelector('#loading').style.display = 'none';
+  renderSentences();
+
+  const embeddings = await model.embed(sentences);
+
+  const matrixSize = 250;
+  const cellSize = matrixSize / sentences.length;
+  const canvas = document.querySelector('canvas');
+  canvas.width = matrixSize;
+  canvas.height = matrixSize;
+
+  const ctx = canvas.getContext('2d');
+
+  const xLabelsContainer = document.querySelector('.x-axis');
+  const yLabelsContainer = document.querySelector('.y-axis');
+
+  for (let i = 0; i < sentences.length; i++) {
+    const labelXDom = document.createElement('div');
+    const labelYDom = document.createElement('div');
+
+    labelXDom.textContent = i + 1;
+    labelYDom.textContent = i + 1;
+    labelXDom.style.left = (i * cellSize + cellSize / 2) + 'px';
+    labelYDom.style.top = (i * cellSize + cellSize / 2) + 'px';
+
+    xLabelsContainer.appendChild(labelXDom);
+    yLabelsContainer.appendChild(labelYDom);
+
+    for (let j = i; j < sentences.length; j++) {
+      const sentenceI = embeddings.slice([i, 0], [1]);
+      const sentenceJ = embeddings.slice([j, 0], [1]);
+      const sentenceITranspose = false;
+      const sentenceJTransepose = true;
+      const score =
+          sentenceI.matMul(sentenceJ, sentenceITranspose, sentenceJTransepose)
+              .dataSync();
+
+      ctx.fillStyle = interpolateReds(score);
+      ctx.fillRect(j * cellSize, i * cellSize, cellSize, cellSize);
+      ctx.fillRect(i * cellSize, j * cellSize, cellSize, cellSize);
+    }
+  }
+};
+
+init();
+
+const renderSentences = () => {
+  sentences.forEach((sentence, i) => {
+    const sentenceDom = document.createElement('div');
+    sentenceDom.textContent = `${i + 1}) ${sentence}`;
+    document.querySelector('#sentences-container').appendChild(sentenceDom);
+  });
+};
diff --git a/universal-sentence-encoder/demo/package.json b/universal-sentence-encoder/demo/package.json
@@ -0,0 +1,51 @@
+{
+  "name": "tfjs-models",
+  "version": "0.0.1",
+  "description": "",
+  "main": "index.js",
+  "license": "Apache-2.0",
+  "private": true,
+  "engines": {
+    "node": ">=8.9.0"
+  },
+  "dependencies": {
+    "@tensorflow-models/universal-sentence-encoder": "0.0.1",
+    "@tensorflow/tfjs": "^0.14.2",
+    "d3-scale-chromatic": "^1.3.3"
+  },
+  "scripts": {
+    "watch": "cross-env NODE_ENV=development parcel index.html --no-hmr --open ",
+    "build": "cross-env NODE_ENV=production parcel build index.html  --no-minify --public-url ./",
+    "lint": "eslint ."
+  },
+  "devDependencies": {
+    "babel-core": "~6.26.3",
+    "babel-plugin-transform-runtime": "~6.23.0",
+    "babel-polyfill": "~6.26.0",
+    "babel-preset-env": "~1.6.1",
+    "clang-format": "~1.2.2",
+    "cross-env": "^5.2.0",
+    "dat.gui": "~0.7.2",
+    "eslint": "~4.19.1",
+    "eslint-config-google": "~0.9.1",
+    "parcel-bundler": "~1.10.3",
+    "yalc": "~1.0.0-pre.23"
+  },
+  "eslintConfig": {
+    "extends": "google",
+    "rules": {
+      "require-jsdoc": 0,
+      "valid-jsdoc": 0
+    },
+    "env": {
+      "es6": true
+    },
+    "parserOptions": {
+      "ecmaVersion": 8,
+      "sourceType": "module"
+    }
+  },
+  "eslintIgnore": [
+    "dist/"
+  ]
+}
diff --git a/universal-sentence-encoder/demo/yarn.lock b/universal-sentence-encoder/demo/yarn.lock
diff --git a/universal-sentence-encoder/images/self_similarity.jpg b/universal-sentence-encoder/images/self_similarity.jpg