SuperBruceJia
diff --git a/‎BiLSTM-docker-Python-without-Apache/Dockerfile
+20 b/‎BiLSTM-docker-Python-without-Apache/Dockerfile
+20
diff --git a/‎BiLSTM-docker-Python-without-Apache/NERmodel.py
+181 b/‎BiLSTM-docker-Python-without-Apache/NERmodel.py
+181
diff --git a/‎BiLSTM-docker-Python-without-Apache/config.yml
+32 b/‎BiLSTM-docker-Python-without-Apache/config.yml
+32
@@ -0,0 +1,20 @@
+FROM python:3.6-slim-buster
+
+EXPOSE 8000
+
+WORKDIR /var/www/NERmodel/
+
+COPY ./NERmodel.py /var/www/NERmodel/NERmodel.py
+COPY ./models /var/www/NERmodel/models/
+COPY ./model.py /var/www/NERmodel/model.py
+COPY ./utils.py /var/www/NERmodel/utils.py
+COPY ./config.yml /var/www/NERmodel/config.yml
+
+# Add Python Packages
+RUN pip install --upgrade --no-cache-dir pip
+RUN pip install --upgrade --ignore-installed --no-cache-dir PyYAML
+RUN pip install --upgrade --no-cache-dir flask
+RUN pip install --upgrade --no-cache-dir flasgger==0.8.1
+RUN pip install --upgrade --no-cache-dir torch==1.2.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
+
+CMD python /var/www/NERmodel/NERmodel.py
@@ -0,0 +1,181 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# import necessary Python Packages
+import re
+import pickle
+import torch
+import yaml
+from model import BiLSTMCRF
+from utils import *
+import warnings
+import numpy as np
+from flask import Flask, request
+from flasgger import Swagger
+
+
+warnings.filterwarnings("ignore")
+device = torch.device("cpu")
+
+
+app = Flask(__name__)
+swagger = Swagger(app)
+
+
+def load_params(path: str):
+    """
+    Load the parameters (data)
+    """
+    with open(path + "data.pkl", "rb") as fopen:
+        data_map = pickle.load(fopen)
+    return data_map
+
+
+def strQ2B(ustring):
+    rstring = ""
+    for uchar in ustring:
+        inside_code=ord(uchar)
+        if inside_code == 12288:
+            inside_code = 32
+        elif inside_code >= 65281 and inside_code <= 65374:
+            inside_code -= 65248
+        rstring += chr(inside_code)
+    return rstring
+
+
+def cut_text(text, length):
+    textArr = re.findall('.{' + str(length) + '}', text)
+    textArr.append(text[(len(textArr) * length):])
+    return textArr
+
+
+def load_config():
+    """
+    Load hyper-parameters from the YAML file
+    """
+    fopen = open("config.yml")
+    config = yaml.load(fopen, Loader=yaml.FullLoader)
+    fopen.close()
+    return config
+
+
+class ChineseNER:
+    def __init__(self, entry="train"):
+        # Load some Hyper-parameters
+        config = load_config()
+        self.embedding_size = config.get("embedding_size")
+        self.hidden_size = config.get("hidden_size")
+        self.batch_size = config.get("batch_size")
+        self.model_path = config.get("model_path")
+        self.dropout = config.get("dropout")
+        self.tags = config.get("tags")
+        self.learning_rate = config.get("learning_rate")
+        self.epochs = config.get("epochs")
+        self.weight_decay = config.get("weight_decay")
+        self.transfer_learning = config.get("transfer_learning")
+        self.lr_decay_step = config.get("lr_decay_step")
+        self.lr_decay_rate = config.get("lr_decay_rate")
+        self.max_length = config.get("max_length")
+
+        # Model Initialization
+        self.main_model(entry)
+
+    def main_model(self, entry):
+        """
+        Model Initialization
+        """
+        # The Testing & Inference Process
+        if entry == "predict":
+            data_map = load_params(path=self.model_path)
+            input_size = data_map.get("input_size")
+            self.tag_map = data_map.get("tag_map")
+            self.vocab = data_map.get("vocab")
+            self.model = BiLSTMCRF(
+                tag_map=self.tag_map,
+                vocab_size=input_size,
+                dropout=0.0,
+                embedding_dim=self.embedding_size,
+                hidden_dim=self.hidden_size,
+                max_length=self.max_length
+            )
+            self.restore_model()
+
+    def restore_model(self):
+        """
+        Restore the model if there is one
+        """
+        try:
+            self.model.load_state_dict(torch.load(self.model_path + "params.pkl"))
+            print("Model Successfully Restored!")
+        except Exception as error:
+            print("Model Failed to restore! {}".format(error))
+
+    def predict(self, input_str):
+        """
+        Prediction & Inference Stage
+        :param input_str: Input Chinese sentence
+        :return entities: Predicted entities
+        """
+        if len(input_str) != 0:
+            # Full-width to half-width
+            input_str = strQ2B(input_str)
+            input_str = re.sub(pattern='。', repl='.', string=input_str)
+            text = cut_text(text=input_str, length=self.max_length)
+
+            cut_out = []
+            for cuttext in text:
+                # Get the embedding vector (Input Vector) from vocab
+                input_vec = [self.vocab.get(i, 0) for i in cuttext]
+
+                # convert it to tensor and run the model
+                sentences = torch.tensor(input_vec).view(1, -1)
+
+                length = np.expand_dims(np.shape(sentences)[1], axis=0)
+                length = torch.tensor(length, dtype=torch.int64, device=device)
+
+                _, paths = self.model(sentences=sentences, real_length=length, lengths=None)
+
+                # Get the entities from the model
+                entities = []
+                for tag in self.tags:
+                    tags = get_tags(paths[0], tag, self.tag_map)
+                    entities += format_result(tags, cuttext, tag)
+
+                # Get all the entities
+                all_start = []
+                for entity in entities:
+                    start = entity.get('start')
+                    all_start.append([start, entity])
+
+                # Sort the results by the "start" index
+                sort_d = [value for index, value in sorted(enumerate(all_start), key=lambda all_start: all_start[1])]
+
+                if len(sort_d) == 0:
+                    return print("There was no entity in this sentence!!")
+                else:
+                    sort_d = np.reshape(np.array(sort_d)[:, 1], [np.shape(sort_d)[0], 1])
+                    cut_out.append(sort_d)
+            return cut_out
+        else:
+            return print('Invalid input! Please re-input!!\n')
+
+
+@app.route('/predict', methods=["GET"])
+def predict_iris_file():
+    """Named Entity Recognition (NER) Prediction for Medical Services
+    ---
+    parameters:
+      - name: input_str
+        in: query
+        type: string
+        required: true
+    """
+    input_str = request.args.get("input_str")
+    cn = ChineseNER("predict")
+    prediction = cn.predict(input_str)
+    return str(prediction)
+
+
+# main function
+if __name__ == "__main__":
+    app.run(host='0.0.0.0', port=8000)
@@ -0,0 +1,32 @@
+embedding_size: 50  # 30 ~ 50 dimensionality for ~ M corpus
+hidden_size: 256
+model_path: models/
+dataset_path: data/
+batch_size: 16
+dropout: 0.50
+learning_rate: 0.001
+lr_decay_step: 5
+lr_decay_rate: 0.90
+epochs: 1000
+weight_decay: 0.0005
+max_length: 120
+transfer_learning: False
+tags:
+  - E95f2a617
+  - E320ca3f6
+  - E340ca71c
+  - E1ceb2bd7
+  - E1deb2d6a
+  - E370cabd5
+  - E360caa42
+  - E310ca263
+  - E300ca0d0
+  - E18eb258b
+  - E3c0cb3b4
+  - E1beb2a44
+  - E3d0cb547
+  - E8ff29ca5
+  - E330ca589
+  - E1eeb2efd
+  - E17eb23f8
+  - E94f2a484