diff --git a/.gitignore b/.gitignore index fa1a8d8..53603d7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ Data/ __pycache__/ .idea/ +.venv/ README.html Embeddings-Discussion.html Models-Specifications.html @@ -14,3 +15,4 @@ Models/Burmese_temp_genvec/ Models/Burmese_model4_version2/ Models/Other/ *~ +**/.DS_Store \ No newline at end of file diff --git a/event_log.txt b/event_log.txt new file mode 100644 index 0000000..e69de29 diff --git a/lstm_word_segmentation/helpers.py b/lstm_word_segmentation/helpers.py index 3b5b00c..5a830e2 100644 --- a/lstm_word_segmentation/helpers.py +++ b/lstm_word_segmentation/helpers.py @@ -31,24 +31,11 @@ def sigmoid(inp): inp: the input which can be a scalar or a 1d numpy array """ inp = np.asarray(inp) - scalar_input = False - if inp.ndim == 0: - inp = inp[None] - scalar_input = True # Checking for case when the input is an array/np.array of arrays. In this case only the first element of inp is # used. A common example is when A = np.array([np.array([1, 2, 3])]). - if type(inp[0]) == np.ndarray: + if inp.ndim == 2: inp = inp[0] - out = [] - for x in inp: - if x < -20: - out.append(0) - else: - out.append(1.0/(1.0 + np.exp(-x))) - out = np.array(out) - if scalar_input: - return np.squeeze(out) - return out + return 1.0 / (1.0 + np.exp(-np.clip(inp, -709.78, 709.78))) def print_grapheme_clusters(thrsh, language, exclusive): diff --git a/lstm_word_segmentation/word_segmenter.py b/lstm_word_segmentation/word_segmenter.py index a06e0a8..7738c7f 100644 --- a/lstm_word_segmentation/word_segmenter.py +++ b/lstm_word_segmentation/word_segmenter.py @@ -701,4 +701,4 @@ def pick_lstm_model(model_name, embedding, train_data, eval_data): input_epochs=15, input_training_data=train_data, input_evaluation_data=eval_data, input_language=language, input_embedding_type=embedding) word_segmenter.set_model(model) - return word_segmenter + return word_segmenter \ No newline at end of file diff --git a/test/test_helpers.py b/test/test_helpers.py index a2d3784..bf1c691 100644 --- a/test/test_helpers.py +++ b/test/test_helpers.py @@ -67,7 +67,8 @@ def test_sigmoid(self): TestCase(0, np.array(0.5)), TestCase(np.array([0, -1000]), np.array([0.5, 0])), TestCase(np.array([[0, 1, 100, -1, -10]]), np.array([0.5, 0.73105858, 1, 0.26894142, 0.00004540])), - TestCase(np.array([np.array([0, 1, 100, -1, -10]), np.array([1, 2, 3])]), np.array([0.5, 0.73105858, 1, 0.26894142, 0.00004540])), + TestCase(np.array([np.array([0, 1, 100, -1, -10]), np.array([1, 2, 3, 4, 5])]), np.array([0.5, 0.73105858, 1, 0.26894142, 0.00004540])), + TestCase(np.array([np.array([1, 2, 3])]), [0.73105858, 0.88079708, 0.95257413]), ] for cas in cases: computed = sigmoid(inp=cas.input) diff --git a/train_thai.py b/train_thai.py index 07fce15..dab3e75 100755 --- a/train_thai.py +++ b/train_thai.py @@ -33,4 +33,4 @@ word_segmenter.hunits)) # word_segmenter.save_model() word_segmenter.test_model_line_by_line(verbose=True, fast=True) -# ''' +# ''' \ No newline at end of file