MFS working baseline and scorer

kanishkamisra · kanishkamisra · commit 697cb66cf7ba · 2021-08-10T22:25:57.000-04:00
diff --git a/MFS Baseline.ipynb b/MFS Baseline.ipynb
@@ -4,47 +4,7 @@
    "cell_type": "code",
    "execution_count": 1,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/kanishka/anaconda3/lib/python3.7/site-packages/sklearn/feature_extraction/image.py:167: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n",
-      "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
-      "  dtype=np.int):\n",
-      "/home/kanishka/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/least_angle.py:35: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n",
-      "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
-      "  eps=np.finfo(np.float).eps,\n",
-      "/home/kanishka/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/least_angle.py:597: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n",
-      "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
-      "  eps=np.finfo(np.float).eps, copy_X=True, fit_path=True,\n",
-      "/home/kanishka/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/least_angle.py:836: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n",
-      "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
-      "  eps=np.finfo(np.float).eps, copy_X=True, fit_path=True,\n",
-      "/home/kanishka/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/least_angle.py:862: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n",
-      "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
-      "  eps=np.finfo(np.float).eps, positive=False):\n",
-      "/home/kanishka/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/least_angle.py:1097: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n",
-      "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
-      "  max_n_alphas=1000, n_jobs=None, eps=np.finfo(np.float).eps,\n",
-      "/home/kanishka/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/least_angle.py:1344: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n",
-      "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
-      "  max_n_alphas=1000, n_jobs=None, eps=np.finfo(np.float).eps,\n",
-      "/home/kanishka/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/least_angle.py:1480: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n",
-      "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
-      "  eps=np.finfo(np.float).eps, copy_X=True, positive=False):\n",
-      "/home/kanishka/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/randomized_l1.py:152: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n",
-      "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
-      "  precompute=False, eps=np.finfo(np.float).eps,\n",
-      "/home/kanishka/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/randomized_l1.py:320: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n",
-      "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
-      "  eps=np.finfo(np.float).eps, random_state=None,\n",
-      "/home/kanishka/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/randomized_l1.py:580: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n",
-      "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
-      "  eps=4 * np.finfo(np.float).eps, n_jobs=None,\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "from nltk.corpus import wordnet as wn\n",
     "from collections import defaultdict\n",
@@ -247,7 +207,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.1"
+   "version": "3.8.3"
   }
  },
  "nbformat": 4,
diff --git a/README.md b/README.md
@@ -34,6 +34,7 @@ This script converts the datasets into jsonl files with the following format for
 
 ```json
 {
+    "id": "<instance id as given in the WSD datasets>",
     "word": "<the word>",
     "start": "<start index of the span>",
     "end": "<end index of the span>",
diff --git a/Scorer.class b/Scorer.class
diff --git a/evaluate.py b/evaluate.py
@@ -0,0 +1,3 @@
+'''
+Replicates Scorer.java, but also reports POS-level results.
+'''
diff --git a/mfs_baseline.py b/mfs_baseline.py
@@ -0,0 +1,31 @@
+import argparse
+from nltk.corpus import wordnet as wn
+from collections import defaultdict
+import json
+
+# NOTE(review): `datasets` is not referenced anywhere else in this script;
+# presumably these are the dataset names/prefixes accepted on the command
+# line -- confirm the intended use.
+datasets = ['ALL', 'sem']
+
+def wn_first(lemma, pos=None):
+    """Return the sense key of `lemma` in its first WordNet synset.
+
+    Looks up `wn.synsets(lemma, pos)`, takes the first synset returned and
+    scans its lemmas for a sense key that starts with ``'<lemma>%'``.
+
+    Args:
+        lemma: Lemma string to look up; the prefix match against the sense
+            key is case-sensitive.
+        pos: Optional WordNet POS code forwarded to `wn.synsets`
+            (the script passes 'n', 'v', 'a' or 'r').
+
+    Returns:
+        The first matching sense key, or '' when WordNet has no synset for
+        the lemma/POS pair or no key of the first synset matches the lemma.
+    """
+    synsets = wn.synsets(lemma, pos)
+    # Guard against unknown lemmas: indexing [0] on an empty result used to
+    # raise IndexError and abort the whole run.
+    if not synsets:
+        return ''
+
+    prefix = '{}%'.format(lemma)
+    for candidate in synsets[0].lemmas():
+        key = candidate.key()
+        if key.startswith(prefix):
+            return key
+    # No lemma of the first synset belongs to `lemma` itself.
+    return ''
+
+# Maps the POS tags stored in the jsonl records to the single-letter POS
+# codes that are handed to `wn_first` (and from there to `wn.synsets`).
+pos = {
+    'VERB': 'v',
+    'NOUN': 'n',
+    'ADJ': 'a',
+    'ADV': 'r'
+}
+
+# `args` was used below without ever being created; parse it here.
+# NOTE(review): the option name `--dataset` is an assumption derived from the
+# `args.dataset` attribute the script reads -- confirm the intended CLI.
+parser = argparse.ArgumentParser(
+    description='Write first-WordNet-sense predictions for a WSD dataset.')
+parser.add_argument(
+    '--dataset', required=True,
+    help='dataset name; reads data/jsonl/<dataset>.jsonl and writes '
+         'data/outputs/<dataset>_mfs.txt')
+args = parser.parse_args()
+
+with open(f'data/outputs/{args.dataset}_mfs.txt', 'w') as fw:
+    # The input path needs the f-prefix too; without it the literal file name
+    # 'data/jsonl/{args.dataset}.jsonl' was opened.
+    with open(f'data/jsonl/{args.dataset}.jsonl', 'r') as f:
+        for line in f:
+            # One JSON object per line; 'id', 'lemma' and 'pos' are read.
+            items = json.loads(line)
+            mfs = wn_first(items['lemma'], pos[items['pos']])
+            # Output format: "<instance id> <sense key>", one per line.
+            out = f'{items["id"]} {mfs}\n'
+            fw.write(out)

Original file line number	Diff line number	Diff line change
`@@ -34,6 +34,7 @@ This script converts the datasets into jsonl files with the following format for`
`34`	`34`
`35`	`35`	```json
`36`	`36`	`{`
	`37`	`+ "id": "<instance id as given in the WSD datasets>",`
`37`	`38`	`"word": "<the word>",`
`38`	`39`	`"start": "<start index of the span>",`
`39`	`40`	`"end": "<end index of the span>",`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+'''`
	`2`	`+Replicates Scorer.java, but also reports POS-level results.`
	`3`	`+'''`