-
Notifications
You must be signed in to change notification settings - Fork 3
/
masakhaner-zero-shot.py
87 lines (60 loc) · 2.54 KB
/
masakhaner-zero-shot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import numpy as np
import sys
from flair.datasets import NER_MASAKHANE
from flair.models import SequenceTagger
from tabulate import tabulate
# Language codes of the MasakhaNER corpora that every model is evaluated on.
languages = ["amh", "hau", "ibo", "kin", "lug", "luo", "pcm", "swa", "wol", "yor"]

# Handed to NER_MASAKHANE as ``label_name_map``: the DATE label is mapped to "O".
label_name_map = {"DATE": "O"}


def load_corpora(language_codes):
    """Return a dict mapping each language code to its NER_MASAKHANE corpus.

    Every corpus is created with ``version="v1"`` and the module-level
    ``label_name_map``.
    """
    return {
        code: NER_MASAKHANE(languages=code, version="v1",
                            label_name_map=label_name_map)
        for code in language_codes
    }


def score_percent(model, sentences):
    """Evaluate ``model`` on ``sentences`` and return its main score in percent.

    The score is ``evaluate(...).main_score`` multiplied by 100 and rounded to
    one decimal place.
    """
    result = model.evaluate(sentences, gold_label_type="ner", mini_batch_size=64)
    return round(result.main_score * 100, 1)


def result_rows(model_name, dev_scores, test_scores):
    """Build the development and test table rows for one model.

    Each row is ``[model_name, score_1, ..., score_n, average]`` where the
    average is the mean of that row's own scores, rounded to one decimal.

    Returns:
        A ``(dev_row, test_row)`` tuple of lists.
    """
    dev_row = [model_name] + list(dev_scores)
    test_row = [model_name] + list(test_scores)
    # Each split is averaged over its OWN scores; the development average
    # must not be derived from the test scores.
    dev_row.append(round(np.mean(dev_scores), 1))
    test_row.append(round(np.mean(test_scores), 1))
    return dev_row, test_row


def language_average_row(table, num_languages):
    """Return the "Language Avg." summary row for ``table``.

    Args:
        table: Rows shaped ``[model_name, score_1, ..., score_n, average]``.
        num_languages: Number of per-language score columns (``n``).

    Returns:
        ``["Language Avg.", mean_1, ..., mean_n, mean_of_means]`` with every
        value rounded to one decimal. Only the per-language columns are
        averaged; the trailing per-model average column is not read.
    """
    summary = ["Language Avg."]
    for column in range(1, num_languages + 1):
        summary.append(round(np.mean([row[column] for row in table]), 1))
    # Final cell: mean over the (already rounded) per-language averages.
    summary.append(round(np.mean(summary[1:]), 1))
    return summary


def evaluate_model(model_name, corpora):
    """Load ``model_name`` and score it on the dev and test split of each language.

    Args:
        model_name: Identifier passed to ``SequenceTagger.load``.
        corpora: Mapping from language code to a corpus exposing ``dev`` and
            ``test``.

    Returns:
        The ``(dev_row, test_row)`` tuple produced by ``result_rows``.
    """
    model = SequenceTagger.load(model_name)
    dev_scores = []
    test_scores = []
    for code in languages:
        corpus = corpora[code]
        dev_scores.append(score_percent(model, corpus.dev))
        test_scores.append(score_percent(model, corpus.test))
    return result_rows(model_name, dev_scores, test_scores)


def main():
    """Evaluate every model named on the command line and print result tables."""
    model_names = sys.argv[1:]
    if not model_names:
        # Without models all averages would be NaN; stop before loading corpora.
        sys.exit("usage: {} MODEL [MODEL ...]".format(sys.argv[0]))

    language_mapping = load_corpora(languages)

    print("Models:", model_names)

    dev_table = []
    test_table = []
    for model_name in model_names:
        dev_row, test_row = evaluate_model(model_name, language_mapping)
        dev_table.append(dev_row)
        test_table.append(test_row)

    # Calculate mean of language columns per language over all models
    dev_table.append(language_average_row(dev_table, len(languages)))
    # Same for test set
    test_table.append(language_average_row(test_table, len(languages)))

    # Final tables
    headers = ["Model Name"] + languages + ["Avg."]

    print("Development Results:")
    print(tabulate(dev_table, headers=headers, tablefmt="github"))

    print("")

    print("Test Results:")
    print(tabulate(test_table, headers=headers, tablefmt="github"))


if __name__ == "__main__":
    main()