forked from mozilla/bugbug
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun.py
110 lines (93 loc) · 4.38 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
import argparse
import csv
import os
from datetime import datetime
from datetime import timedelta
import numpy as np
from bugbug import bugzilla
from bugbug import db
from bugbug import repository # noqa
if __name__ == '__main__':
    # Command-line entry point. Depending on the flags it will:
    #   --train           download the database and train the selected model
    #                     (otherwise a previously saved model is loaded);
    #   --classify        classify bugs one at a time, interactively;
    #   --generate-sheet  classify recent bugs and write the results to a CSV.
    parser = argparse.ArgumentParser()
    parser.add_argument('--lemmatization', help='Perform lemmatization (using spaCy)', action='store_true')
    parser.add_argument('--train', help='Perform training', action='store_true')
    parser.add_argument('--goal',
                        help='Goal of the classifier',
                        choices=['bug', 'regression', 'tracking', 'qaneeded', 'uplift', 'component', 'devdocneeded', 'defectfeaturetask'],
                        default='bug')
    parser.add_argument('--classifier', help='Type of the classifier', choices=['default', 'nn'], default='default')
    parser.add_argument('--classify', help='Perform evaluation', action='store_true')
    parser.add_argument('--generate-sheet', help='Perform evaluation on bugs from last week and generate a csv file', action='store_true')
    args = parser.parse_args()

    # File name is '<goal><classifier>model', with the classifier part omitted
    # for the default classifier (e.g. 'bugmodel', 'componentnnmodel').
    model_file_name = '{}{}model'.format(
        args.goal,
        '' if args.classifier == 'default' else args.classifier
    )

    # Each model module is imported inside its own branch so that only the
    # module for the requested goal gets imported.
    if args.goal == 'bug':
        from bugbug.models.bug import BugModel
        model_class = BugModel
    elif args.goal == 'defectfeaturetask':
        from bugbug.models.defect_feature_task import DefectFeatureTaskModel
        model_class = DefectFeatureTaskModel
    elif args.goal == 'regression':
        from bugbug.models.regression import RegressionModel
        model_class = RegressionModel
    elif args.goal == 'tracking':
        from bugbug.models.tracking import TrackingModel
        model_class = TrackingModel
    elif args.goal == 'qaneeded':
        from bugbug.models.qaneeded import QANeededModel
        model_class = QANeededModel
    elif args.goal == 'uplift':
        from bugbug.models.uplift import UpliftModel
        model_class = UpliftModel
    elif args.goal == 'component':
        # 'component' is the only goal with a dedicated 'nn' implementation.
        if args.classifier == 'default':
            from bugbug.models.component import ComponentModel
            model_class = ComponentModel
        elif args.classifier == 'nn':
            from bugbug.models.component_nn import ComponentNNModel
            model_class = ComponentNNModel
    elif args.goal == 'devdocneeded':
        from bugbug.models.devdocneeded import DevDocNeededModel
        model_class = DevDocNeededModel

    if args.train:
        db.download()

        model = model_class(args.lemmatization)
        model.train()
    else:
        model = model_class.load(model_file_name)

    if args.classify:
        for bug in bugzilla.get_bugs():
            print(f'https://bugzilla.mozilla.org/show_bug.cgi?id={ bug["id"] } - { bug["summary"]} ')

            # Only request feature importances from models that enable them.
            if model.calculate_importance:
                probas, importances = model.classify(bug, probabilities=True, importances=True)

                feature_names = model.get_feature_names()
                for i, (importance, index, is_positive) in enumerate(importances):
                    print(f'{i + 1}. \'{feature_names[int(index)]}\' ({"+" if (is_positive) else "-"}{importance})')
            else:
                probas = model.classify(bug, probabilities=True, importances=False)

            if np.argmax(probas) == 1:
                print(f'Positive! {probas}')
            else:
                print(f'Negative! {probas}')

            # Wait for the user to press Enter before moving to the next bug.
            input()

    if args.generate_sheet:
        # Rows at or above this value in p[0][1] are marked 'y'.
        # NOTE(review): p[0][1] is presumably the positive-class probability
        # for the single classified bug - confirm against Model.classify.
        positive_threshold = 0.7

        today = datetime.utcnow()
        a_week_ago = today - timedelta(7)
        bugs = bugzilla.download_bugs_between(a_week_ago, today)

        print(f'Classifying {len(bugs)} bugs...')

        # Header row; the third column is written empty for every bug
        # (presumably to be filled in by hand - confirm with sheet users).
        rows = [
            ['Bug', f'{args.goal}(model)', args.goal, 'Title']
        ]

        for bug in bugs:
            p = model.classify(bug, probabilities=True)
            rows.append([
                f'https://bugzilla.mozilla.org/show_bug.cgi?id={bug["id"]}',
                'y' if p[0][1] >= positive_threshold else 'n',
                '',
                bug['summary'],
            ])

        os.makedirs('sheets', exist_ok=True)
        # Reuse `today` so the file name matches the end of the queried window
        # even if the run crosses midnight.
        sheet_path = os.path.join('sheets', f'{args.goal}-{today.strftime("%Y-%m-%d")}-labels.csv')
        # newline='' is required by the csv module so it controls line endings
        # itself (otherwise '\r\r\n' appears on platforms that translate '\n');
        # an explicit UTF-8 encoding keeps non-ASCII summaries from failing
        # under a non-UTF-8 default locale.
        with open(sheet_path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerows(rows)