bugbug/run.py at master · yixinsun/bugbug · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

import argparse
import csv
import os
from datetime import datetime
from datetime import timedelta

import numpy as np

from bugbug import bugzilla
from bugbug import db
from bugbug import repository  # noqa

if __name__ == '__main__':
    # Command-line entry point. Selects a model class from --goal (and, for
    # the 'component' goal, --classifier), then either trains a new model or
    # loads a saved one, and optionally:
    #   --classify        prints a prediction for each bug, one at a time;
    #   --generate-sheet  writes a CSV of predictions for bugs from the last
    #                     seven days into the 'sheets' directory.
    parser = argparse.ArgumentParser()
    parser.add_argument('--lemmatization', help='Perform lemmatization (using spaCy)', action='store_true')
    parser.add_argument('--train', help='Perform training', action='store_true')
    parser.add_argument('--goal',
                        help='Goal of the classifier',
                        choices=['bug', 'regression', 'tracking', 'qaneeded', 'uplift', 'component', 'devdocneeded', 'defectfeaturetask'],
                        default='bug')
    parser.add_argument('--classifier', help='Type of the classifier', choices=['default', 'nn'], default='default')
    parser.add_argument('--classify', help='Perform evaluation', action='store_true')
    parser.add_argument('--generate-sheet', help='Perform evaluation on bugs from last week and generate a csv file', action='store_true')
    args = parser.parse_args()

    # File name used when loading a saved model: '<goal>model' for the default
    # classifier, '<goal><classifier>model' otherwise (e.g. 'componentnnmodel').
    model_file_name = '{}{}model'.format(
        args.goal,
        '' if args.classifier == 'default' else args.classifier
    )

    # Model modules are imported lazily so that only the module backing the
    # selected goal is loaded.
    # NOTE(review): only the 'component' goal looks at --classifier when
    # choosing the class; for every other goal '--classifier nn' changes just
    # the model file name computed above.
    if args.goal == 'bug':
        from bugbug.models.bug import BugModel
        model_class = BugModel
    elif args.goal == 'defectfeaturetask':
        from bugbug.models.defect_feature_task import DefectFeatureTaskModel
        model_class = DefectFeatureTaskModel
    elif args.goal == 'regression':
        from bugbug.models.regression import RegressionModel
        model_class = RegressionModel
    elif args.goal == 'tracking':
        from bugbug.models.tracking import TrackingModel
        model_class = TrackingModel
    elif args.goal == 'qaneeded':
        from bugbug.models.qaneeded import QANeededModel
        model_class = QANeededModel
    elif args.goal == 'uplift':
        from bugbug.models.uplift import UpliftModel
        model_class = UpliftModel
    elif args.goal == 'component':
        if args.classifier == 'default':
            from bugbug.models.component import ComponentModel
            model_class = ComponentModel
        elif args.classifier == 'nn':
            from bugbug.models.component_nn import ComponentNNModel
            model_class = ComponentNNModel
    elif args.goal == 'devdocneeded':
        from bugbug.models.devdocneeded import DevDocNeededModel
        model_class = DevDocNeededModel

    if args.train:
        # Training path: fetch the database first, then build and train a
        # fresh model instance.
        db.download()

        model = model_class(args.lemmatization)
        model.train()
    else:
        # No training requested: reuse a previously saved model.
        model = model_class.load(model_file_name)

    if args.classify:
        for bug in bugzilla.get_bugs():
            print(f'https://bugzilla.mozilla.org/show_bug.cgi?id={ bug["id"] } - { bug["summary"]} ')

            if model.calculate_importance:
                probas, importances = model.classify(bug, probabilities=True, importances=True)

                # Print a numbered list of features with a +/- sign and the
                # importance value, looking names up by feature index.
                feature_names = model.get_feature_names()
                for i, (importance, index, is_positive) in enumerate(importances):
                    print(f'{i + 1}. \'{feature_names[int(index)]}\' ({"+" if (is_positive) else "-"}{importance})')
            else:
                probas = model.classify(bug, probabilities=True, importances=False)

            # Index 1 having the highest value is reported as "Positive".
            # NOTE(review): presumably index 1 is the positive class - confirm
            # against the model's class ordering.
            if np.argmax(probas) == 1:
                print(f'Positive! {probas}')
            else:
                print(f'Negative! {probas}')
            # Block until the user presses Enter before showing the next bug.
            input()

    if args.generate_sheet:
        # Query window: the seven days ending now (naive UTC timestamps).
        today = datetime.utcnow()
        a_week_ago = today - timedelta(7)
        bugs = bugzilla.download_bugs_between(a_week_ago, today)

        print(f'Classifying {len(bugs)} bugs...')

        # Header row; the third column is written empty for every bug.
        rows = [
            ['Bug', f'{args.goal}(model)', args.goal, 'Title']
        ]

        for bug in bugs:
            p = model.classify(bug, probabilities=True)
            # 'y' when p[0][1] reaches the 0.7 threshold, 'n' otherwise.
            # NOTE(review): presumably p[0][1] is the positive-class
            # probability for this bug - confirm against model.classify.
            rows.append([f'https://bugzilla.mozilla.org/show_bug.cgi?id={bug["id"]}', 'y' if p[0][1] >= 0.7 else 'n', '', bug['summary']])

        os.makedirs('sheets', exist_ok=True)
        # The file name reuses `today` so its date matches the end of the
        # queried window even if classification runs past midnight UTC.
        sheet_path = os.path.join('sheets', f'{args.goal}-{today.strftime("%Y-%m-%d")}-labels.csv')
        # newline='' is required by the csv module so that row terminators are
        # not translated a second time (which yields blank rows on Windows).
        # An explicit UTF-8 encoding keeps non-ASCII bug summaries from failing
        # under a narrower locale default.
        with open(sheet_path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerows(rows)