forked from mozilla/bugbug
-
Notifications
You must be signed in to change notification settings - Fork 0
/
run.py
119 lines (101 loc) · 3.59 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
import argparse
import sys
import numpy as np
from bugbug import bugzilla, db, repository
from bugbug.models import MODELS, get_model_class
def parse_args(args):
    """Parse the command-line options of this script.

    Args:
        args: List of argument strings to parse, without the program name
            (the script entry point passes ``sys.argv[1:]``).

    Returns:
        The ``argparse.Namespace`` built by the parser, exposing
        ``lemmatization``, ``training_set_size``, ``cleanup_urls``, ``train``,
        ``goal``, ``classifier``, ``classify`` and ``historical``.
    """
    default_training_set_size = 14000

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--lemmatization",
        help="Perform lemmatization (using spaCy)",
        action="store_true",
    )
    parser.add_argument(
        "--training-set-size",
        nargs="?",
        # With nargs="?", a bare "--training-set-size" (no value) yields
        # ``const``; without it argparse would hand back None instead of an int.
        const=default_training_set_size,
        default=default_training_set_size,
        type=int,
        help="The size of the training set for the duplicate model",
    )
    parser.add_argument(
        "--disable-url-cleanup",
        help="Don't cleanup urls when training the duplicate model",
        # The flag is negative, so it is stored inverted as ``cleanup_urls``.
        dest="cleanup_urls",
        default=True,
        action="store_false",
    )
    parser.add_argument("--train", help="Perform training", action="store_true")
    parser.add_argument(
        "--goal", help="Goal of the classifier", choices=MODELS.keys(), default="defect"
    )
    parser.add_argument(
        "--classifier",
        help="Type of the classifier. Only used for component classification.",
        choices=["default", "nn"],
        default="default",
    )
    parser.add_argument("--classify", help="Perform evaluation", action="store_true")
    parser.add_argument(
        "--historical",
        # argparse collapses runs of whitespace when rendering help text, so
        # the line break and alignment inside this literal are not displayed.
        help="""Analyze historical bugs. Only used for defect, bugtype,
                defectenhancementtask and regression tasks.""",
        action="store_true",
    )
    return parser.parse_args(args)
def main(args):
    """Train or load the model for ``args.goal`` and optionally classify bugs.

    Args:
        args: Parsed options as produced by ``parse_args``. The attributes
            read here are ``goal``, ``classifier``, ``train``, ``classify``,
            ``lemmatization``, ``historical``, ``training_set_size`` and
            ``cleanup_urls``.

    When ``args.train`` is set, the bugs and commits databases are downloaded
    and a new model is trained; otherwise a previously saved model is loaded
    by file name. When ``args.classify`` is set, every bug returned by
    ``bugzilla.get_bugs()`` is classified and the result printed, pausing on
    ``input()`` after each bug.
    """
    # Only a non-default classifier adds a suffix, e.g. "componentnnmodel".
    model_file_name = "{}{}model".format(
        args.goal, "" if args.classifier == "default" else args.classifier
    )

    # The component goal is the only one with a separate "nn" model class.
    if args.goal == "component":
        if args.classifier == "default":
            model_class_name = "component"
        else:
            model_class_name = "component_nn"
    else:
        model_class_name = args.goal

    model_class = get_model_class(model_class_name)

    if args.train:
        db.download(bugzilla.BUGS_DB)
        db.download(repository.COMMITS_DB)

        # Goals whose model constructor also takes the ``historical`` flag.
        historical_supported_tasks = [
            "defect",
            "bugtype",
            "defectenhancementtask",
            "regression",
        ]

        if args.goal in historical_supported_tasks:
            model = model_class(args.lemmatization, args.historical)
        elif args.goal == "duplicate":
            model = model_class(
                args.training_set_size, args.lemmatization, args.cleanup_urls
            )
        else:
            model = model_class(args.lemmatization)
        model.train()
    else:
        model = model_class.load(model_file_name)

    if args.classify:
        for bug in bugzilla.get_bugs():
            print(
                f'https://bugzilla.mozilla.org/show_bug.cgi?id={ bug["id"] } - { bug["summary"]} '
            )

            if model.calculate_importance:
                probas, importance = model.classify(
                    bug, probabilities=True, importances=True
                )

                model.print_feature_importances(
                    importance["importances"], class_probabilities=probas
                )
            else:
                probas = model.classify(bug, probabilities=True, importances=False)

            # Index 1 being the arg-max is reported as the positive outcome.
            if np.argmax(probas) == 1:
                print(f"Positive! {probas}")
            else:
                print(f"Negative! {probas}")

            # Block until the user presses Enter before moving to the next bug.
            input()
if __name__ == "__main__":
    # Run only when executed as a script; argv[0] (the program name) is dropped.
    main(parse_args(sys.argv[1:]))