-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
executable file
·115 lines (93 loc) · 3.96 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import git
import argparse
import json
from pydriller import RepositoryMining
from apriori import Apriori
from parsers.base import BaseParser
from parsers.golang import GoParser
from parsers.java import JavaParser
from parsers.python import PythonParser
from settings import CLONE_PATH, DEBUG
from utils.utils import *
from pprint import pprint
# Command-line interface of the co-change analyzer.
# Numeric options declare a ``type`` so that argparse rejects malformed values
# with a usage error at parse time; their defaults are unchanged.
p = argparse.ArgumentParser(description="Analyze co-changes in source code.")

# Where the commits come from: a remote repository to clone or a local checkout.
p.add_argument("-r", "--repository", help="Remote Git repository", required=False)
p.add_argument("-d", "--dir", help="Local project directory", required=False)
p.add_argument("-l", "--lang", help="Programming language", choices=['python', 'java', 'go'])

# Thresholds forwarded to the Apriori rule miner.
p.add_argument("-s", "--support", help="correlation support value", type=float, default=0.5)
p.add_argument("-c", "--confidence", help="correlation confidence value", type=float, default=0.5)
p.add_argument("-L", "--max_length", help="max number of items in a rule", type=int, default=2)

# Transactions can be read from / written to a JSON file instead of re-mining.
p.add_argument("-t", "--transactions", help="transactions file", required=False)
p.add_argument("-ot", "--out_transactions", help="write transactions file", required=False)

# Left as a string on purpose: main() converts it and treats "absent" as no limit.
p.add_argument("-m", "--max_commits", help="Number of commits to analyze", required=False)
p.add_argument("-csv", "--csv", help="Output in CSV format", required=False, action='store_true')
def main():
    """Mine co-change association rules and print them.

    Transactions (one entry per commit that produced a non-empty diff) are
    either loaded from the JSON file given with ``--transactions`` or
    collected by traversing the commits of a repository: the local one given
    with ``--dir``, or a fresh clone of ``--repository`` (which takes
    precedence when both are supplied). The parser class is chosen from
    ``--lang`` and falls back to ``BaseParser``.

    The transactions are optionally written to ``--out_transactions`` as
    JSON, then passed to ``Apriori`` together with the support, confidence
    and max-length settings. The resulting rules are printed one per line,
    or emitted through ``get_rules_csv`` when ``--csv`` is set.

    Exits with a usage error when no transactions file, directory or
    repository is supplied.
    """
    args = p.parse_args()
    confidence = float(args.confidence)
    support = float(args.support)
    max_length = int(args.max_length)
    transactions_file_name = args.transactions
    # Convert once; None means "no limit on the number of commits".
    max_commits = int(args.max_commits) if args.max_commits else None

    print("support=%.3f" % support)
    print("confidence=%.3f" % confidence)
    print("max_length=%d" % max_length)
    if max_commits is not None:
        print("max_commits=%d" % max_commits)

    transactions = []
    if transactions_file_name is None:
        # Without a transactions file there must be a repository to mine.
        if not args.repository and not args.dir:
            p.error("one of --repository, --dir or --transactions is required")

        # fetching repository / folder
        project_path = args.dir
        if args.repository:
            project_folder = "clone-%s" % generate_hash()
            project_path = "%s/%s" % (CLONE_PATH, project_folder)
            print("Cloning repository to %s..." % project_path)
            # The Git wrapper is rooted at CLONE_PATH, so the clone is
            # created at CLONE_PATH/<project_folder>.
            git.Git(CLONE_PATH).clone(args.repository, project_folder)

        # Count the commits once and reuse the value for both messages.
        commit_count = get_commit_count(project_path)
        print("number of commits: %d" % commit_count)
        print("support absolute value: %d" % (support * commit_count))

        # defining language parser
        parsers_by_lang = {
            "go": GoParser,
            "java": JavaParser,
            "python": PythonParser,
        }
        parser = parsers_by_lang.get(args.lang, BaseParser)

        print("fetching transactions...")
        commits = RepositoryMining(project_path).traverse_commits()
        for index, commit in enumerate(commits):
            # ``>=`` so that exactly max_commits commits are examined
            # (``>`` would let one extra commit through).
            if max_commits is not None and index >= max_commits:
                break
            items = parser(project_path, commit.hash).get_diff()
            if items:
                transactions.append(items)
    else:
        # Close the file deterministically instead of leaking the handle.
        with open(transactions_file_name, 'r') as in_file:
            transactions = json.load(in_file)

    if DEBUG:
        print(ansi_color_yellow("Transactions:"))
        for i, changes in enumerate(transactions):
            print("%3d: " % i, end='')
            print(changes)

    if args.out_transactions:
        # ``with`` guarantees the JSON is flushed and the handle released.
        with open(args.out_transactions, 'w') as out_file:
            json.dump(transactions, out_file, indent=2)

    print("analyzing transactions...")
    apriori = Apriori(transactions, confidence=confidence, support=support, max_length=max_length)
    print(ansi_color_yellow("Association rules:"))

    # Output in CSV format
    if args.csv:
        # NOTE(review): the meaning of 1000 is defined by Apriori.get_rules_csv,
        # which is not visible here -- presumably a row limit; confirm.
        apriori.get_rules_csv(1000)
    else:
        rules = apriori.get_rules()
        for i, rule in enumerate(rules):
            print("%3d: " % i, end='')
            print(rule)
def _run_cli():
    """Call main() and report a Ctrl-C interruption with a short message."""
    try:
        main()
    except KeyboardInterrupt:
        print("finished by user :)")


# Only run the analysis when this file is executed as a script.
if __name__ == '__main__':
    _run_cli()