Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Added support for blacklists for authors and paths #47

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 63 additions & 42 deletions gitstats
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@ conf = {
'linear_linestats': 1,
'project_name': '',
'processes': 8,
'start_date': ''
'start_date': '',
'excluded_authors': [],
'excluded_prefixes': []
}

def getpipeoutput(cmds, quiet = False):
Expand Down Expand Up @@ -322,6 +324,8 @@ class GitDataCollector(DataCollector):
parts = re.split('\s+', line, 2)
commits = int(parts[1])
author = parts[2]
if author in conf["excluded_authors"]:
continue
self.tags[tag]['commits'] += commits
self.tags[tag]['authors'][author] = commits

Expand All @@ -338,6 +342,8 @@ class GitDataCollector(DataCollector):
timezone = parts[3]
author, mail = parts[4].split('<', 1)
author = author.rstrip()
if author in conf["excluded_authors"]:
continue
mail = mail.rstrip('>')
domain = '?'
if mail.find('@') != -1:
Expand Down Expand Up @@ -434,14 +440,16 @@ class GitDataCollector(DataCollector):
self.commits_by_timezone[timezone] = self.commits_by_timezone.get(timezone, 0) + 1

# outputs "<stamp> <files>" for each revision
revlines = getpipeoutput(['git rev-list --pretty=format:"%%at %%T" %s' % getlogrange('HEAD'), 'grep -v ^commit']).strip().split('\n')
revlines = getpipeoutput(['git rev-list --pretty=format:"%%at %%T %%an" %s' % getlogrange('HEAD'), 'grep -v ^commit']).strip().split('\n')
lines = []
revs_to_read = []
time_rev_count = []
#Look up rev in cache and take info from cache if found
#If not append rev to list of rev to read from repo
for revline in revlines:
time, rev = revline.split(' ')
time, rev, author = revline.split(' ')
if author in conf["excluded_authors"]:
continue
#if cache empty then add time and rev to list of new rev's
#otherwise try to read needed info from cache
if 'files_in_tree' not in self.cache.keys():
Expand Down Expand Up @@ -489,6 +497,14 @@ class GitDataCollector(DataCollector):
blob_id = parts[2]
size = int(parts[3])
fullpath = parts[4]
exclude = False
for path in conf["excluded_prefixes"]:
if fullpath.startswith(path):
exclude = True
break

if exclude:
continue

self.total_size += size
self.total_files += 1
Expand Down Expand Up @@ -540,6 +556,7 @@ class GitDataCollector(DataCollector):
lines.reverse()
files = 0; inserted = 0; deleted = 0; total_lines = 0
author = None
last_line = ""
for line in lines:
if len(line) == 0:
continue
Expand All @@ -550,35 +567,36 @@ class GitDataCollector(DataCollector):
if pos != -1:
try:
(stamp, author) = (int(line[:pos]), line[pos+1:])
self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted, 'lines': total_lines }

date = datetime.datetime.fromtimestamp(stamp)
yymm = date.strftime('%Y-%m')
self.lines_added_by_month[yymm] = self.lines_added_by_month.get(yymm, 0) + inserted
self.lines_removed_by_month[yymm] = self.lines_removed_by_month.get(yymm, 0) + deleted

yy = date.year
self.lines_added_by_year[yy] = self.lines_added_by_year.get(yy,0) + inserted
self.lines_removed_by_year[yy] = self.lines_removed_by_year.get(yy, 0) + deleted

files, inserted, deleted = 0, 0, 0
if author not in conf["excluded_authors"]:
self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted, 'lines': total_lines }

date = datetime.datetime.fromtimestamp(stamp)
yymm = date.strftime('%Y-%m')
self.lines_added_by_month[yymm] = self.lines_added_by_month.get(yymm, 0) + inserted
self.lines_removed_by_month[yymm] = self.lines_removed_by_month.get(yymm, 0) + deleted

yy = date.year
self.lines_added_by_year[yy] = self.lines_added_by_year.get(yy,0) + inserted
self.lines_removed_by_year[yy] = self.lines_removed_by_year.get(yy, 0) + deleted

files, inserted, deleted = 0, 0, 0

numbers = getstatsummarycounts(last_line)
if len(numbers) == 3:
(files, inserted, deleted) = map(lambda el : int(el), numbers)
total_lines += inserted
total_lines -= deleted
self.total_lines_added += inserted
self.total_lines_removed += deleted
else:
print 'Warning: failed to handle line "%s"' % line
(files, inserted, deleted) = (0, 0, 0)
except ValueError:
print 'Warning: unexpected line "%s"' % line
else:
print 'Warning: unexpected line "%s"' % line
else:
numbers = getstatsummarycounts(line)

if len(numbers) == 3:
(files, inserted, deleted) = map(lambda el : int(el), numbers)
total_lines += inserted
total_lines -= deleted
self.total_lines_added += inserted
self.total_lines_removed += deleted

else:
print 'Warning: failed to handle line "%s"' % line
(files, inserted, deleted) = (0, 0, 0)
last_line = line
#self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted }
self.total_lines += total_lines

Expand Down Expand Up @@ -606,21 +624,22 @@ class GitDataCollector(DataCollector):
try:
oldstamp = stamp
(stamp, author) = (int(line[:pos]), line[pos+1:])
if oldstamp > stamp:
# clock skew, keep old timestamp to avoid having ugly graph
stamp = oldstamp
if author not in self.authors:
self.authors[author] = { 'lines_added' : 0, 'lines_removed' : 0, 'commits' : 0}
self.authors[author]['commits'] = self.authors[author].get('commits', 0) + 1
self.authors[author]['lines_added'] = self.authors[author].get('lines_added', 0) + inserted
self.authors[author]['lines_removed'] = self.authors[author].get('lines_removed', 0) + deleted
if stamp not in self.changes_by_date_by_author:
self.changes_by_date_by_author[stamp] = {}
if author not in self.changes_by_date_by_author[stamp]:
self.changes_by_date_by_author[stamp][author] = {}
self.changes_by_date_by_author[stamp][author]['lines_added'] = self.authors[author]['lines_added']
self.changes_by_date_by_author[stamp][author]['commits'] = self.authors[author]['commits']
files, inserted, deleted = 0, 0, 0
if author not in conf["excluded_authors"]:
if oldstamp > stamp:
# clock skew, keep old timestamp to avoid having ugly graph
stamp = oldstamp
if author not in self.authors:
self.authors[author] = { 'lines_added' : 0, 'lines_removed' : 0, 'commits' : 0}
self.authors[author]['commits'] = self.authors[author].get('commits', 0) + 1
self.authors[author]['lines_added'] = self.authors[author].get('lines_added', 0) + inserted
self.authors[author]['lines_removed'] = self.authors[author].get('lines_removed', 0) + deleted
if stamp not in self.changes_by_date_by_author:
self.changes_by_date_by_author[stamp] = {}
if author not in self.changes_by_date_by_author[stamp]:
self.changes_by_date_by_author[stamp][author] = {}
self.changes_by_date_by_author[stamp][author]['lines_added'] = self.authors[author]['lines_added']
self.changes_by_date_by_author[stamp][author]['commits'] = self.authors[author]['commits']
files, inserted, deleted = 0, 0, 0
except ValueError:
print 'Warning: unexpected line "%s"' % line
else:
Expand Down Expand Up @@ -1424,6 +1443,8 @@ class GitStats:
raise KeyError('no such key "%s" in config' % key)
if isinstance(conf[key], int):
conf[key] = int(value)
elif isinstance(conf[key], list):
conf[key].append(value)
else:
conf[key] = value
elif o in ('-h', '--help'):
Expand Down