Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Author stats over multiple repos #70

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
LOCAL*
7 changes: 3 additions & 4 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,9 @@ python:
- "3.4"
install:
- pip install .
before_script: # configure a headless display to test plot generation
- "export DISPLAY=:99.0"
- "sh -e /etc/init.d/xvfb start"
- sleep 3 # give xvfb some time to start

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

dist: xenial
services:
- xvfb
script:
- git clone https://github.com/erikbern/git-of-theseus
- git-of-theseus-analyze git-of-theseus --outdir got
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ After that, you can generate plots! Here are some ways you can do that:
1. Run `git-of-theseus-stack-plot cohorts.json` which will write to `stack_plot.png`
1. Run `git-of-theseus-survival-plot survival.json` which will write to `survival_plot.png` (run it with `--help` for some options)

If you want to plot multiple repositories, have to run `git-of-theseus-analyze` separately for each project and store the data in separate directories using the `--outdir` flag. Then you can run `git-of-theseus-survival-plot <foo/survival.json> <bar/survival.json>` (optionally with the `--exp-fit` flag to fit an exponential decay)
If you want to plot multiple repositories, you have to run `git-of-theseus-analyze` separately for each project and store the data in separate directories using the `--outdir` flag. Then you can run `git-of-theseus-survival-plot <foo/survival.json> <bar/survival.json>` (optionally with the `--exp-fit` flag to fit an exponential decay) and `git-of-theseus-stack-plot <foo/authors.json> <bar/authors.json>`.

Help
----
Expand Down
59 changes: 56 additions & 3 deletions git_of_theseus/stack_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import argparse, dateutil.parser, itertools, json, numpy, sys
from matplotlib import pyplot
from collections import defaultdict


def generate_n_colors(n):
Expand All @@ -32,8 +33,59 @@ def euclidean(a, b):
return colors


def stack_plot(input_fn, display=False, outfile='stack_plot.png', max_n=20, normalize=False, dont_stack=False):
data = json.load(open(input_fn)) # TODO do we support multiple arguments here?
def stack_plot(input_fns, display=False,
outfile='stack_plot.png', max_n=20, normalize=False, dont_stack=False, outmerged=False):

loc = {} # Helper data structure
authors = set() # All authors
tss = set() # All timestamps
for fn in input_fns:
print('Reading %s' % fn)
data = json.load(open(fn))
locr = defaultdict(defaultdict)
for i, a in enumerate(data['labels']):
authors.add(a)
locr[a] = {}
for j, t in enumerate(data['ts']):
tss.add(t)
locr[a][t] = data['y'][i][j]
loc[fn] = locr

authorss = sorted(authors) # Authors, sorted
tsss = sorted(tss) # Timestamps, sorted

merged = [[0 for j in range(len(tsss))] for i in range(len(authorss))]

for i, r in enumerate(loc):
# print("repo: ", r)
for j, a in enumerate(authorss):
# print(" ", a)
l = 0
for k, t in enumerate(tsss):
# print(r, a, t)
if a in loc[r].keys():
if t in loc[r][a].keys():
l = loc[r][a][t]
# print("l = ", l)
merged[j][k] = merged[j][k] + l

data = {
'y': merged,
'ts': [t for t in tsss],
'labels': [a for a in authorss]
}
if outmerged:
mergefn = 'merged.json'
print('Writing data to %s' % mergefn)
f = open(mergefn, 'w')
json.dump(
{
'y': merged,
'ts': [t for t in tsss],
'labels': [a for a in authorss]
}, f)
f.close()

y = numpy.array(data['y'])
if y.shape[0] > max_n:
js = sorted(range(len(data['labels'])), key=lambda j: max(y[j]), reverse=True)
Expand Down Expand Up @@ -74,7 +126,8 @@ def stack_plot_cmdline():
parser.add_argument('--max-n', default=20, type=int, help='Max number of dataseries (will roll everything else into "other") (default: %(default)s)')
parser.add_argument('--normalize', action='store_true', help='Normalize the plot to 100%%')
parser.add_argument('--dont-stack', action='store_true', help='Don\'t stack plot')
parser.add_argument('input_fn')

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Test

parser.add_argument('--outmerged', action='store_true', help='Output merged data to merged.json')
parser.add_argument('input_fns', nargs='*')
kwargs = vars(parser.parse_args())

stack_plot(**kwargs)
Expand Down
19 changes: 19 additions & 0 deletions tests/test_data_merged_1_2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"y": [
[100, 170, 600, 400, 700],
[0, 0, 150, 150, 300],
[0, 50, 150, 200, 200]
],
"ts": [
"2019-01-01T07:00:00",
"2019-02-01T07:00:00",
"2019-04-01T08:00:00",
"2019-06-01T08:00:00",
"2019-08-01T08:00:00"
],
"labels": [
"Author A",
"Author B",
"Author C"
]
}
15 changes: 15 additions & 0 deletions tests/test_data_repo_1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"y": [
[100, 200, 500],
[0, 150, 300]
],
"ts": [
"2019-01-01T07:00:00",
"2019-04-01T08:00:00",
"2019-08-01T08:00:00"
],
"labels": [
"Author A",
"Author B"
]
}
15 changes: 15 additions & 0 deletions tests/test_data_repo_2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"y": [
[70, 400, 200],
[50, 150, 200]
],
"ts": [
"2019-02-01T07:00:00",
"2019-04-01T08:00:00",
"2019-06-01T08:00:00"
],
"labels": [
"Author A",
"Author C"
]
}
35 changes: 35 additions & 0 deletions tests/tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Tests for stack_plot
#
# To run tests:
# (1) ensure that git-of-theseus is installed,
# (2) go to the tests folder, and
# (3) run `python tests.py`

import json
from git_of_theseus import stack_plot

def _load_json(path):
    """Return the parsed contents of the JSON file at *path*.

    The file is opened in a ``with`` block so the handle is closed as soon
    as parsing finishes instead of being left for the garbage collector.
    """
    with open(path) as f:
        return json.load(f)


def _run_case(title, in_fns, expected_fn, out_fn='stack_plot.png'):
    """Run ``stack_plot`` on *in_fns* and compare its merged output.

    ``stack_plot`` is called with ``outmerged=True``, which makes it write
    the merged data to ``merged.json`` in the current directory. That file
    is then compared, as parsed JSON, with the fixture *expected_fn*.

    Args:
        title: Heading printed before the case runs.
        in_fns: List of input JSON file names passed to ``stack_plot``.
        expected_fn: JSON file whose contents ``merged.json`` must equal.
        out_fn: File name the plot image is written to.

    Returns:
        True if the merged output equals the expected fixture, else False.
    """
    print(title)
    stack_plot(outfile=out_fn, input_fns=in_fns, outmerged=True)

    # Compare parsed JSON rather than raw text so that whitespace and key
    # order in the files do not matter.
    passed = _load_json('merged.json') == _load_json(expected_fn)
    print('Test succeeded' if passed else 'Test failed')
    return passed


print('Testing stack_plot...')

results = [
    # Merging two repos: merged.json must match the hand-computed fixture.
    _run_case(
        'Test 1 - Run stack_plot for repos 1 and 2',
        ['test_data_repo_1.json', 'test_data_repo_2.json'],
        'test_data_merged_1_2.json',
    ),
    # A single repo: merged.json must reproduce the input unchanged.
    _run_case(
        'Test 2 - Run stack_plot for repo 1',
        ['test_data_repo_1.json'],
        'test_data_repo_1.json',
    ),
]

# Exit non-zero on any failure so that CI and shell callers can detect it;
# previously a failing comparison was only printed.
if not all(results):
    raise SystemExit(1)