From 1ee92e6cd1c0e139ba7c8910558bd57c487657cf Mon Sep 17 00:00:00 2001 From: Priit Parmakson Date: Tue, 7 Jan 2020 20:17:11 +0200 Subject: [PATCH 1/5] Author stats over multiple repos --- README.md | 2 + git_of_theseus/__init__.py | 1 + git_of_theseus/merge.py | 83 ++++++++++++++++++++++++++++++++++++++ setup.py | 1 + 4 files changed, 87 insertions(+) create mode 100644 git_of_theseus/merge.py diff --git a/README.md b/README.md index c761813..8c0accd 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,8 @@ You can also normalize it to 100%. Here's author statistics for Git: ![git](https://raw.githubusercontent.com/erikbern/git-of-theseus/master/pics/git-git-authors-normalized.png) +To plot author statistics over several repos, run `git-of-theseus-analyze` for each of the repos, then run `git-of-theseus-merge` to merge the `authors.json` files. The run `git-of-theseus-stack-plot authors.json` on the resulting file. + Other stuff ----------- diff --git a/git_of_theseus/__init__.py b/git_of_theseus/__init__.py index 2080740..beb2e29 100644 --- a/git_of_theseus/__init__.py +++ b/git_of_theseus/__init__.py @@ -1,3 +1,4 @@ from git_of_theseus.stack_plot import stack_plot, stack_plot_cmdline from git_of_theseus.survival_plot import survival_plot, survival_plot_cmdline from git_of_theseus.analyze import analyze, analyze_cmdline +from git_of_theseus.merge import merge, merge_cmdline diff --git a/git_of_theseus/merge.py b/git_of_theseus/merge.py new file mode 100644 index 0000000..77006b2 --- /dev/null +++ b/git_of_theseus/merge.py @@ -0,0 +1,83 @@ +# Copyright 2020 Priit Parmakson +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This script merges authors.json files produced by +# git-of-theseus-analyze, so that authors chart can be produced +# over multiple repos. Output is written to file mergedAuthors.json +# by default. +# +# Usage: +# python merge [--outfile ] ... +# +# e.g.: +# python merge --outfile chart.png authors1.json authors2.json + +import argparse, json, os +from collections import defaultdict + +def merge(input_fns, outfile='mergedAuthors.json'): + + loc = {} # Helper data structure + authors = set() # All authors + tss = set() # All timestamps + for fn in input_fns: + print('Reading %s' % fn) + data = json.load(open(fn)) + locr = defaultdict(defaultdict) + for i, a in enumerate(data['labels']): + authors.add(a) + locr[a] = {} + for j, t in enumerate(data['ts']): + tss.add(t) + locr[a][t] = data['y'][i][j] + loc[fn] = locr + + authorss = sorted(authors) # Authors, sorted + tsss = sorted(tss) # Timestamps, sorted + + merged = [[0 for j in range(len(tsss))] for i in range(len(authorss))] + + for i, r in enumerate(loc): + # print("repo: ", r) + for j, a in enumerate(authorss): + # print(" ", a) + l = 0 + for k, t in enumerate(tsss): + # print(r, a, t) + if a in loc[r].keys(): + if t in loc[r][a].keys(): + l = loc[r][a][t] + # print("l = ", l) + merged[j][k] = merged[j][k] + l + + print('Writing merged authors data to %s' % outfile) + f = open(outfile, 'w') + json.dump( + { + 'y': merged, + 'ts': [t for t in tsss], + 'labels': [a for a in authorss] + }, f) + f.close() + +def merge_cmdline(): + parser = argparse.ArgumentParser(description='Merge author stats files') 
+ parser.add_argument('--outfile', default='mergedAuthors.json', type=str, help='Output file to store results (default: %(default)s)') + parser.add_argument('input_fns', nargs='*') + kwargs = vars(parser.parse_args()) + + merge(**kwargs) + +if __name__ == '__main__': + merge_cmdline() \ No newline at end of file diff --git a/setup.py b/setup.py index 49888c1..e63553c 100644 --- a/setup.py +++ b/setup.py @@ -20,6 +20,7 @@ 'git-of-theseus-analyze=git_of_theseus.analyze:analyze_cmdline', 'git-of-theseus-survival-plot=git_of_theseus:survival_plot_cmdline', 'git-of-theseus-stack-plot=git_of_theseus:stack_plot_cmdline' + 'git-of-theseus-merge=git_of_theseus:merge_cmdline' ] } ) From 9fa91374d7e7d35113bd501aae5f5eaa612d4fc4 Mon Sep 17 00:00:00 2001 From: Priit Parmakson Date: Sat, 11 Jan 2020 17:49:04 +0200 Subject: [PATCH 2/5] Integrated multi-repo option into stack_plot; added two tests. --- README.md | 4 +- git_of_theseus/.gitignore | 1 + git_of_theseus/merge.py | 83 --------------------------------- git_of_theseus/stack_plot.py | 59 +++++++++++++++++++++-- setup.py | 1 - tests/test_data_merged_1_2.json | 19 ++++++++ tests/test_data_repo_1.json | 15 ++++++ tests/test_data_repo_2.json | 15 ++++++ tests/tests.py | 35 ++++++++++++++ 9 files changed, 142 insertions(+), 90 deletions(-) create mode 100644 git_of_theseus/.gitignore delete mode 100644 git_of_theseus/merge.py create mode 100644 tests/test_data_merged_1_2.json create mode 100644 tests/test_data_repo_1.json create mode 100644 tests/test_data_repo_2.json create mode 100644 tests/tests.py diff --git a/README.md b/README.md index 8c0accd..1b4977f 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ After that, you can generate plots! Here are some ways you can do that: 1. Run `git-of-theseus-stack-plot cohorts.json` which will write to `stack_plot.png` 1. 
Run `git-of-theseus-survival-plot survival.json` which will write to `survival_plot.png` (run it with `--help` for some options) -If you want to plot multiple repositories, have to run `git-of-theseus-analyze` separately for each project and store the data in separate directories using the `--outdir` flag. Then you can run `git-of-theseus-survival-plot ` (optionally with the `--exp-fit` flag to fit an exponential decay) +If you want to plot multiple repositories, have to run `git-of-theseus-analyze` separately for each project and store the data in separate directories using the `--outdir` flag. Then you can run `git-of-theseus-survival-plot ` (optionally with the `--exp-fit` flag to fit an exponential decay) and `git-of-theseus-stack-plot `. Help ---- @@ -65,8 +65,6 @@ You can also normalize it to 100%. Here's author statistics for Git: ![git](https://raw.githubusercontent.com/erikbern/git-of-theseus/master/pics/git-git-authors-normalized.png) -To plot author statistics over several repos, run `git-of-theseus-analyze` for each of the repos, then run `git-of-theseus-merge` to merge the `authors.json` files. The run `git-of-theseus-stack-plot authors.json` on the resulting file. - Other stuff ----------- diff --git a/git_of_theseus/.gitignore b/git_of_theseus/.gitignore new file mode 100644 index 0000000..6ca27fe --- /dev/null +++ b/git_of_theseus/.gitignore @@ -0,0 +1 @@ +LOCAL* diff --git a/git_of_theseus/merge.py b/git_of_theseus/merge.py deleted file mode 100644 index 77006b2..0000000 --- a/git_of_theseus/merge.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright 2020 Priit Parmakson -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This script merges authors.json files produced by -# git-of-theseus-analyze, so that authors chart can be produced -# over multiple repos. Output is written to file mergedAuthors.json -# by default. -# -# Usage: -# python merge [--outfile ] ... -# -# e.g.: -# python merge --outfile chart.png authors1.json authors2.json - -import argparse, json, os -from collections import defaultdict - -def merge(input_fns, outfile='mergedAuthors.json'): - - loc = {} # Helper data structure - authors = set() # All authors - tss = set() # All timestamps - for fn in input_fns: - print('Reading %s' % fn) - data = json.load(open(fn)) - locr = defaultdict(defaultdict) - for i, a in enumerate(data['labels']): - authors.add(a) - locr[a] = {} - for j, t in enumerate(data['ts']): - tss.add(t) - locr[a][t] = data['y'][i][j] - loc[fn] = locr - - authorss = sorted(authors) # Authors, sorted - tsss = sorted(tss) # Timestamps, sorted - - merged = [[0 for j in range(len(tsss))] for i in range(len(authorss))] - - for i, r in enumerate(loc): - # print("repo: ", r) - for j, a in enumerate(authorss): - # print(" ", a) - l = 0 - for k, t in enumerate(tsss): - # print(r, a, t) - if a in loc[r].keys(): - if t in loc[r][a].keys(): - l = loc[r][a][t] - # print("l = ", l) - merged[j][k] = merged[j][k] + l - - print('Writing merged authors data to %s' % outfile) - f = open(outfile, 'w') - json.dump( - { - 'y': merged, - 'ts': [t for t in tsss], - 'labels': [a for a in authorss] - }, f) - f.close() - -def merge_cmdline(): - parser = argparse.ArgumentParser(description='Merge author stats files') 
- parser.add_argument('--outfile', default='mergedAuthors.json', type=str, help='Output file to store results (default: %(default)s)') - parser.add_argument('input_fns', nargs='*') - kwargs = vars(parser.parse_args()) - - merge(**kwargs) - -if __name__ == '__main__': - merge_cmdline() \ No newline at end of file diff --git a/git_of_theseus/stack_plot.py b/git_of_theseus/stack_plot.py index 6d8ee1f..543a6cd 100644 --- a/git_of_theseus/stack_plot.py +++ b/git_of_theseus/stack_plot.py @@ -19,6 +19,7 @@ import argparse, dateutil.parser, itertools, json, numpy, sys from matplotlib import pyplot +from collections import defaultdict def generate_n_colors(n): @@ -32,8 +33,59 @@ def euclidean(a, b): return colors -def stack_plot(input_fn, display=False, outfile='stack_plot.png', max_n=20, normalize=False, dont_stack=False): - data = json.load(open(input_fn)) # TODO do we support multiple arguments here? +def stack_plot(input_fns, display=False, + outfile='stack_plot.png', max_n=20, normalize=False, dont_stack=False, outmerged=False): + + loc = {} # Helper data structure + authors = set() # All authors + tss = set() # All timestamps + for fn in input_fns: + print('Reading %s' % fn) + data = json.load(open(fn)) + locr = defaultdict(defaultdict) + for i, a in enumerate(data['labels']): + authors.add(a) + locr[a] = {} + for j, t in enumerate(data['ts']): + tss.add(t) + locr[a][t] = data['y'][i][j] + loc[fn] = locr + + authorss = sorted(authors) # Authors, sorted + tsss = sorted(tss) # Timestamps, sorted + + merged = [[0 for j in range(len(tsss))] for i in range(len(authorss))] + + for i, r in enumerate(loc): + # print("repo: ", r) + for j, a in enumerate(authorss): + # print(" ", a) + l = 0 + for k, t in enumerate(tsss): + # print(r, a, t) + if a in loc[r].keys(): + if t in loc[r][a].keys(): + l = loc[r][a][t] + # print("l = ", l) + merged[j][k] = merged[j][k] + l + + data = { + 'y': merged, + 'ts': [t for t in tsss], + 'labels': [a for a in authorss] + } + if outmerged: + 
mergefn = 'merged.json' + print('Writing data to %s' % mergefn) + f = open(mergefn, 'w') + json.dump( + { + 'y': merged, + 'ts': [t for t in tsss], + 'labels': [a for a in authorss] + }, f) + f.close() + y = numpy.array(data['y']) if y.shape[0] > max_n: js = sorted(range(len(data['labels'])), key=lambda j: max(y[j]), reverse=True) @@ -74,7 +126,8 @@ def stack_plot_cmdline(): parser.add_argument('--max-n', default=20, type=int, help='Max number of dataseries (will roll everything else into "other") (default: %(default)s)') parser.add_argument('--normalize', action='store_true', help='Normalize the plot to 100%%') parser.add_argument('--dont-stack', action='store_true', help='Don\'t stack plot') - parser.add_argument('input_fn') + parser.add_argument('--outmerged', action='store_true', help='Output merged data to merged.json') + parser.add_argument('input_fns', nargs='*') kwargs = vars(parser.parse_args()) stack_plot(**kwargs) diff --git a/setup.py b/setup.py index e63553c..49888c1 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,6 @@ 'git-of-theseus-analyze=git_of_theseus.analyze:analyze_cmdline', 'git-of-theseus-survival-plot=git_of_theseus:survival_plot_cmdline', 'git-of-theseus-stack-plot=git_of_theseus:stack_plot_cmdline' - 'git-of-theseus-merge=git_of_theseus:merge_cmdline' ] } ) diff --git a/tests/test_data_merged_1_2.json b/tests/test_data_merged_1_2.json new file mode 100644 index 0000000..baef7be --- /dev/null +++ b/tests/test_data_merged_1_2.json @@ -0,0 +1,19 @@ +{ + "y": [ + [100, 170, 600, 400, 700], + [0, 0, 150, 150, 300], + [0, 50, 150, 200, 200] + ], + "ts": [ + "2019-01-01T07:00:00", + "2019-02-01T07:00:00", + "2019-04-01T08:00:00", + "2019-06-01T08:00:00", + "2019-08-01T08:00:00" + ], + "labels": [ + "Author A", + "Author B", + "Author C" + ] +} \ No newline at end of file diff --git a/tests/test_data_repo_1.json b/tests/test_data_repo_1.json new file mode 100644 index 0000000..0af57d8 --- /dev/null +++ b/tests/test_data_repo_1.json @@ -0,0 +1,15 
@@ +{ + "y": [ + [100, 200, 500], + [0, 150, 300] + ], + "ts": [ + "2019-01-01T07:00:00", + "2019-04-01T08:00:00", + "2019-08-01T08:00:00" + ], + "labels": [ + "Author A", + "Author B" + ] +} \ No newline at end of file diff --git a/tests/test_data_repo_2.json b/tests/test_data_repo_2.json new file mode 100644 index 0000000..d0559ec --- /dev/null +++ b/tests/test_data_repo_2.json @@ -0,0 +1,15 @@ +{ + "y": [ + [70, 400, 200], + [50, 150, 200] + ], + "ts": [ + "2019-02-01T07:00:00", + "2019-04-01T08:00:00", + "2019-06-01T08:00:00" + ], + "labels": [ + "Author A", + "Author C" + ] +} \ No newline at end of file diff --git a/tests/tests.py b/tests/tests.py new file mode 100644 index 0000000..a58c33a --- /dev/null +++ b/tests/tests.py @@ -0,0 +1,35 @@ +# Tests for stack_plot +# +# To run tests: +# (1) ensure that git-of-theseus is installed +# (2) go to folder tests and +# (3) python tests.py + +import json +from git_of_theseus import stack_plot + +print('Testing stack_plot...') + +print('Test 1 - Run stack_plot for repos 1 and 2') +out_fn = 'stack_plot.png' +in_fns = ['test_data_repo_1.json', 'test_data_repo_2.json'] + +stack_plot(outfile=out_fn, input_fns=in_fns, outmerged=True) + +# merged.json and test_data_merged_1_2.json must have equal JSON contents. +if json.load(open('merged.json')) == json.load(open('test_data_merged_1_2.json')): + print('Test succeeded') +else: + print('Test failed') + +print('Test 2 - Run stack_plot for repo 1') +out_fn = 'stack_plot.png' +in_fns = ['test_data_repo_1.json'] + +stack_plot(outfile=out_fn, input_fns=in_fns, outmerged=True) + +# With a single input, merged.json and test_data_repo_1.json must have equal JSON contents. +if json.load(open('merged.json')) == json.load(open('test_data_repo_1.json')): + print('Test succeeded') +else: + print('Test failed') From 7596c0f56d82bb35e189eeb80f42c16481e2e67c Mon Sep 17 00:00:00 2001 From: Priit Parmakson Date: Sat, 11 Jan 2020 18:06:21 +0200 Subject: [PATCH 3/5] Put .gitignore into wrong directory. Trying again.
--- git_of_theseus/.gitignore => .gitignore | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename git_of_theseus/.gitignore => .gitignore (100%) diff --git a/git_of_theseus/.gitignore b/.gitignore similarity index 100% rename from git_of_theseus/.gitignore rename to .gitignore From 190201e9a28fb956f430df359889f1bec1019b31 Mon Sep 17 00:00:00 2001 From: Priit Parmakson Date: Sat, 11 Jan 2020 22:09:14 +0200 Subject: [PATCH 4/5] Updated xvfb in travis.yml to Ubuntu 16.04 (Xenial) --- .travis.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 927a485..844944a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,10 +4,9 @@ python: - "3.4" install: - pip install . -before_script: # configure a headless display to test plot generation - - "export DISPLAY=:99.0" - - "sh -e /etc/init.d/xvfb start" - - sleep 3 # give xvfb some time to start +dist: xenial +services: + - xvfb script: - git clone https://github.com/erikbern/git-of-theseus - git-of-theseus-analyze git-of-theseus --outdir got From d266a2ab96b78b3e588b316b3854917deb585625 Mon Sep 17 00:00:00 2001 From: Priit Parmakson Date: Sat, 11 Jan 2020 22:26:51 +0200 Subject: [PATCH 5/5] Removed erroneous line from __init__.py --- git_of_theseus/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/git_of_theseus/__init__.py b/git_of_theseus/__init__.py index beb2e29..2080740 100644 --- a/git_of_theseus/__init__.py +++ b/git_of_theseus/__init__.py @@ -1,4 +1,3 @@ from git_of_theseus.stack_plot import stack_plot, stack_plot_cmdline from git_of_theseus.survival_plot import survival_plot, survival_plot_cmdline from git_of_theseus.analyze import analyze, analyze_cmdline -from git_of_theseus.merge import merge, merge_cmdline