erikbern · PriitParmakson · Jan 7, 2020 · Jan 11, 2020 · Jan 11, 2020 · Jan 11, 2020
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+LOCAL*
diff --git a/.travis.yml b/.travis.yml
@@ -4,10 +4,9 @@ python:
   - "3.4"
 install:
   - pip install .
-before_script: # configure a headless display to test plot generation
-  - "export DISPLAY=:99.0"
-  - "sh -e /etc/init.d/xvfb start"
-  - sleep 3 # give xvfb some time to start
+dist: xenial
+services:
+  - xvfb  
 script:
   - git clone https://github.com/erikbern/git-of-theseus
   - git-of-theseus-analyze git-of-theseus --outdir got

diff --git a/README.md b/README.md
@@ -20,7 +20,7 @@ After that, you can generate plots! Here are some ways you can do that:
 1. Run `git-of-theseus-stack-plot cohorts.json` which will write to `stack_plot.png`
 1. Run `git-of-theseus-survival-plot survival.json` which will write to `survival_plot.png` (run it with `--help` for some options)
 
-If you want to plot multiple repositories, have to run `git-of-theseus-analyze` separately for each project and store the data in separate directories using the `--outdir` flag. Then you can run `git-of-theseus-survival-plot <foo/survival.json> <bar/survival.json>` (optionally with the `--exp-fit` flag to fit an exponential decay)
+If you want to plot multiple repositories, you have to run `git-of-theseus-analyze` separately for each project and store the data in separate directories using the `--outdir` flag. Then you can run `git-of-theseus-survival-plot <foo/survival.json> <bar/survival.json>` (optionally with the `--exp-fit` flag to fit an exponential decay) and `git-of-theseus-stack-plot <foo/authors.json> <bar/authors.json>`.
 
 Help
 ----

diff --git a/git_of_theseus/stack_plot.py b/git_of_theseus/stack_plot.py
@@ -19,6 +19,7 @@
 
 import argparse, dateutil.parser, itertools, json, numpy, sys
 from matplotlib import pyplot
+from collections import defaultdict
 
 
 def generate_n_colors(n):
@@ -32,8 +33,59 @@ def euclidean(a, b):
     return colors
 
 
-def stack_plot(input_fn, display=False, outfile='stack_plot.png', max_n=20, normalize=False, dont_stack=False):
-    data = json.load(open(input_fn))  # TODO do we support multiple arguments here?
+def stack_plot(input_fns, display=False,
+    outfile='stack_plot.png', max_n=20, normalize=False, dont_stack=False, outmerged=False):
+
+    loc = {}  # Helper data structure
+    authors = set()  # All authors
+    tss = set()  # All timestamps
+    for fn in input_fns:
+        print('Reading %s' % fn)
+        data = json.load(open(fn))
+        locr = defaultdict(defaultdict)
+        for i, a in enumerate(data['labels']):
+            authors.add(a)
+            locr[a] = {}
+            for j, t in enumerate(data['ts']):
+                tss.add(t)
+                locr[a][t] = data['y'][i][j]
+        loc[fn] = locr
+
+    authorss = sorted(authors)  # Authors, sorted
+    tsss = sorted(tss)  # Timestamps, sorted
+
+    merged = [[0 for j in range(len(tsss))] for i in range(len(authorss))]
+
+    for i, r in enumerate(loc):
+        # print("repo: ", r)
+        for j, a in enumerate(authorss):
+            # print("  ", a)
+            l = 0
+            for k, t in enumerate(tsss):
+                # print(r, a, t)
+                if a in loc[r].keys():
+                    if t in loc[r][a].keys():
+                        l = loc[r][a][t]
+                        # print("l = ", l)
+                merged[j][k] = merged[j][k] + l
+
+    data = {
+        'y': merged,
+        'ts': [t for t in tsss],
+        'labels': [a for a in authorss]
+    }
+    if outmerged:
+        mergefn = 'merged.json'
+        print('Writing data to %s' % mergefn)
+        f = open(mergefn, 'w')
+        json.dump(
+            {
+                'y': merged,
+                'ts': [t for t in tsss],
+                'labels': [a for a in authorss]
+            }, f)
+        f.close()
+
     y = numpy.array(data['y'])
     if y.shape[0] > max_n:
         js = sorted(range(len(data['labels'])), key=lambda j: max(y[j]), reverse=True)
@@ -74,7 +126,8 @@ def stack_plot_cmdline():
     parser.add_argument('--max-n', default=20, type=int, help='Max number of dataseries (will roll everything else into "other") (default: %(default)s)')
     parser.add_argument('--normalize', action='store_true', help='Normalize the plot to 100%%')
     parser.add_argument('--dont-stack', action='store_true', help='Don\'t stack plot')
-    parser.add_argument('input_fn')
+    parser.add_argument('--outmerged', action='store_true', help='Output merged data to merged.json')
+    parser.add_argument('input_fns', nargs='*')
     kwargs = vars(parser.parse_args())
 
     stack_plot(**kwargs)

diff --git a/tests/test_data_merged_1_2.json b/tests/test_data_merged_1_2.json
@@ -0,0 +1,19 @@
+{
+    "y": [
+        [100, 170, 600, 400, 700],
+        [0, 0, 150, 150, 300],
+        [0, 50, 150, 200, 200]
+    ],
+    "ts": [
+        "2019-01-01T07:00:00",
+        "2019-02-01T07:00:00",
+        "2019-04-01T08:00:00",
+        "2019-06-01T08:00:00",
+        "2019-08-01T08:00:00"
+    ],
+    "labels": [
+        "Author A",
+        "Author B",
+        "Author C"
+    ]
+}
diff --git a/tests/test_data_repo_1.json b/tests/test_data_repo_1.json
@@ -0,0 +1,15 @@
+{
+    "y": [
+        [100, 200, 500],
+        [0, 150, 300]
+    ],
+    "ts": [
+        "2019-01-01T07:00:00",
+        "2019-04-01T08:00:00",
+        "2019-08-01T08:00:00"
+    ],
+    "labels": [
+        "Author A",
+        "Author B"
+    ]
+}
diff --git a/tests/test_data_repo_2.json b/tests/test_data_repo_2.json
@@ -0,0 +1,15 @@
+{
+    "y": [
+        [70, 400, 200],
+        [50, 150, 200]
+    ],
+    "ts": [
+        "2019-02-01T07:00:00",
+        "2019-04-01T08:00:00",
+        "2019-06-01T08:00:00"
+    ],
+    "labels": [
+        "Author A",
+        "Author C"
+    ]
+}
diff --git a/tests/tests.py b/tests/tests.py
@@ -0,0 +1,35 @@
+# Tests for stack_plot
+#
+# To run tests:
+#   (1) ensure that git-of-theseus is installed,
+#   (2) go to the tests folder, and
+#   (3) run: python tests.py
+
+import json
+from git_of_theseus import stack_plot
+
+print('Testing stack_plot...')
+
+print('Test 1 - Run stack_plot for repos 1 and 2')
+out_fn = 'stack_plot.png'
+in_fns = ['test_data_repo_1.json', 'test_data_repo_2.json']
+
+stack_plot(outfile=out_fn, input_fns=in_fns, outmerged=True)
+
+# merged.json and test_data_merged_1_2.json must have equal JSON contents.
+if json.load(open('merged.json')) == json.load(open('test_data_merged_1_2.json')):
+    print('Test succeeded')
+else:
+    print('Test failed')
+
+print('Test 2 - Run stack_plot for repo 1')
+out_fn = 'stack_plot.png'
+in_fns = ['test_data_repo_1.json']
+
+stack_plot(outfile=out_fn, input_fns=in_fns, outmerged=True)
+
+# With a single input, merged.json and test_data_repo_1.json must have equal JSON contents.
+if json.load(open('merged.json')) == json.load(open('test_data_repo_1.json')):
+    print('Test succeeded')
+else:
+    print('Test failed')