From 6536bc457f980d70db35a1530e0552e52aada3c8 Mon Sep 17 00:00:00 2001
From: Holger Stitz
Date: Mon, 19 Aug 2019 14:06:35 +0200
Subject: [PATCH 01/40] Removed enum package

Due to native integration in Python 3

---
 requirements.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 97dc403..6d96a8f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,2 @@
 -e git+https://github.com/phovea/phovea_server.git@develop#egg=phovea_server
-enum==0.4.6
 sklearn==0.0

From 824f86e891e92e23495689888128baf8e0ccec20 Mon Sep 17 00:00:00 2001
From: Holger Stitz
Date: Mon, 19 Aug 2019 14:06:42 +0200
Subject: [PATCH 02/40] Update python requirements

---
 requirements_dev.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements_dev.txt b/requirements_dev.txt
index 7ca3f68..e6ef63b 100644
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -3,4 +3,4 @@ pep8-naming==0.4.1
 pytest==3.0.3
 pytest-runner==2.9
 Sphinx==1.5.2
-recommonmark==0.4.0
+recommonmark==0.6.0

From 9b578be8c626f5c78150dab7de272e298fc4d0fc Mon Sep 17 00:00:00 2001
From: dvvanessastoiber
Date: Tue, 20 Aug 2019 08:16:06 +0200
Subject: [PATCH 03/40] CircleCI Docker Image 'python:3.7-node-browsers'

created .circleci folder and config.yml

---
 .circleci/config.yml | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 .circleci/config.yml

diff --git a/.circleci/config.yml b/.circleci/config.yml
new file mode 100644
index 0000000..0fcaf74
--- /dev/null
+++ b/.circleci/config.yml
@@ -0,0 +1,41 @@
+version: 2
+jobs:
+  build:
+    working_directory: ~/phovea
+    docker:
+      - image: circleci/python:3.7-node-browsers
+        environment:
+          NODE_VERSION: 10.16.3
+    tags:
+      - /v\d+.\d+.\d+.*/
+    steps:
+      - checkout
+      - run:
+          command: |
+            (!(test -f docker_packages.txt) || (cat docker_packages.txt | xargs sudo apt-get install -y))
+      - restore_cache:
+          key: deps1-{{ .Branch }}-{{ checksum "requirements.txt" }}-{{ checksum "requirements_dev.txt" }}
+      - run:
+          name: install-pip-wee
+          command: |
+            virtualenv ~/venv
+            . ~/venv/bin/activate
+            pip install -r requirements_dev.txt
+            pip install -r requirements.txt
+      - save_cache:
+          key: deps1-{{ .Branch }}-{{ checksum "requirements.txt" }}-{{ checksum "requirements_dev.txt" }}
+          paths:
+            - ~/venv
+      - run: #force update of VCS dependencies?
+          name: update-pip-vcs-dependencies
+          command: |
+            . ~/venv/bin/activate
+            pip install --upgrade --upgrade-strategy=only-if-needed -r requirements.txt
+      - run:
+          name: dist
+          command: |
+            . ~/venv/bin/activate
+            npm run dist
+      - store_artifacts:
+          path: dist
+          prefix: dist

From 45bc67a44f14d0359338efaddf779c100f33d0b0 Mon Sep 17 00:00:00 2001
From: dvvanessastoiber
Date: Tue, 20 Aug 2019 08:16:53 +0200
Subject: [PATCH 04/40] CircleCI Docker image 'python:3.7-node-browsers'

created .gitlab-ci.yml

---
 .gitlab-ci.yml | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 .gitlab-ci.yml

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
new file mode 100644
index 0000000..50b4bd1
--- /dev/null
+++ b/.gitlab-ci.yml
@@ -0,0 +1,52 @@
+image: circleci/python:3.7-node-browsers
+
+variables:
+  NODE_VERSION: "10.16.3"
+  GIT_DEPTH: "1"
+
+cache:
+  key: "$CI_REPOSITORY_URL-$CI_COMMIT_REF_NAME"
+  paths:
+    - ~/venv
+
+before_script:
+  # Install ssh-agent if not already installed, it is required by Docker.
+  # (change apt-get to yum if you use a CentOS-based image)
+  - 'which ssh-agent || ( apt-get update -y && apt-get install openssh-client -y )'
+
+  # Run ssh-agent (inside the build environment)
+  - eval $(ssh-agent -s)
+
+  # Add the SSH key stored in SSH_PRIVATE_KEY variable to the agent store
+  - ssh-add <(echo "$SSH_PRIVATE_KEY")
+
+  # For Docker builds disable host key checking. Be aware that by adding that
+  # you are susceptible to man-in-the-middle attacks.
+  # WARNING: Use this only with the Docker executor, if you use it with shell
+  # you will overwrite your user's SSH config.
+  - mkdir -p ~/.ssh
+  - '[[ -f /.dockerenv ]] && echo -e "Host *\n\tStrictHostKeyChecking no\n\n" > ~/.ssh/config'
+
+stages:
+  - install
+  - build
+
+install-pip-wee:
+  stage: install
+  script: |
+    (!(test -f docker_packages.txt) || (cat docker_packages.txt | xargs sudo apt-get install -y))
+    virtualenv ~/venv
+    . ~/venv/bin/activate
+    pip install --upgrade --upgrade-strategy=only-if-needed -r requirements_dev.txt
+    pip install --upgrade --upgrade-strategy=only-if-needed -r requirements.txt
+
+dist:
+  stage: build
+  script: |
+    . ~/venv/bin/activate
+    npm run dist
+  allow_failure: false
+  artifacts:
+    expire_in: 1 week
+    paths:
+      - dist

From 2d9ab1af27ed5530190a970690d6bb7fb50154ed Mon Sep 17 00:00:00 2001
From: dvvanessastoiber
Date: Tue, 20 Aug 2019 08:21:51 +0200
Subject: [PATCH 05/40] Remove .encode('ascii') from setup.py

---
 setup.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 18e10f0..9383c07 100644
--- a/setup.py
+++ b/setup.py
@@ -26,12 +26,12 @@ def read_it(name):
 def packaged(*files):
   r = {}
   global pkg
-  r[pkg['name'].encode('ascii')] = list(files)
+  r[pkg['name']] = list(files)
   return r
 
 
 def requirements(file):
-  return [r.strip().encode('ascii') for r in read_it(file).strip().split('\n') if not r.startswith('-e git+https://')]
+  return [r.strip() for r in read_it(file).strip().split('\n') if not r.startswith('-e git+https://')]
 
 
 def to_version(v):
@@ -39,6 +39,7 @@ def to_version(v):
   now = datetime.datetime.utcnow()
   return v.replace('SNAPSHOT', now.strftime('%Y%m%d-%H%M%S'))
 
+
 setup(
   name=pkg['name'],
   version=to_version(pkg['version']),

From ca719f6784c45affcd963e26b9c210d9dfc10910 Mon Sep 17 00:00:00 2001
From: dvvanessastoiber
Date: Tue, 20 Aug 2019 08:23:28 +0200
Subject: [PATCH 06/40] Replace python tests py27 and py34 with py37

done for .travis.yml and tox.ini in taco_server

---
 .travis.yml | 3 +--
 tox.ini     | 5 ++---
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 428bcb9..5fcf10b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,8 +1,7 @@
 language: python
 sudo: required
 env:
-  - TOXENV=py27
-  - TOXENV=py34
+  - TOXENV=py37
 
 install:
   - (!(test -f docker_packages.txt) || (cat docker_packages.txt | xargs sudo apt-get install -y))

diff --git a/tox.ini b/tox.ini
index f2734b2..48c0366 100644
--- a/tox.ini
+++ b/tox.ini
@@ -5,12 +5,11 @@
 ###############################################################################
 
 [tox]
-envlist = py{27,34}
+envlist = py{37}
 
 [testenv]
 basepython =
-  py27: python2.7
-  py34: python3.4
+  py34: python3.7
 deps =
   flake8
   pytest

From 325e36f01aea6710f3eb67e11df80ba879f527d0 Mon Sep 17 00:00:00 2001
From: dvvanessastoiber
Date: Tue, 20 Aug 2019 13:03:32 +0200
Subject: [PATCH 07/40] used lib2to3 to convert files to new syntax

---
 setup.py                       |  2 +-
 taco_server/api.py             |  3 ++-
 taco_server/src/diff_cache.py  |  6 +++---
 taco_server/src/diff_finder.py | 30 +++++++++++++++---------------
taco_server/src/generator.py | 4 ++-- taco_server/src/modifier.py | 26 +++++++++++++------------- taco_server/src/test1.py | 28 ++++++++++++++-------------- 7 files changed, 50 insertions(+), 49 deletions(-) diff --git a/setup.py b/setup.py index 9383c07..9a5030b 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ # Copyright (c) The Caleydo Team. All rights reserved. # Licensed under the new BSD license, available at http://caleydo.org/license ############################################################################### -from __future__ import with_statement, print_function + from setuptools import setup, find_packages from codecs import open from os import path diff --git a/taco_server/api.py b/taco_server/api.py index 84e4e61..5cbda46 100644 --- a/taco_server/api.py +++ b/taco_server/api.py @@ -1,6 +1,6 @@ from phovea_server import ns import timeit -from src import diff_cache +from .src import diff_cache import logging @@ -102,6 +102,7 @@ def create(): """ return app + if __name__ == '__main__': app.debug = True app.run(host='0.0.0.0', port=9000) diff --git a/taco_server/src/diff_cache.py b/taco_server/src/diff_cache.py index 71bf9b7..971bc02 100644 --- a/taco_server/src/diff_cache.py +++ b/taco_server/src/diff_cache.py @@ -3,8 +3,7 @@ # detail (as detail), middle (as count), overview (as ratios) -from __future__ import print_function -from diff_finder import Table, DiffFinder, Diff, Ratios +from .diff_finder import Table, DiffFinder, Diff, Ratios import phovea_server.dataset as dataset import timeit import json @@ -37,6 +36,7 @@ def create_cache_dir(): else: _log.info('use existing cache directory: ' + _cache_directory) + # run immediately! create_cache_dir() @@ -237,7 +237,7 @@ def create_hashname(id1, id2, bins, bins_col, direction, ops): def ratio_from_json(jsonobj): # idk - r = json.loads(jsonobj, object_hook=lambda d: namedtuple('X', d.keys())(*d.values())) + r = json.loads(jsonobj, object_hook=lambda d: namedtuple('X', list(d.keys()))(*list(d.values()))) # todo find a smarter way, really cr = 0 if not hasattr(r, "c_ratio") else r.c_ratio ar = 0 if not hasattr(r, "a_ratio") else r.a_ratio diff --git a/taco_server/src/diff_finder.py b/taco_server/src/diff_finder.py index 6470676..869dd4c 100644 --- a/taco_server/src/diff_finder.py +++ b/taco_server/src/diff_finder.py @@ -356,8 +356,8 @@ def reorder_rows_counts(self): :param height: :return: """ - ids = map(lambda r: r['id'], self.reorder['rows']) - filtered_content = filter(lambda r: r['row'] in ids, self.content) + ids = [r['id'] for r in self.reorder['rows']] + filtered_content = [r for r in self.content if r['row'] in ids] return float(len(filtered_content)) def reorder_cols_counts(self): @@ -367,8 +367,8 @@ def reorder_cols_counts(self): :param height: :return: """ - ids = map(lambda r: r['id'], self.reorder['cols']) - filtered_content = filter(lambda r: r['col'] in ids, self.content) + ids = [r['id'] for r in self.reorder['cols']] + filtered_content = [r for r in self.content if r['col'] in ids] return float(len(filtered_content)) def reorder_rows_cols_counts(self): @@ -378,9 +378,9 @@ def reorder_rows_cols_counts(self): :param height: :return: """ - row_ids = map(lambda r: r['id'], self.reorder['rows']) - col_ids = map(lambda r: r['id'], self.reorder['cols']) - filtered_content = filter(lambda r: r['col'] in col_ids and r['row'] in row_ids, self.content) + row_ids = [r['id'] for r in self.reorder['rows']] + col_ids = [r['id'] for r in self.reorder['cols']] + filtered_content = [r for r in self.content if r['col'] in 
col_ids and r['row'] in row_ids] return float(len(filtered_content)) def reorder_counts(self): @@ -479,7 +479,7 @@ def per_bin_ratios(self, bins, e_type): index2bin = np.digitize(indices, bin_range) # todo handle the error here when there's no row ! - pcontent = [[] for x in xrange(bins)] + pcontent = [[] for x in range(bins)] for c in self.content: ci = union_rows.index(c[row]) bin_index = index2bin[ci] @@ -489,7 +489,7 @@ def per_bin_ratios(self, bins, e_type): pcontent[bin_index] += [c] # for structure changes - pstructure = [{"added_" + e_type: [], "deleted_" + e_type: []} for x in xrange(bins)] + pstructure = [{"added_" + e_type: [], "deleted_" + e_type: []} for x in range(bins)] # filter for the structure changes, because once there's a structure change, there's no need to find content #what!! for a in self.structure["added_" + e_type]: ai = union_rows.index(a['id']) @@ -508,7 +508,7 @@ def per_bin_ratios(self, bins, e_type): # convert to np.array to use np.where union_rows = np.array(union_rows) - for i in xrange(bins): + for i in range(bins): temp = union_rows[np.where(index2bin == i)[0]] if dir == D_ROWS: punion = { @@ -565,18 +565,18 @@ def per_entity_ratios(self, dir): pstructure = {} # filter for the structure changes, because once there's a structure change, there's no need to find content # idk why but obj is Diff! - pstructure["added_" + e_type] = filter(lambda obj: obj['id'] == id, self.structure["added_" + e_type]) + pstructure["added_" + e_type] = [obj for obj in self.structure["added_" + e_type] if obj['id'] == id] if len(pstructure["added_" + e_type]) != 0: # create a ratio where it's only added ratio_counts = RatiosAndCounts(Ratios(0, 1, 0, 0), Counts(0, len(union_cols), 0, 0)) else: # find the deleted - pstructure["deleted_" + e_type] = filter(lambda obj: obj['id'] == id, self.structure["deleted_" + e_type]) + pstructure["deleted_" + e_type] = [obj for obj in self.structure["deleted_" + e_type] if obj['id'] == id] if len(pstructure["deleted_" + e_type]) != 0: ratio_counts = RatiosAndCounts(Ratios(0, 0, 1, 0), Counts(0, 0, len(union_cols), 0)) else: # find the content - pcontent = filter(lambda obj: obj[row_id] == id, self.content) + pcontent = [obj for obj in self.content if obj[row_id] == id] if len(pcontent) == 0: pcontent = None # more resonable in the case of subtable @@ -838,7 +838,7 @@ def _compare_ids(self, e_type, ids1, ids2, u_ids, has_merge, has_structure, merg merged_ids = str(j).split(merge_delimiter) for s in merged_ids: # delete the delete operations related to those IDs - deleted_log = filter(lambda obj: obj['id'] != s, deleted_log) + deleted_log = [obj for obj in deleted_log if obj['id'] != s] merged_log += [{"id": s, "pos": np.where(u_ids == s)[0][0], "merge_id": merge_id, "is_added": False}] merge_id += 1 # increment it # log @@ -937,7 +937,7 @@ def _compare_values(self): # fixing an ungly bug when there are NO unique ids! # ## warning! bug ### # this happens when one of the tables does NOT have unique ids and the sizes are different... couldn't fix - print("Oops! it seems that sizes are not matching", cids1.shape[0], cids2.shape[0]) + print(("Oops! 
it seems that sizes are not matching", cids1.shape[0], cids2.shape[0])) set_boolean = (np.array(list(set(cids1))) != np.array(list(set(cids2)))) cdis = cids1[set_boolean] # ignore and leave diff --git a/taco_server/src/generator.py b/taco_server/src/generator.py index eef2526..817ea19 100644 --- a/taco_server/src/generator.py +++ b/taco_server/src/generator.py @@ -6,12 +6,12 @@ # creates an array with random float values within a range with size def random_floats_array(low, high, size): - return [random.uniform(low, high) for _ in xrange(size)] + return [random.uniform(low, high) for _ in range(size)] # creates an array with random int values within a range with size def random_int_array(low, high, size): - return [random.randint(low, high) for _ in xrange(size)] + return [random.randint(low, high) for _ in range(size)] # creates a homogeneous table diff --git a/taco_server/src/modifier.py b/taco_server/src/modifier.py index f69f3dc..5a03ceb 100644 --- a/taco_server/src/modifier.py +++ b/taco_server/src/modifier.py @@ -1,7 +1,7 @@ import numpy as np import random -import generator as gen -import logger as log +from . import generator as gen +from . import logger as log __author__ = 'Reem' @@ -44,7 +44,7 @@ def del_row(my_array, index): array_length = len(my_array) # check if the table is empty if array_length == 0: - print("Error: list is empty, can't delete a row", index) + print(("Error: list is empty, can't delete a row", index)) return my_array else: if index < array_length: @@ -58,7 +58,7 @@ def del_col(my_array, index): array_length = len(my_array) # check if the table is empty if array_length == 0: - print("Error: list is empty, can't delete a col", index) + print(("Error: list is empty, can't delete a col", index)) return my_array else: row_length = len(my_array[0]) @@ -165,7 +165,7 @@ def merge_columns(full_table, merge_array): # update the IDs col_ids.insert(merge_array[0], merged_id) log.message("merge", "column", merged_id, merge_array) - print(merged_id, cols, merged_col, table) + print((merged_id, cols, merged_col, table)) return {"table": table, "col_ids": col_ids, "row_ids": row_ids} @@ -189,7 +189,7 @@ def merge_rows(full_table, merge_array): # update the IDs row_ids.insert(merge_array[0], merged_id) log.message("merge", "row", merged_id, merge_array) - print(merged_id, rows, merged_row, table) + print((merged_id, rows, merged_row, table)) return {"table": table, "col_ids": col_ids, "row_ids": row_ids} @@ -202,27 +202,27 @@ def change_table(full_table, min_data, max_data, operations): new_row_id = latest_row_id + 1 new_col_id = latest_col_id + 1 # first delete the rows - for r in xrange(operations['del_row']): + for r in range(operations['del_row']): full_table = randomly_change_table(full_table, min_data, max_data, DEL_ROW) # then delete the cols - for c in xrange(operations['del_col']): + for c in range(operations['del_col']): full_table = randomly_change_table(full_table, min_data, max_data, DEL_COL) # then add rows - for r in xrange(operations['add_row']): + for r in range(operations['add_row']): full_table = randomly_change_table(full_table, min_data, max_data, ADD_ROW, new_row_id) new_row_id += 1 # then add cols - for c in xrange(operations['add_col']): + for c in range(operations['add_col']): full_table = randomly_change_table(full_table, min_data, max_data, ADD_COL, new_col_id) new_col_id += 1 # finally change the cells - for c in xrange(operations['ch_cell']): + for c in range(operations['ch_cell']): full_table = randomly_change_table(full_table, min_data, 
max_data, CH_CELL) # merge operation # the order of this operation might change later for mc in operations['me_col']: # full_table = merge_col(full_table) - print ('merge col', mc) + print(('merge col', mc)) full_table = merge_columns(full_table, mc) for mr in operations['me_row']: full_table = merge_rows(full_table, mr) @@ -277,7 +277,7 @@ def change_table(full_table, min_data, max_data, operations): gen.save_table(result['table'], result['row_ids'], result['col_ids'], data_directory + file_name + str(i + 1) + '_out.csv') # just print the size to add it manually to index.json - print (result['table'].shape[0], result['table'].shape[1], i) + print((result['table'].shape[0], result['table'].shape[1], i)) # update the ... for next round operations_count = { 'del_row': random.randint(0, 25), diff --git a/taco_server/src/test1.py b/taco_server/src/test1.py index 9f632f0..d7523cb 100644 --- a/taco_server/src/test1.py +++ b/taco_server/src/test1.py @@ -39,7 +39,7 @@ def del_row(my_array, index): array_length = len(my_array) # check if the table is empty if array_length == 0: - print("Error: list is empty, can't delete a row", index) + print(("Error: list is empty, can't delete a row", index)) return my_array else: if index < array_length: @@ -53,7 +53,7 @@ def del_col(my_array, index): array_length = len(my_array) # check if the table is empty if array_length == 0: - print("Error: list is empty, can't delete a row", index) + print(("Error: list is empty, can't delete a row", index)) return my_array else: row_length = len(my_array[0]) @@ -80,37 +80,37 @@ def randomly_change_table(table): if change_type == ADD_ROW: index = random.randint(0, len(table)) if len(table) > 0: - new_row = random.sample(range(min_data, max_data), len(table[0])) + new_row = random.sample(list(range(min_data, max_data)), len(table[0])) else: # table is empty - new_row = random.sample(range(min_data, max_data), random.randint(1, largest_row)) - print("log: add a row in ", index, new_row) + new_row = random.sample(list(range(min_data, max_data)), random.randint(1, largest_row)) + print(("log: add a row in ", index, new_row)) table = add_row(table, index, new_row) elif change_type == ADD_COL: if len(table) > 0: index = random.randint(0, len(table[0])) - new_col = random.sample(range(min_data, max_data), len(table)) + new_col = random.sample(list(range(min_data, max_data)), len(table)) else: index = 0 - new_col = random.sample(range(min_data, max_data), random.randint(1, largest_col)) - print("log: add a col in ", index, new_col) + new_col = random.sample(list(range(min_data, max_data)), random.randint(1, largest_col)) + print(("log: add a col in ", index, new_col)) table = add_col(table, index, new_col) elif change_type == CH_CELL: if len(table) > 0: i = random.randint(0, len(table) - 1) j = random.randint(0, len(table[0]) - 1) new_value = random.uniform(min_data, max_data) - print("log: change something somewhere ", i, j, new_value) + print(("log: change something somewhere ", i, j, new_value)) table = change_cell(table, i, j, new_value) else: print("log: there's nothing to change") elif change_type == DEL_ROW: index = random.randint(0, len(table) - 1) - print("log: delete row ", index) + print(("log: delete row ", index)) table = del_row(table, index) elif change_type == DEL_COL: index = random.randint(0, len(table[0]) - 1) - print("log: delete col ", index) + print(("log: delete col ", index)) table = del_col(table, index) return table @@ -152,13 +152,13 @@ def randomly_change_table(table): # table_3 might be from a file 
as it has to be big input_file = '../../data/small_table_in.csv' my_date = np.genfromtxt(input_file, delimiter=',') -print("this is my data", my_date) +print(("this is my data", my_date)) output_file = "../../data/small_table_out.csv" random.seed(100) num_of_changes = random.randint(2, 20) -print("num of changes is ", num_of_changes - 1) -for i in xrange(1, num_of_changes): +print(("num of changes is ", num_of_changes - 1)) +for i in range(1, num_of_changes): my_date = randomly_change_table(my_date) print(my_date) # print(table_2) From 5ba520d24c0509795a58595cde73ee8f0eeea5ba Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Tue, 20 Aug 2019 13:24:14 +0200 Subject: [PATCH 08/40] updated dependencies according to phovea_python --- requirements_dev.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/requirements_dev.txt b/requirements_dev.txt index e6ef63b..a7a5dbf 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,6 +1,6 @@ -flake8==3.0.4 -pep8-naming==0.4.1 -pytest==3.0.3 -pytest-runner==2.9 -Sphinx==1.5.2 +flake8==3.5.0 +pep8-naming==0.5.0 +pytest==3.5.0 +pytest-runner==4.2 +Sphinx==1.7.2 recommonmark==0.6.0 From 9f1eab822e15951bcf0461ad9da7d20ede09858a Mon Sep 17 00:00:00 2001 From: Holger Stitz Date: Thu, 22 Aug 2019 12:10:50 +0200 Subject: [PATCH 09/40] Use node v8.16.1 in circleci docker image --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 0fcaf74..55be307 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -5,7 +5,7 @@ jobs: docker: - image: circleci/python:3.7-node-browsers environment: - NODE_VERSION: 10.16.3 + NODE_VERSION: 8.16.1 tags: - /v\d+.\d+.\d+.*/ steps: From 7f0d9fd4adceebbce114b2ca1b9f768d2c90865d Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Thu, 22 Aug 2019 12:20:37 +0200 Subject: [PATCH 10/40] removed list() from lib2to3 --- taco_server/src/test1.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/taco_server/src/test1.py b/taco_server/src/test1.py index d7523cb..ecb97ae 100644 --- a/taco_server/src/test1.py +++ b/taco_server/src/test1.py @@ -80,19 +80,19 @@ def randomly_change_table(table): if change_type == ADD_ROW: index = random.randint(0, len(table)) if len(table) > 0: - new_row = random.sample(list(range(min_data, max_data)), len(table[0])) + new_row = random.sample(range(min_data, max_data), len(table[0])) else: # table is empty - new_row = random.sample(list(range(min_data, max_data)), random.randint(1, largest_row)) + new_row = random.sample(range(min_data, max_data), random.randint(1, largest_row)) print(("log: add a row in ", index, new_row)) table = add_row(table, index, new_row) elif change_type == ADD_COL: if len(table) > 0: index = random.randint(0, len(table[0])) - new_col = random.sample(list(range(min_data, max_data)), len(table)) + new_col = random.sample(range(min_data, max_data), len(table)) else: index = 0 - new_col = random.sample(list(range(min_data, max_data)), random.randint(1, largest_col)) + new_col = random.sample(range(min_data, max_data), random.randint(1, largest_col)) print(("log: add a col in ", index, new_col)) table = add_col(table, index, new_col) elif change_type == CH_CELL: From 4eba7f62951adfd5ab3eceb1919e20a54b18a320 Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Thu, 22 Aug 2019 12:20:59 +0200 Subject: [PATCH 11/40] adapted pandas import --- taco_server/src/diff_cache.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 
deletions(-) diff --git a/taco_server/src/diff_cache.py b/taco_server/src/diff_cache.py index 971bc02..d60f742 100644 --- a/taco_server/src/diff_cache.py +++ b/taco_server/src/diff_cache.py @@ -7,7 +7,7 @@ import phovea_server.dataset as dataset import timeit import json -import pandas.json as ujson +import pandas as pd import os import hashlib from collections import namedtuple @@ -50,7 +50,7 @@ def get_diff_cache(filename): file_name = _cache_directory + filename + '.json' if os.path.isfile(file_name): with open(file_name) as data_file: - data = ujson.load(data_file) + data = pd.json.load(data_file) return data # if the file doesn't exist return None @@ -100,16 +100,16 @@ def get_diff_table(id1, id2, direction, ops, jsonit=True): if isinstance(diffobj, Diff): # log the detail - json_result = ujson.dumps(diffobj.serialize()) + json_result = pd.json.dumps(diffobj.serialize()) set_diff_cache(hash_name, json_result) else: # todo later find a way to send the error # e.g. there's no matching column in this case - json_result = ujson.dumps(diffobj) # which is {} for now! + json_result = pd.json.dumps(diffobj) # which is {} for now! set_diff_cache(hash_name, json_result) elif jsonit is False: - diffobj = Diff().unserialize(ujson.loads(json_result)) + diffobj = Diff().unserialize(pd.json.loads(json_result)) if jsonit: return json_result @@ -152,10 +152,10 @@ def get_ratios(id1, id2, direction, ops, bins=1, bins_col=1, jsonit=True): # bin == 1 -> timeline bar chart # bin == -1 -> 2d ratio plot if bins == 1 or bins == -1: - json_ratios = ujson.dumps(ratios.serialize()) + json_ratios = pd.json.dumps(ratios.serialize()) # bin > 1 -> 2d ratio histogram else: - json_ratios = ujson.dumps(ratios) + json_ratios = pd.json.dumps(ratios) # cache this as overview set_diff_cache(hashname, json_ratios) @@ -237,7 +237,7 @@ def create_hashname(id1, id2, bins, bins_col, direction, ops): def ratio_from_json(jsonobj): # idk - r = json.loads(jsonobj, object_hook=lambda d: namedtuple('X', list(d.keys()))(*list(d.values()))) + r = json.loads(jsonobj, object_hook=lambda d: namedtuple('X', d.keys())(*list(d.values()))) # todo find a smarter way, really cr = 0 if not hasattr(r, "c_ratio") else r.c_ratio ar = 0 if not hasattr(r, "a_ratio") else r.a_ratio From 62884394290025fcd203667dd665c806631796b9 Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Wed, 28 Aug 2019 11:39:49 +0200 Subject: [PATCH 12/40] fixed indentation --- taco_server/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/taco_server/__init__.py b/taco_server/__init__.py index 757a7b9..3204af7 100644 --- a/taco_server/__init__.py +++ b/taco_server/__init__.py @@ -12,9 +12,9 @@ def phovea(registry): """ # generator-phovea:begin registry.append('namespace', 'taco', 'taco_server.api', - { - 'namespace': '/api/taco' - }) + { + 'namespace': '/api/taco' + }) # generator-phovea:end pass From f49ba6690a750a90d7f8e57b7e75dcd2ae5e33ea Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Tue, 3 Sep 2019 12:34:46 +0200 Subject: [PATCH 13/40] created JsonEncoder avoid default usage of numpy types --- taco_server/src/json_encoder.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 taco_server/src/json_encoder.py diff --git a/taco_server/src/json_encoder.py b/taco_server/src/json_encoder.py new file mode 100644 index 0000000..c945c86 --- /dev/null +++ b/taco_server/src/json_encoder.py @@ -0,0 +1,21 @@ +""" +Serializer to avoid default usage of numpy integer/float/bytes/etc. 
+""" +import json +import numpy + + +class JsonEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, numpy.integer): + return int(obj) + elif isinstance(obj, numpy.int64): + return int(obj) + elif isinstance(obj, bytes): + return obj.decode('utf-8') + elif isinstance(obj, numpy.floating): + return float(obj) + elif isinstance(obj, numpy.ndarray): + return obj.tolist() + else: + return super(JsonEncoder, self).default(obj) From 568a56a6853d6ee84fe55d1b62cefbb05a2f5c3c Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Tue, 3 Sep 2019 12:35:12 +0200 Subject: [PATCH 14/40] remove pandas, use json and custom encoder instead --- taco_server/src/diff_cache.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/taco_server/src/diff_cache.py b/taco_server/src/diff_cache.py index d60f742..a106bde 100644 --- a/taco_server/src/diff_cache.py +++ b/taco_server/src/diff_cache.py @@ -7,7 +7,7 @@ import phovea_server.dataset as dataset import timeit import json -import pandas as pd +from . import json_encoder import os import hashlib from collections import namedtuple @@ -50,7 +50,7 @@ def get_diff_cache(filename): file_name = _cache_directory + filename + '.json' if os.path.isfile(file_name): with open(file_name) as data_file: - data = pd.json.load(data_file) + data = json.load(data_file) return data # if the file doesn't exist return None @@ -100,16 +100,17 @@ def get_diff_table(id1, id2, direction, ops, jsonit=True): if isinstance(diffobj, Diff): # log the detail - json_result = pd.json.dumps(diffobj.serialize()) + serialize = Diff.serialize + json_result = (json.dumps(diffobj.__dict__, cls=json_encoder.JsonEncoder)) set_diff_cache(hash_name, json_result) else: # todo later find a way to send the error # e.g. there's no matching column in this case - json_result = pd.json.dumps(diffobj) # which is {} for now! + json_result = json.dumps(diffobj) # which is {} for now! 
set_diff_cache(hash_name, json_result) elif jsonit is False: - diffobj = Diff().unserialize(pd.json.loads(json_result)) + diffobj = Diff().unserialize(json.loads(json_result)) if jsonit: return json_result @@ -152,10 +153,10 @@ def get_ratios(id1, id2, direction, ops, bins=1, bins_col=1, jsonit=True): # bin == 1 -> timeline bar chart # bin == -1 -> 2d ratio plot if bins == 1 or bins == -1: - json_ratios = pd.json.dumps(ratios.serialize()) + json_ratios = json.dumps(ratios.serialize()) # bin > 1 -> 2d ratio histogram else: - json_ratios = pd.json.dumps(ratios) + json_ratios = json.dumps(ratios) # cache this as overview set_diff_cache(hashname, json_ratios) @@ -232,7 +233,7 @@ def create_hashname(id1, id2, bins, bins_col, direction, ops): :return: """ name = str(id1) + '_' + str(id2) + '_' + str(bins) + '_' + str(bins_col) + '_' + str(direction) + '_' + str(ops) - return hashlib.md5(name).hexdigest() + return hashlib.md5(name.encode('utf-8')).hexdigest() def ratio_from_json(jsonobj): From c198a082ae0b1cff3b0757a03d87e7a408f664de Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Thu, 5 Sep 2019 18:08:11 +0200 Subject: [PATCH 15/40] change dtype of numpy.ndarray to string --- taco_server/src/diff_cache.py | 4 ++-- taco_server/src/json_encoder.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/taco_server/src/diff_cache.py b/taco_server/src/diff_cache.py index a106bde..6db01fe 100644 --- a/taco_server/src/diff_cache.py +++ b/taco_server/src/diff_cache.py @@ -199,12 +199,12 @@ def stratify_matrix(m): if row_strat is not None: rowids = list(m.rowids()) row_indices = [rowids.index(o) for o in row_strat.rowids()] - data = data[row_indices, ...] + data = data[row_indices, ...].astype('str') if col_strat is not None: colids = list(m.colids()) col_indices = [colids.index(o) for o in col_strat.rowids()] - data = data[..., col_indices] + data = data[..., col_indices].astype('str') return Table(rows, cols, data) diff --git a/taco_server/src/json_encoder.py b/taco_server/src/json_encoder.py index c945c86..0e084b7 100644 --- a/taco_server/src/json_encoder.py +++ b/taco_server/src/json_encoder.py @@ -13,6 +13,8 @@ def default(self, obj): return int(obj) elif isinstance(obj, bytes): return obj.decode('utf-8') + elif isinstance(obj, numpy.bytes_): + return obj.decode('utf-8') elif isinstance(obj, numpy.floating): return float(obj) elif isinstance(obj, numpy.ndarray): From c269808c72238beac4c22cefcef3a05dac43a48a Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Thu, 5 Sep 2019 18:12:57 +0200 Subject: [PATCH 16/40] removed IndexError - changed dtype of numpy.ndarray to string - ensure that index 'i' can be found in array ('disordered' can eventually be an encapsulated array) --- taco_server/src/diff_finder.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/taco_server/src/diff_finder.py b/taco_server/src/diff_finder.py index 869dd4c..a06732a 100644 --- a/taco_server/src/diff_finder.py +++ b/taco_server/src/diff_finder.py @@ -509,7 +509,7 @@ def per_bin_ratios(self, bins, e_type): # convert to np.array to use np.where union_rows = np.array(union_rows) for i in range(bins): - temp = union_rows[np.where(index2bin == i)[0]] + temp = union_rows[np.where(index2bin == i)[0]].astype('str').tolist() if dir == D_ROWS: punion = { "ur_ids": temp, @@ -872,6 +872,7 @@ def _compare_values1(self): # @disordered is an array of the IDs that are available in x and not in the matching position in y (or not available at all) # in case x and y are a 
result of the intersection then disordered is the list of disordered IDs in x def _find_reorder(self, ids1, ids2, x, y, disordered, direction): + import numpy # todo this should be as the size of the original ids not just the intesection ids # x shape or y shape should be the same # or the shape of the IDs in the second table (original y) @@ -879,20 +880,25 @@ def _find_reorder(self, ids1, ids2, x, y, disordered, direction): reordered = [] for i in disordered: # todo check this with more than 2 changes - pos_table1 = np.where(ids1 == i)[0][0] - pos_table2 = np.where(ids2 == i)[0][0] + if isinstance(i, numpy.ndarray): + i = i[0] + try: + pos_table1 = np.where(ids1 == i)[0][0] + pos_table2 = np.where(ids2 == i)[0][0] + except IndexError: + print('index error') # todo substitute this with the new one! reordered.append({'id': i, 'from': pos_table1, 'to': pos_table2, 'diff': pos_table2 - pos_table1}) old = np.where(x == i)[0][0] new = np.where(y == i)[0][0] np.put(indices, old, new) - # index = [] - # for i in x: - # if i != y[np.where(x == i)[0][0]]: - # index += [np.where(y == i)[0][0]] - # else: - # index += [np.where(x == i)[0][0]] + # index = [] + # for i in x: + # if i != y[np.where(x == i)[0][0]]: + # index += [np.where(y == i)[0][0]] + # else: + # index += [np.where(x == i)[0][0]] self._reorder_to_json(direction, reordered) return indices @@ -934,7 +940,7 @@ def _compare_values(self): try: cdis = cids1[cids1 != cids2] except ValueError: - # fixing an ungly bug when there are NO unique ids! + # fixing an ungly bug when there are NO unique ids! # ## warning! bug ### # this happens when one of the tables does NOT have unique ids and the sizes are different... couldn't fix print(("Oops! it seems that sizes are not matching", cids1.shape[0], cids2.shape[0])) @@ -949,7 +955,7 @@ def _compare_values(self): inter2 = inter2[:, c_indices] # at this point inter2 should look good hopefully! 
# diff work - diff = inter2 - inter1 + diff = inter2.astype('float') - inter1.astype('float') # done :) # normalization normalized_diff = normalize_float_11(diff) From 83617a0f4fd1bbcc1e3d38cc8d95daf7c8c0cff8 Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Mon, 14 Oct 2019 15:16:25 +0200 Subject: [PATCH 17/40] remove _travis.yml_ --- .travis.yml | 26 -------------------------- 1 file changed, 26 deletions(-) delete mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 5fcf10b..0000000 --- a/.travis.yml +++ /dev/null @@ -1,26 +0,0 @@ -language: python -sudo: required -env: - - TOXENV=py37 - -install: - - (!(test -f docker_packages.txt) || (cat docker_packages.txt | xargs sudo apt-get install -y)) - - pip install -r requirements_dev.txt - - pip install -r requirements.txt - -script: npm run dist - -deploy: - provider: releases - api_key: - secure: TK9/P34Bi3WuppiDrBCwVcn41yCBwmILaU8hXTBzUPbT7TbeFIwsC6/4CtH85Z+ZrUve4S5pTmWRNf2dQDxWw3uYu7+bJuemV2J1LHG76mognj+TNEiYxfLQUt3Gql4W7C7FcI4Rlx5/uMN9wY1wro8TWUBMwT6jjSrUWIvK3GXoojd5bHvJx07XpjWl9wCon4D0ruZiFoM2mdeP23lbc2GckETi32oEKswnQXxkMACmxbPzoWbvkxH4aK8Bt2Rj2sl2TbPhVkN6DAkHGkGAvLI+2/aRfG27+oo3OKsaDjbuGABct8TfZccJ970CbQ8kbnCjYxstvqkg1JWjF0W67sX/flBZZOEUA5l0OLWo6HqMGMxm7/lEQhIdPMsRmvXL+HVOxkMrB2dda58QzxVwiZp+rRqUaeabPZp8Kl5xodGrVxsBvxe6zAbJ5jCtCSumG6+kLyKI00/kYlghqQNrgUw0ZsYJlQ34h3lo/24QpaeyDpQoCkGWQgtgqiXGpeKSu7bCnOqIqAy3nbT9Utwj7K8gIasTG5idosEAz/THMampNbGDuyxxc340sYGNMg9Bhm1g2ILWRdtV470p5hwBtIDTKi3/PAizEO26+Wh0zI47Sg3ao57avcbCsTmzbZUeA5J4bojmchhJCHX8su9cSCGh/2fJA/1eBIgEvOQ8LNE= - file_glob: true - file: dist/taco_server*.egg - on: - tags: true - -notifications: - slack: - secure: E8/1UIdHSczUbN+6i6gd1d5LM4vmLdwLQ30tpyjvnM0wvfDce76oPxLJAy240WJ5ybXRZUtNrttpVpt4tEXCy8aLFCmxD7s77rVloH+q1J8R/ptTFWZGhFGEujk1awEmVbzcWxJkV9/JENQaeGBKxwv8/EQwWwEkAb7p/+AJb9owmH88b3wUZUGHBWtbMiyyaF4Rm1Wg1stJB8Z1Ga7PRF4cqufTgcDdsCPVv9gAY+VxOIGqX/Vfuc9UWpUH8vq8lHUE7Inn5QS78kuFfSgLWga3H6Mu/Gko1XNlWk0QWWQBUvEZ6ZC6Wuo68KzvUjJHDTnx8WyfHue2JNHIslcX+eJq2WHLeEgM24VeNkILCGo/H/60NGHiSjrIv/Y9h6bQ9FDjo6TUyE4nbdPYN1RN9FQ5UbI9Y4Gi753H9mqnHWlEywBOzHxdZCAuz9Wh03CCF/blsvJ+Obbyo6Jrfe+g44jyi9kQdBNQ78qG6v4EXws8FiYao6x3PpgIwFix42Cpr+soAh5FpA3C1zHSAyZZpXF65/lrDl5yPNofK7Wy0B9bw+0I6Z/u7ZKFNVZXvYPGYvtUVcsALGBdmYc61+LCta36Po0KZseWVAlJj6QnOJDYzv0wvV/zsuf9A5KpYFGiqV9Q7zmtiO5FYF5sBy+lE7O9tHVO4O18IRndhRQgxhs= - on_success: change - on_failure: always From 83d4ca1cbe8719d65cac8927de177d6ce7ae37ba Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Mon, 14 Oct 2019 15:17:04 +0200 Subject: [PATCH 18/40] switch branches in _requirements.txt_ --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 6d96a8f..4f95b4d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ --e git+https://github.com/phovea/phovea_server.git@develop#egg=phovea_server +-e git+https://github.com/phovea/phovea_server.git@python_3.7#egg=phovea_server sklearn==0.0 From 8c27fad8f0629d98142e2850677293763ddb147d Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Mon, 14 Oct 2019 15:21:09 +0200 Subject: [PATCH 19/40] update node environment variable in circleci config --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 55be307..0fcaf74 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -5,7 +5,7 @@ jobs: docker: - image: circleci/python:3.7-node-browsers environment: - NODE_VERSION: 8.16.1 + NODE_VERSION: 
10.16.3 tags: - /v\d+.\d+.\d+.*/ steps: From dc06b458f9c763aeed1c74682f3823a5a5c8b8dd Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Fri, 18 Oct 2019 10:03:19 +0200 Subject: [PATCH 20/40] switch branches in _requirements.txt_ --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 4f95b4d..b10f8b8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ --e git+https://github.com/phovea/phovea_server.git@python_3.7#egg=phovea_server +-e git+https://github.com/phovea/phovea_server.git@flask_1.1.1#egg=phovea_server sklearn==0.0 From 5fd73c5fdf35033e708bc1238e994622363237e7 Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Wed, 23 Oct 2019 07:54:54 +0200 Subject: [PATCH 21/40] unify _package.json_ --- package.json | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/package.json b/package.json index b00a772..e903cd4 100644 --- a/package.json +++ b/package.json @@ -1,4 +1,21 @@ { + "name": "taco_server", + "description": "The server part for comparing large tabular data using Phovea", + "version": "1.0.0-SNAPSHOT", + "author": { + "name": "The Caleydo Team", + "email": "contact@caleydo.org", + "url": "https://caleydo.org" + }, + "license": "BSD-3-Clause", + "homepage": "http://caleydo.org", + "bugs": { + "url": "https://github.com/caleydo/taco_server/issues" + }, + "repository": { + "type": "git", + "url": "https://github.com/caleydo/taco_server.git" + }, "files": [ "taco_server", "__init__.py", @@ -17,22 +34,5 @@ "predist": "npm run build && npm run docs", "dist": "python setup.py bdist_egg && cd build && tar cvzf ../dist/taco_server.tar.gz *", "docs": "sphinx-apidoc -o docs -f ./taco_server && sphinx-build ./docs build/docs" - }, - "name": "taco_server", - "description": "The server part for comparing large tabular data using Phovea", - "homepage": "http://caleydo.org", - "version": "1.0.0-SNAPSHOT", - "author": { - "name": "The Caleydo Team", - "email": "contact@caleydo.org", - "url": "https://caleydo.org" - }, - "license": "BSD-3-Clause", - "bugs": { - "url": "https://github.com/caleydo/taco_server/issues" - }, - "repository": { - "type": "git", - "url": "https://github.com/caleydo/taco_server.git" } } From 056c5a78589269234dbe0eac9e229448635de247 Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Mon, 28 Oct 2019 10:37:20 +0100 Subject: [PATCH 22/40] fix problems with JSON encoder - remove underscore - change type of ndarray --- taco_server/src/json_encoder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/taco_server/src/json_encoder.py b/taco_server/src/json_encoder.py index 0e084b7..124878c 100644 --- a/taco_server/src/json_encoder.py +++ b/taco_server/src/json_encoder.py @@ -13,11 +13,11 @@ def default(self, obj): return int(obj) elif isinstance(obj, bytes): return obj.decode('utf-8') - elif isinstance(obj, numpy.bytes_): + elif isinstance(obj, numpy.bytes): return obj.decode('utf-8') elif isinstance(obj, numpy.floating): return float(obj) elif isinstance(obj, numpy.ndarray): - return obj.tolist() + return obj.astype(str) else: return super(JsonEncoder, self).default(obj) From 981193d2bb7b3b56cec301c23e1edcd64ed3fab6 Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Wed, 30 Oct 2019 08:01:39 +0100 Subject: [PATCH 23/40] use `destination` instead of invalid `prefix` in _config.yml_ --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml 
b/.circleci/config.yml
index 0fcaf74..3c9ab59 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -38,4 +38,4 @@ jobs:
             npm run dist
       - store_artifacts:
           path: dist
-          prefix: dist
+          destination: dist

From dc56e50ae8337d4d6f5c9bc7724761ee64179e7f Mon Sep 17 00:00:00 2001
From: dvvanessastoiber
Date: Wed, 30 Oct 2019 08:02:15 +0100
Subject: [PATCH 24/40] update node version to "12.13" and remove environment
 variable (not a frontend-only repo)

---
 .circleci/config.yml | 2 --
 .gitlab-ci.yml       | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 3c9ab59..c021421 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -4,8 +4,6 @@ jobs:
     working_directory: ~/phovea
     docker:
       - image: circleci/python:3.7-node-browsers
-        environment:
-          NODE_VERSION: 10.16.3
     tags:
       - /v\d+.\d+.\d+.*/
     steps:

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 50b4bd1..1da2330 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,7 +1,7 @@
 image: circleci/python:3.7-node-browsers
 
 variables:
-  NODE_VERSION: "10.16.3"
+  NODE_VERSION: "12.13"
   GIT_DEPTH: "1"
 
 cache:

From fbdb8aa01646525212d8fd34ead6ff8a20c7340e Mon Sep 17 00:00:00 2001
From: dvvanessastoiber
Date: Thu, 31 Oct 2019 07:59:58 +0100
Subject: [PATCH 25/40] change type of numpy array to avoid error with `bytes`
 and `string`

---
 taco_server/src/diff_finder.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/taco_server/src/diff_finder.py b/taco_server/src/diff_finder.py
index a06732a..bc5cb24 100644
--- a/taco_server/src/diff_finder.py
+++ b/taco_server/src/diff_finder.py
@@ -740,7 +740,7 @@ def __init__(self, t1, t2, rowtype, coltype, direction):
     self.diff = Diff(direction=self._direction)
     self.union = {}
     self.intersection = {}  # we only need this for rows when we have content changes
-    self.intersection["ic_ids"] = get_intersection(self._table1.col_ids, self._table2.col_ids)
+    self.intersection["ic_ids"] = get_intersection(self._table1.col_ids, self._table2.col_ids.astype(str))
     if self.intersection["ic_ids"].shape[0] > 0:
       # there's at least one common column between the tables
       # otherwise there's no need to calculate the unions

From 46f81ce37fed9e7fe7438fdc33b5d2c58f3450d7 Mon Sep 17 00:00:00 2001
From: dvvanessastoiber
Date: Thu, 31 Oct 2019 11:13:02 +0100
Subject: [PATCH 26/40] fix check whether key exists

---
 taco_server/src/diff_finder.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/taco_server/src/diff_finder.py b/taco_server/src/diff_finder.py
index 6470676..2b63905 100644
--- a/taco_server/src/diff_finder.py
+++ b/taco_server/src/diff_finder.py
@@ -172,11 +172,11 @@ def serialize(self):
     }
 
   def unserialize(self, json_obj):
-    self.content = [] if json_obj['content'] is None else json_obj['content']
-    self.structure = {} if json_obj['structure'] is None else json_obj['structure']
-    self.merge = {} if json_obj['merge'] is None else json_obj['merge']
-    self.reorder = {'rows': [], 'cols': []} if json_obj['reorder'] is None else json_obj['reorder']
-    self.union = {} if json_obj['union'] is None else json_obj['union']
+    self.content = json_obj['content'] if 'content' in list(json_obj.keys()) else []
+    self.structure = json_obj['structure'] if 'structure' in list(json_obj.keys()) else {}
+    self.merge = json_obj['merge'] if 'merge' in list(json_obj.keys()) else {}
+    self.reorder = json_obj['reorder'] if 'reorder' in list(json_obj.keys()) else {'rows': [], 'cols': []}
+    self.union = json_obj['union'] if 'union' in
list(json_obj.keys()) else {} return self def content_counts_percell(self): @@ -421,7 +421,7 @@ def aggregate(self, bins, bins_col=2): # it's the case of histogram or bar plot result = {} if self._direction == D_ROWS_COLS or self._direction == D_ROWS: - union_rows = self.union['ur_ids'] + union_rows = self.union['ur_ids'] if 'ur_ids' in list(self.union.keys()) else [] max_height = len(union_rows) if bins >= max_height: # this is the case of bar plot @@ -437,7 +437,7 @@ def aggregate(self, bins, bins_col=2): # todo the rows might have different bins number than the cols if self._direction == D_ROWS_COLS or self._direction == D_COLS: # if it's the cols not the rows then switch - union_cols = self.union['uc_ids'] + union_cols = self.union['uc_ids'] if 'uc_ids' in list(self.union.keys()) else [] max_width = len(union_cols) if bins_col >= max_width: # todo handle the > alone or? @@ -540,15 +540,15 @@ def per_entity_ratios(self, dir): # get a partial diff where every row is a diff # 1. Partition # get the direction - union_rows = self.union['ur_ids'] - union_cols = self.union['uc_ids'] + union_rows = self.union['ur_ids'] if 'ur_ids' in list(self.union.keys()) else [] + union_cols = self.union['uc_ids'] if 'uc_ids' in list(self.union.keys()) else [] e_type = "rows" row_id = "row" if dir == D_COLS: # if it's the cols not the rows then switch - union_rows = self.union['uc_ids'] - union_cols = self.union['ur_ids'] + union_rows = self.union['uc_ids'] if 'uc_ids' in list(self.union.keys()) else [] + union_cols = self.union['ur_ids'] if 'ur_ids' in list(self.union.keys()) else [] # todo handle the case of both rows and columns e_type = "cols" row_id = "col" From 1e812f9993c4be5d1184f3f9ba54059f2ec7614d Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Thu, 31 Oct 2019 15:10:08 +0100 Subject: [PATCH 27/40] switch branches in _requirements.txt_ --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index b10f8b8..4f95b4d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ --e git+https://github.com/phovea/phovea_server.git@flask_1.1.1#egg=phovea_server +-e git+https://github.com/phovea/phovea_server.git@python_3.7#egg=phovea_server sklearn==0.0 From dab80fe7047214847f04f75b0ae535c2888ba0b3 Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Thu, 31 Oct 2019 15:17:21 +0100 Subject: [PATCH 28/40] fix error with indentation and unused variables --- taco_server/__init__.py | 7 +++---- taco_server/src/diff_cache.py | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/taco_server/__init__.py b/taco_server/__init__.py index 3204af7..c905365 100644 --- a/taco_server/__init__.py +++ b/taco_server/__init__.py @@ -11,10 +11,9 @@ def phovea(registry): :param registry: """ # generator-phovea:begin - registry.append('namespace', 'taco', 'taco_server.api', - { - 'namespace': '/api/taco' - }) + registry.append('namespace', 'taco', 'taco_server.api', { + 'namespace': '/api/taco' + }) # generator-phovea:end pass diff --git a/taco_server/src/diff_cache.py b/taco_server/src/diff_cache.py index 6db01fe..c027926 100644 --- a/taco_server/src/diff_cache.py +++ b/taco_server/src/diff_cache.py @@ -100,7 +100,7 @@ def get_diff_table(id1, id2, direction, ops, jsonit=True): if isinstance(diffobj, Diff): # log the detail - serialize = Diff.serialize + serialize = Diff.serialize # noqa E121 json_result = (json.dumps(diffobj.__dict__, cls=json_encoder.JsonEncoder)) set_diff_cache(hash_name, json_result) else: From 
eff794df87762d762cfd80a9e2a5f3307f21edcc Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Thu, 31 Oct 2019 15:20:06 +0100 Subject: [PATCH 29/40] add spaces --- taco_server/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/taco_server/__init__.py b/taco_server/__init__.py index c905365..5beac01 100644 --- a/taco_server/__init__.py +++ b/taco_server/__init__.py @@ -12,7 +12,7 @@ def phovea(registry): """ # generator-phovea:begin registry.append('namespace', 'taco', 'taco_server.api', { - 'namespace': '/api/taco' + 'namespace': '/api/taco' }) # generator-phovea:end pass From 9889a8643d26a521e9dc16ce881bb07c48bb042e Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Thu, 31 Oct 2019 15:32:18 +0100 Subject: [PATCH 30/40] add decoding --- build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.py b/build.py index 2a12e00..63fa1ef 100644 --- a/build.py +++ b/build.py @@ -19,7 +19,7 @@ def _resolve_plugin(repo, version): if os.path.isdir('.git') and repo: if repo.endswith('.git'): repo = repo[0:-4] - return repo + '/commit/' + _git_head('.') + return repo + '/commit/' + _git_head('.').decode('utf-8') # not a git repo return version From 06f6f0cd6add8ca204ff90acf8756275f9416d15 Mon Sep 17 00:00:00 2001 From: Holger Stitz Date: Wed, 18 Dec 2019 21:25:37 +0100 Subject: [PATCH 31/40] Update template files and dependencies Using generator-phovea v3.0.0 - Update CircleCI config - Update dev dependencies - Update buildPython.js --- .circleci/config.yml | 28 +++++++++---- .yo-rc.json | 8 +++- buildPython.js | 69 +++++++++++++++++++++++++++++++ deploy/docker-compose.partial.yml | 1 + package.json | 8 ++-- requirements.txt | 2 +- setup.py | 4 +- tox.ini | 2 +- 8 files changed, 107 insertions(+), 15 deletions(-) create mode 100644 buildPython.js create mode 100644 deploy/docker-compose.partial.yml diff --git a/.circleci/config.yml b/.circleci/config.yml index 93e6342..5f59cc4 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -3,16 +3,27 @@ jobs: build: working_directory: ~/phovea docker: - - image: caleydo/phovea_circleci_python:v2.0 + - image: caleydo/phovea_circleci_python:v3.0 steps: - checkout - run: + name: Show Node.js and npm version + command: | + node -v + npm -v + - run: + name: Show Python and pip version + command: | + python --version + pip --version + - run: + name: Install Docker packages from docker_packages.txt command: | (!(test -f docker_packages.txt) || (cat docker_packages.txt | xargs sudo apt-get install -y)) - restore_cache: key: deps1-{{ .Branch }}-{{ checksum "requirements.txt" }}-{{ checksum "requirements_dev.txt" }} - run: - name: install-pip-wee + name: Install pip requirements command: | virtualenv ~/venv . ~/venv/bin/activate @@ -22,13 +33,16 @@ jobs: key: deps1-{{ .Branch }}-{{ checksum "requirements.txt" }}-{{ checksum "requirements_dev.txt" }} paths: - ~/venv - - run: #force update of VCS dependencies? - name: update-pip-vcs-dependencies + - run: + name: Force an update of pip dependencies from git repositories # not sure if this is working ? command: | . ~/venv/bin/activate pip install --upgrade --upgrade-strategy=only-if-needed -r requirements.txt - run: - name: dist + name: Show installed pip packages + command: pip list || true + - run: + name: Build command: | . 
~/venv/bin/activate npm run dist @@ -39,8 +53,8 @@ workflows: version: 2 # build-nightly: # triggers: -# - schedule: # nightly build during weekday -# cron: "15 1 * * 1-5" +# - schedule: +# cron: "15 1 * * 1-5" # "At 01:15 on every day-of-week from Monday through Friday.”, see: https://crontab.guru/#15_1_*_*_1-5 # filters: # branches: # only: diff --git a/.yo-rc.json b/.yo-rc.json index 896b193..9c31be8 100644 --- a/.yo-rc.json +++ b/.yo-rc.json @@ -27,6 +27,12 @@ "debianPackages": [], "redhatPackages": [] }, - "today": "Tue, 08 Nov 2016 08:36:05 GMT" + "today": "Tue, 08 Nov 2016 08:36:05 GMT", + "promptValues": { + "authorName": "The Caleydo Team", + "authorEmail": "contact@caleydo.org", + "authorUrl": "https://caleydo.org", + "githubAccount": "caleydo" + } } } \ No newline at end of file diff --git a/buildPython.js b/buildPython.js new file mode 100644 index 0000000..c30f2e8 --- /dev/null +++ b/buildPython.js @@ -0,0 +1,69 @@ +/** + * Created by sam on 13.11.2016. + */ + +const spawnSync = require('child_process').spawnSync; +const fs = require('fs'); + +function gitHead(cwd) { + const r = spawnSync('git', ['rev-parse', '--verify', 'HEAD'], { + cwd: cwd + }); + if (!r.stdout) { + console.error(cwd, r.error); + return 'error'; + } + return r.stdout.toString().trim(); +} + +function resolvePlugin(repo, version) { + if (fs.lstatSync('.git').isDirectory() && repo) { + if (repo.endsWith('.git')) { + repo = repo.slice(0, repo.length - 4); + return repo + '/commit/' + gitHead('.'); + } + } + // not a git repo + return version; +} + +function toVersion(v) { + const now = new Date().toISOString(); + // %Y%m%d-%H%M%S + const fmt = now + .replace(/T/, ' ') + .replace(/\..+/, '') + .replace(/[-:]/, '') + .replace(' ', '-'); + return v.replace('SNAPSHOT', fmt); +} + +function _main() { + const pkg = require('./package.json'); + const name = pkg.name; + const version = toVersion(pkg.version); + const resolved = resolvePlugin((pkg.repository || {}).url, version); + + const buildInfo = { + name, + version, + resolved, + description: pkg.description, + homepage: pkg.homepage, + repository: (pkg.repository || {}).url + }; + + const l = ('build/source/' + name.toLowerCase()).split('/'); + l.forEach((_, i) => { + const path = l.slice(0, i + 1).join('/'); + if (!fs.existsSync(path)) { + fs.mkdirSync(path); + } + }); + + fs.writeFileSync('build/source/' + name.toLowerCase() + '/buildInfo.json', JSON.stringify(buildInfo, null, ' ')); +} + +if (require.main === module) { + _main(); +} diff --git a/deploy/docker-compose.partial.yml b/deploy/docker-compose.partial.yml new file mode 100644 index 0000000..745b1fd --- /dev/null +++ b/deploy/docker-compose.partial.yml @@ -0,0 +1 @@ +version: '2.0' diff --git a/package.json b/package.json index e903cd4..7cad7c8 100644 --- a/package.json +++ b/package.json @@ -14,7 +14,7 @@ }, "repository": { "type": "git", - "url": "https://github.com/caleydo/taco_server.git" + "url": "https://github.com/Caleydo/taco_server.git" }, "files": [ "taco_server", @@ -26,13 +26,13 @@ "docker_packages.txt" ], "scripts": { - "check": "flake8", + "check": "flake8 --exclude=.git,venv,deploy,docs,__pycache__,node_modules", "pretest": "npm run check", "test": "test ! -d tests || python setup.py test", "prebuild": "node -e \"process.exit(process.env.PHOVEA_SKIP_TESTS === undefined?1:0)\" || npm run test", - "build": "python build.py", + "build": "rm -rf build/source && find . 
-name '*.pyc' -delete && node buildPython.js && cp -r ./taco_server build/source/", "predist": "npm run build && npm run docs", - "dist": "python setup.py bdist_egg && cd build && tar cvzf ../dist/taco_server.tar.gz *", + "dist": "python setup.py sdist bdist_wheel && cd build && tar cvzf ../dist/taco_server.tar.gz *", "docs": "sphinx-apidoc -o docs -f ./taco_server && sphinx-build ./docs build/docs" } } diff --git a/requirements.txt b/requirements.txt index 97dc403..0cdf3da 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -e git+https://github.com/phovea/phovea_server.git@develop#egg=phovea_server enum==0.4.6 -sklearn==0.0 +sklearn==0.0 \ No newline at end of file diff --git a/setup.py b/setup.py index 18e10f0..b4f30f1 100644 --- a/setup.py +++ b/setup.py @@ -39,12 +39,14 @@ def to_version(v): now = datetime.datetime.utcnow() return v.replace('SNAPSHOT', now.strftime('%Y%m%d-%H%M%S')) + setup( - name=pkg['name'], + name=pkg['name'].lower(), version=to_version(pkg['version']), url=pkg['homepage'], description=pkg['description'], long_description=read_it('README.md'), + long_description_content_type='text/markdown', keywords=pkg.get('keywords', ''), author=pkg['author']['name'], author_email=pkg['author']['email'], diff --git a/tox.ini b/tox.ini index f2734b2..a349028 100644 --- a/tox.ini +++ b/tox.ini @@ -21,7 +21,7 @@ commands = py.test tests [flake8] -ignore=E111,E114,E501 +ignore=E111,E114,E501,E121,E123,E126,E226,E24,E704 exclude = .tox,*.egg,build,data,.git,__pycache__,docs,node_modules [pytest] From 1002ad54c6f342eb8d23ca3f78fa1affbe23d3a3 Mon Sep 17 00:00:00 2001 From: Holger Stitz Date: Wed, 18 Dec 2019 21:31:05 +0100 Subject: [PATCH 32/40] Fix flake8 --- data/olympics_generator/count_by_year.py | 59 +++++++++++++----------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/data/olympics_generator/count_by_year.py b/data/olympics_generator/count_by_year.py index 229b97d..ebef3c0 100644 --- a/data/olympics_generator/count_by_year.py +++ b/data/olympics_generator/count_by_year.py @@ -3,21 +3,22 @@ createdCSVs = [] -def writeIndexJson(): + +def write_index_json(): with open('../index.json', 'w') as outfile: json.dump(createdCSVs, outfile) -def writeCSV(year, medalType, fieldnames, medalsPerCountry): +def write_csv(year, medal_type, fieldnames, medals_per_country): if year is None: print('Invalid year -> file not written') return - name = 'Olympic Games ' + year + ' (' + medalType + ' Medals)' - filename = 'olympics_' + year + '_' + medalType.lower() + '.csv' + name = 'Olympic Games ' + year + ' (' + medal_type + ' Medals)' + filename = 'olympics_' + year + '_' + medal_type.lower() + '.csv' # sort countries by sum of all medals - sortedBySum = sorted(medalsPerCountry.items(), key=lambda x: sum(x[1].values()), reverse=True) + sortedBySum = sorted(medals_per_country.items(), key=lambda x: sum(x[1].values()), reverse=True) print('----------------') print('Write ' + filename) @@ -26,7 +27,7 @@ def writeCSV(year, medalType, fieldnames, medalsPerCountry): # get min and max value of the whole csv for the range maxValue = float('-inf') - #minValue = float('inf') # does not work, because we fill empty cells with 0 by default + # minValue = float('inf') # does not work, because we fill empty cells with 0 by default with open('../' + filename, 'wb') as output: writer = csv.DictWriter(output, fieldnames=fieldnames, restval='0', dialect='excel') @@ -34,7 +35,7 @@ def writeCSV(year, medalType, fieldnames, medalsPerCountry): for k, v in sortedBySum: values = 
list(v.values()) maxValue = max(maxValue, max(values)) - #minValue = min(minValue, min(values)) + # minValue = min(minValue, min(values)) v['CountryCode'] = k writer.writerow(v) @@ -43,7 +44,7 @@ def writeCSV(year, medalType, fieldnames, medalsPerCountry): stats['name'] = name stats['path'] = filename stats['type'] = 'matrix' - stats['size'] = [len(sortedBySum), len(fieldnames)-1] # -1 = CountryCode fieldname + stats['size'] = [len(sortedBySum), len(fieldnames)-1] # -1 = CountryCode fieldname stats['rowtype'] = 'Country' stats['coltype'] = 'Discipline' stats['value'] = dict(type='real', range=[0, maxValue]) @@ -52,48 +53,50 @@ def writeCSV(year, medalType, fieldnames, medalsPerCountry): print('----------------') -def readCSV(medalType = 'Total'): + +def read_csv(medal_type='Total'): with open('./MedalData1.csv', 'rb') as csvfile: - reader = csv.DictReader(csvfile, fieldnames=['Games','Sport','Event','Athlete(s)','CountryCode','CountryName','Medal','ResultInSeconds'], dialect='excel-tab') + reader = csv.DictReader(csvfile, fieldnames=['Games', 'Sport', 'Event', 'Athlete(s)', 'CountryCode', 'CountryName', 'Medal', 'ResultInSeconds'], dialect='excel-tab') next(reader) lastGames = None fieldnames = ['CountryCode'] - medalsPerCountry = dict() + medals_per_country = dict() for row in reader: if row['Games'] != lastGames: # write old year when a new year is detected - writeCSV(lastGames, medalType, fieldnames, medalsPerCountry) + write_csv(lastGames, medal_type, fieldnames, medals_per_country) # clean up variables fieldnames = ['CountryCode'] - medalsPerCountry = dict() + medals_per_country = dict() lastGames = row['Games'] - country = row['CountryCode'] # short-cut + country = row['CountryCode'] # short-cut if row['Event'] not in fieldnames: fieldnames.append(row['Event']) - if row['Medal'] == medalType or medalType is 'Total': - if country not in medalsPerCountry: - medalsPerCountry[country] = dict() - #medalsPerCountry[country]['CountryCode'] = country + if row['Medal'] == medal_type or medal_type == 'Total': + if country not in medals_per_country: + medals_per_country[country] = dict() + # medals_per_country[country]['CountryCode'] = country - if row['Event'] not in medalsPerCountry[country]: - medalsPerCountry[country][row['Event']] = 0 + if row['Event'] not in medals_per_country[country]: + medals_per_country[country][row['Event']] = 0 - medalsPerCountry[country][row['Event']] += 1 + medals_per_country[country][row['Event']] += 1 - #print(row['Games'], row['Event'], country, row['Medal']) + # print(row['Games'], row['Event'], country, row['Medal']) # write the last file - writeCSV(lastGames, medalType, fieldnames, medalsPerCountry) + write_csv(lastGames, medal_type, fieldnames, medals_per_country) + -readCSV('Total') -readCSV('Bronze') -readCSV('Silver') -readCSV('Gold') +read_csv('Total') +read_csv('Bronze') +read_csv('Silver') +read_csv('Gold') -writeIndexJson() +write_index_json() From f82ae49384f92d764af17d7bf9b9442241934ade Mon Sep 17 00:00:00 2001 From: Holger Stitz Date: Wed, 18 Dec 2019 21:33:57 +0100 Subject: [PATCH 33/40] Fix flake8 --- data/olympics_generator/count_by_year.py | 34 ++++++++++++------------ 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/data/olympics_generator/count_by_year.py b/data/olympics_generator/count_by_year.py index ebef3c0..797f8b2 100644 --- a/data/olympics_generator/count_by_year.py +++ b/data/olympics_generator/count_by_year.py @@ -1,12 +1,12 @@ import csv import json -createdCSVs = [] +created_cvs_list = [] def write_index_json(): with 
open('../index.json', 'w') as outfile: - json.dump(createdCSVs, outfile) + json.dump(created_cvs_list, outfile) def write_csv(year, medal_type, fieldnames, medals_per_country): @@ -18,24 +18,24 @@ def write_csv(year, medal_type, fieldnames, medals_per_country): filename = 'olympics_' + year + '_' + medal_type.lower() + '.csv' # sort countries by sum of all medals - sortedBySum = sorted(medals_per_country.items(), key=lambda x: sum(x[1].values()), reverse=True) + sorted_by_sum = sorted(medals_per_country.items(), key=lambda x: sum(x[1].values()), reverse=True) print('----------------') print('Write ' + filename) print(fieldnames) - print(sortedBySum) + print(sorted_by_sum) # get min and max value of the whole csv for the range - maxValue = float('-inf') - # minValue = float('inf') # does not work, because we fill empty cells with 0 by default + max_value = float('-inf') + # min_value = float('inf') # does not work, because we fill empty cells with 0 by default with open('../' + filename, 'wb') as output: writer = csv.DictWriter(output, fieldnames=fieldnames, restval='0', dialect='excel') writer.writeheader() - for k, v in sortedBySum: + for k, v in sorted_by_sum: values = list(v.values()) - maxValue = max(maxValue, max(values)) - # minValue = min(minValue, min(values)) + max_value = max(max_value, max(values)) + # min_value = min(min_value, min(values)) v['CountryCode'] = k writer.writerow(v) @@ -44,12 +44,12 @@ def write_csv(year, medal_type, fieldnames, medals_per_country): stats['name'] = name stats['path'] = filename stats['type'] = 'matrix' - stats['size'] = [len(sortedBySum), len(fieldnames)-1] # -1 = CountryCode fieldname + stats['size'] = [len(sorted_by_sum), len(fieldnames)-1] # -1 = CountryCode fieldname stats['rowtype'] = 'Country' stats['coltype'] = 'Discipline' - stats['value'] = dict(type='real', range=[0, maxValue]) + stats['value'] = dict(type='real', range=[0, max_value]) - createdCSVs.append(stats) + created_cvs_list.append(stats) print('----------------') @@ -59,20 +59,20 @@ def read_csv(medal_type='Total'): reader = csv.DictReader(csvfile, fieldnames=['Games', 'Sport', 'Event', 'Athlete(s)', 'CountryCode', 'CountryName', 'Medal', 'ResultInSeconds'], dialect='excel-tab') next(reader) - lastGames = None + last_games = None fieldnames = ['CountryCode'] medals_per_country = dict() for row in reader: - if row['Games'] != lastGames: + if row['Games'] != last_games: # write old year when a new year is detected - write_csv(lastGames, medal_type, fieldnames, medals_per_country) + write_csv(last_games, medal_type, fieldnames, medals_per_country) # clean up variables fieldnames = ['CountryCode'] medals_per_country = dict() - lastGames = row['Games'] + last_games = row['Games'] country = row['CountryCode'] # short-cut if row['Event'] not in fieldnames: @@ -91,7 +91,7 @@ def read_csv(medal_type='Total'): # print(row['Games'], row['Event'], country, row['Medal']) # write the last file - write_csv(lastGames, medal_type, fieldnames, medals_per_country) + write_csv(last_games, medal_type, fieldnames, medals_per_country) read_csv('Total') From 2db80c811ae1fba55b09432d83bf281bc65861ff Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Thu, 19 Dec 2019 09:24:22 +0100 Subject: [PATCH 34/40] change type of array in `Table` to string --- taco_server/src/diff_finder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/taco_server/src/diff_finder.py b/taco_server/src/diff_finder.py index 0542f5a..13fc5db 100644 --- a/taco_server/src/diff_finder.py +++ 
b/taco_server/src/diff_finder.py @@ -139,8 +139,8 @@ def generate_diff_from_files(file1, file2): # Table data structure class Table: def __init__(self, rows, cols, content): - self.row_ids = np.asarray(rows, 'object') - self.col_ids = np.asarray(cols, 'object') + self.row_ids = np.asarray(rows, 'object').astype(str) + self.col_ids = np.asarray(cols, 'object').astype(str) self.content = content From f081ed5440950303c8242e7de871b534f1db2b04 Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Thu, 19 Dec 2019 09:24:34 +0100 Subject: [PATCH 35/40] use custom JsonEncoder --- taco_server/src/diff_cache.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/taco_server/src/diff_cache.py b/taco_server/src/diff_cache.py index c027926..5b5039b 100644 --- a/taco_server/src/diff_cache.py +++ b/taco_server/src/diff_cache.py @@ -106,7 +106,7 @@ def get_diff_table(id1, id2, direction, ops, jsonit=True): else: # todo later find a way to send the error # e.g. there's no matching column in this case - json_result = json.dumps(diffobj) # which is {} for now! + json_result = json.dumps(diffobj, cls=json_encoder.JsonEncoder) # which is {} for now! set_diff_cache(hash_name, json_result) elif jsonit is False: @@ -153,10 +153,10 @@ def get_ratios(id1, id2, direction, ops, bins=1, bins_col=1, jsonit=True): # bin == 1 -> timeline bar chart # bin == -1 -> 2d ratio plot if bins == 1 or bins == -1: - json_ratios = json.dumps(ratios.serialize()) + json_ratios = json.dumps(ratios.serialize(), cls=json_encoder.JsonEncoder) # bin > 1 -> 2d ratio histogram else: - json_ratios = json.dumps(ratios) + json_ratios = json.dumps(ratios, cls=json_encoder.JsonEncoder) # cache this as overview set_diff_cache(hashname, json_ratios) From 5dd80ecff3d317f30057f284e6ae8e77930b95f8 Mon Sep 17 00:00:00 2001 From: Holger Stitz Date: Thu, 2 Jan 2020 14:25:24 +0100 Subject: [PATCH 36/40] Use Debian `buster` as CircleCI Docker image --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 991be3d..bc4de12 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -3,7 +3,7 @@ jobs: build: working_directory: ~/phovea docker: - - image: circleci/python:3.7-node-browsers + - image: circleci/python:3.7-buster-node-browsers # for node version see Dockerfile on https://hub.docker.com/r/circleci/python steps: - checkout - run: From 041a240b1d8968f6889bb43c05e847fe24b96939 Mon Sep 17 00:00:00 2001 From: Holger Stitz Date: Thu, 2 Jan 2020 14:25:44 +0100 Subject: [PATCH 37/40] Remove .gitlab-ci.yml --- .gitlab-ci.yml | 52 -------------------------------------------------- 1 file changed, 52 deletions(-) delete mode 100644 .gitlab-ci.yml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml deleted file mode 100644 index 1da2330..0000000 --- a/.gitlab-ci.yml +++ /dev/null @@ -1,52 +0,0 @@ -image: circleci/python:3.7-node-browsers - -variables: - NODE_VERSION: "12.13" - GIT_DEPTH: "1" - -cache: - key: "$CI_REPOSITORY_URL-$CI_COMMIT_REF_NAME" - paths: - - ~/venv - -before_script: - # Install ssh-agent if not already installed, it is required by Docker. - # (change apt-get to yum if you use a CentOS-based image) - - 'which ssh-agent || ( apt-get update -y && apt-get install openssh-client -y )' - - # Run ssh-agent (inside the build environment) - - eval $(ssh-agent -s) - - # Add the SSH key stored in SSH_PRIVATE_KEY variable to the agent store - - ssh-add <(echo "$SSH_PRIVATE_KEY") - - # For Docker builds disable host key checking. 
Be aware that by adding that - # you are suspectible to man-in-the-middle attacks. - # WARNING: Use this only with the Docker executor, if you use it with shell - # you will overwrite your user's SSH config. - - mkdir -p ~/.ssh - - '[[ -f /.dockerenv ]] && echo -e "Host *\n\tStrictHostKeyChecking no\n\n" > ~/.ssh/config' - -stages: - - install - - build - -install-pip-wee: - stage: install - script: | - (!(test -f docker_packages.txt) || (cat docker_packages.txt | xargs sudo apt-get install -y)) - virtualenv ~/venv - . ~/venv/bin/activate - pip install --upgrade --upgrade-strategy=only-if-needed -r requirements_dev.txt - pip install --upgrade --upgrade-strategy=only-if-needed -r requirements.txt - -dist: - stage: build - script: | - . ~/venv/bin/activate - npm run dist - allow_failure: false - artifacts: - expire_in: 1 week - paths: - - dist From c94221e9a331012a7124541d2521d8ceae39487d Mon Sep 17 00:00:00 2001 From: rumersdorfer <45141967+rumersdorfer@users.noreply.github.com> Date: Tue, 7 Jan 2020 17:57:52 +0100 Subject: [PATCH 38/40] Ignore package-lock.json --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index a00b0c0..d283702 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,4 @@ __pycache__/ # due to using tox and pytest .tox .cache +package-lock.json From e0308e80fb67cf49966b18f62026755f3365a113 Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Thu, 16 Jan 2020 14:47:33 +0100 Subject: [PATCH 39/40] Prepare release 3.0.0 --- package.json | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index 7cad7c8..6e0d647 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "taco_server", "description": "The server part for comparing large tabular data using Phovea", - "version": "1.0.0-SNAPSHOT", + "version": "3.0.0", "author": { "name": "The Caleydo Team", "email": "contact@caleydo.org", diff --git a/requirements.txt b/requirements.txt index 6d96a8f..143e6c9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ --e git+https://github.com/phovea/phovea_server.git@develop#egg=phovea_server +phovea_server>=4.0.0,<5.0.0 sklearn==0.0 From 25b95722b336a322b0388930814fde1280992aa6 Mon Sep 17 00:00:00 2001 From: Holger Stitz Date: Thu, 16 Jan 2020 14:53:32 +0100 Subject: [PATCH 40/40] Update package.json --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 6e0d647..5d5085e 100644 --- a/package.json +++ b/package.json @@ -32,7 +32,7 @@ "prebuild": "node -e \"process.exit(process.env.PHOVEA_SKIP_TESTS === undefined?1:0)\" || npm run test", "build": "rm -rf build/source && find . -name '*.pyc' -delete && node buildPython.js && cp -r ./taco_server build/source/", "predist": "npm run build && npm run docs", - "dist": "python setup.py sdist bdist_wheel && cd build && tar cvzf ../dist/taco_server.tar.gz *", + "dist": "python setup.py sdist bdist_wheel", "docs": "sphinx-apidoc -o docs -f ./taco_server && sphinx-build ./docs build/docs" } }
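
---

Editorial note on patches 34 and 35: the `json_encoder.JsonEncoder` passed to `json.dumps` in diff_cache.py is imported from phovea_server, and its definition is not part of this patch series. A minimal sketch of what such an encoder typically has to do here — converting numpy scalars and arrays (e.g. the `row_ids`/`col_ids` arrays that patch 34 coerces to strings) into plain Python types that `json.dumps` accepts — could look like the following; the class body is an assumption for illustration, not the actual phovea_server implementation:

import json

import numpy as np


class JsonEncoder(json.JSONEncoder):
  # Hypothetical stand-in for phovea_server's json_encoder.JsonEncoder:
  # convert numpy types that the default encoder rejects.
  def default(self, obj):
    if isinstance(obj, np.integer):
      return int(obj)       # numpy integer scalars -> native int
    if isinstance(obj, np.floating):
      return float(obj)     # numpy float scalars -> native float
    if isinstance(obj, np.ndarray):
      return obj.tolist()   # e.g. row_ids/col_ids from diff_finder.Table
    return super().default(obj)  # anything else: raise TypeError as usual


# Usage, mirroring diff_cache.py after patch 35:
# json_result = json.dumps(diffobj, cls=JsonEncoder)

Without such an encoder, `json.dumps` fails on numpy values with `TypeError: Object of type ndarray is not JSON serializable`, which is presumably why patches 34 and 35 appear together in this series.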