From 6536bc457f980d70db35a1530e0552e52aada3c8 Mon Sep 17 00:00:00 2001
From: Holger Stitz
Date: Mon, 19 Aug 2019 14:06:35 +0200
Subject: [PATCH 01/40] Removed enum package

Due to native integration in Python 3

---
 requirements.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 97dc403..6d96a8f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,2 @@
 -e git+https://github.com/phovea/phovea_server.git@develop#egg=phovea_server
-enum==0.4.6
 sklearn==0.0

From 824f86e891e92e23495689888128baf8e0ccec20 Mon Sep 17 00:00:00 2001
From: Holger Stitz
Date: Mon, 19 Aug 2019 14:06:42 +0200
Subject: [PATCH 02/40] Update python requirements

---
 requirements_dev.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements_dev.txt b/requirements_dev.txt
index 7ca3f68..e6ef63b 100644
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -3,4 +3,4 @@ pep8-naming==0.4.1
 pytest==3.0.3
 pytest-runner==2.9
 Sphinx==1.5.2
-recommonmark==0.4.0
+recommonmark==0.6.0

From 9b578be8c626f5c78150dab7de272e298fc4d0fc Mon Sep 17 00:00:00 2001
From: dvvanessastoiber
Date: Tue, 20 Aug 2019 08:16:06 +0200
Subject: [PATCH 03/40] CircleCI Docker Image 'python:3.7-node-browsers'

created .circleci folder and config.yml

---
 .circleci/config.yml | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 .circleci/config.yml

diff --git a/.circleci/config.yml b/.circleci/config.yml
new file mode 100644
index 0000000..0fcaf74
--- /dev/null
+++ b/.circleci/config.yml
@@ -0,0 +1,41 @@
+version: 2
+jobs:
+  build:
+    working_directory: ~/phovea
+    docker:
+      - image: circleci/python:3.7-node-browsers
+        environment:
+          NODE_VERSION: 10.16.3
+    tags:
+      - /v\d+.\d+.\d+.*/
+    steps:
+      - checkout
+      - run:
+          command: |
+            (!(test -f docker_packages.txt) || (cat docker_packages.txt | xargs sudo apt-get install -y))
+      - restore_cache:
+          key: deps1-{{ .Branch }}-{{ checksum "requirements.txt" }}-{{ checksum "requirements_dev.txt" }}
+      - run:
+          name: install-pip-wee
+          command: |
+            virtualenv ~/venv
+            . ~/venv/bin/activate
+            pip install -r requirements_dev.txt
+            pip install -r requirements.txt
+      - save_cache:
+          key: deps1-{{ .Branch }}-{{ checksum "requirements.txt" }}-{{ checksum "requirements_dev.txt" }}
+          paths:
+            - ~/venv
+      - run: #force update of VCS dependencies?
+          name: update-pip-vcs-dependencies
+          command: |
+            . ~/venv/bin/activate
+            pip install --upgrade --upgrade-strategy=only-if-needed -r requirements.txt
+      - run:
+          name: dist
+          command: |
+            . ~/venv/bin/activate
+            npm run dist
+      - store_artifacts:
+          path: dist
+          prefix: dist

From 45bc67a44f14d0359338efaddf779c100f33d0b0 Mon Sep 17 00:00:00 2001
From: dvvanessastoiber
Date: Tue, 20 Aug 2019 08:16:53 +0200
Subject: [PATCH 04/40] CircleCI Docker image 'python:3.7-node-browsers'

created .gitlab-ci.yml

---
 .gitlab-ci.yml | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 .gitlab-ci.yml

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
new file mode 100644
index 0000000..50b4bd1
--- /dev/null
+++ b/.gitlab-ci.yml
@@ -0,0 +1,52 @@
+image: circleci/python:3.7-node-browsers
+
+variables:
+  NODE_VERSION: "10.16.3"
+  GIT_DEPTH: "1"
+
+cache:
+  key: "$CI_REPOSITORY_URL-$CI_COMMIT_REF_NAME"
+  paths:
+    - ~/venv
+
+before_script:
+  # Install ssh-agent if not already installed, it is required by Docker.
+  # (change apt-get to yum if you use a CentOS-based image)
+  - 'which ssh-agent || ( apt-get update -y && apt-get install openssh-client -y )'
+
+  # Run ssh-agent (inside the build environment)
+  - eval $(ssh-agent -s)
+
+  # Add the SSH key stored in SSH_PRIVATE_KEY variable to the agent store
+  - ssh-add <(echo "$SSH_PRIVATE_KEY")
+
+  # For Docker builds disable host key checking. Be aware that by adding that
+  # you are susceptible to man-in-the-middle attacks.
+  # WARNING: Use this only with the Docker executor, if you use it with shell
+  # you will overwrite your user's SSH config.
+  - mkdir -p ~/.ssh
+  - '[[ -f /.dockerenv ]] && echo -e "Host *\n\tStrictHostKeyChecking no\n\n" > ~/.ssh/config'
+
+stages:
+  - install
+  - build
+
+install-pip-wee:
+  stage: install
+  script: |
+    (!(test -f docker_packages.txt) || (cat docker_packages.txt | xargs sudo apt-get install -y))
+    virtualenv ~/venv
+    . ~/venv/bin/activate
+    pip install --upgrade --upgrade-strategy=only-if-needed -r requirements_dev.txt
+    pip install --upgrade --upgrade-strategy=only-if-needed -r requirements.txt
+
+dist:
+  stage: build
+  script: |
+    . ~/venv/bin/activate
+    npm run dist
+  allow_failure: false
+  artifacts:
+    expire_in: 1 week
+    paths:
+      - dist

From 2d9ab1af27ed5530190a970690d6bb7fb50154ed Mon Sep 17 00:00:00 2001
From: dvvanessastoiber
Date: Tue, 20 Aug 2019 08:21:51 +0200
Subject: [PATCH 05/40] Remove .encode('ascii') from setup.py

---
 setup.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 18e10f0..9383c07 100644
--- a/setup.py
+++ b/setup.py
@@ -26,12 +26,12 @@ def read_it(name):
 def packaged(*files):
   r = {}
   global pkg
-  r[pkg['name'].encode('ascii')] = list(files)
+  r[pkg['name']] = list(files)
   return r
 
 
 def requirements(file):
-  return [r.strip().encode('ascii') for r in read_it(file).strip().split('\n') if not r.startswith('-e git+https://')]
+  return [r.strip() for r in read_it(file).strip().split('\n') if not r.startswith('-e git+https://')]
 
 
 def to_version(v):
@@ -39,6 +39,7 @@ def to_version(v):
   now = datetime.datetime.utcnow()
   return v.replace('SNAPSHOT', now.strftime('%Y%m%d-%H%M%S'))
 
+
 setup(
   name=pkg['name'],
   version=to_version(pkg['version']),

From ca719f6784c45affcd963e26b9c210d9dfc10910 Mon Sep 17 00:00:00 2001
From: dvvanessastoiber
Date: Tue, 20 Aug 2019 08:23:28 +0200
Subject: [PATCH 06/40] Replace python tests py27 and py34 with py37

done for .travis.yml and tox.ini in taco_server

---
 .travis.yml | 3 +--
 tox.ini     | 5 ++---
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 428bcb9..5fcf10b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,8 +1,7 @@
 language: python
 sudo: required
 env:
-  - TOXENV=py27
-  - TOXENV=py34
+  - TOXENV=py37
 
 install:
   - (!(test -f docker_packages.txt) || (cat docker_packages.txt | xargs sudo apt-get install -y))

diff --git a/tox.ini b/tox.ini
index f2734b2..48c0366 100644
--- a/tox.ini
+++ b/tox.ini
@@ -5,12 +5,11 @@
 ###############################################################################
 
 [tox]
-envlist = py{27,34}
+envlist = py{37}
 
 [testenv]
 basepython =
-  py27: python2.7
-  py34: python3.4
+  py34: python3.7
 deps =
   flake8
   pytest

From 325e36f01aea6710f3eb67e11df80ba879f527d0 Mon Sep 17 00:00:00 2001
From: dvvanessastoiber
Date: Tue, 20 Aug 2019 13:03:32 +0200
Subject: [PATCH 07/40] used lib2to3 to convert files to new syntax

---
 setup.py                       |  2 +-
 taco_server/api.py             |  3 ++-
 taco_server/src/diff_cache.py  |  6 +++---
 taco_server/src/diff_finder.py | 30 +++++++++++++++---------------
taco_server/src/generator.py | 4 ++-- taco_server/src/modifier.py | 26 +++++++++++++------------- taco_server/src/test1.py | 28 ++++++++++++++-------------- 7 files changed, 50 insertions(+), 49 deletions(-) diff --git a/setup.py b/setup.py index 9383c07..9a5030b 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ # Copyright (c) The Caleydo Team. All rights reserved. # Licensed under the new BSD license, available at http://caleydo.org/license ############################################################################### -from __future__ import with_statement, print_function + from setuptools import setup, find_packages from codecs import open from os import path diff --git a/taco_server/api.py b/taco_server/api.py index 84e4e61..5cbda46 100644 --- a/taco_server/api.py +++ b/taco_server/api.py @@ -1,6 +1,6 @@ from phovea_server import ns import timeit -from src import diff_cache +from .src import diff_cache import logging @@ -102,6 +102,7 @@ def create(): """ return app + if __name__ == '__main__': app.debug = True app.run(host='0.0.0.0', port=9000) diff --git a/taco_server/src/diff_cache.py b/taco_server/src/diff_cache.py index 71bf9b7..971bc02 100644 --- a/taco_server/src/diff_cache.py +++ b/taco_server/src/diff_cache.py @@ -3,8 +3,7 @@ # detail (as detail), middle (as count), overview (as ratios) -from __future__ import print_function -from diff_finder import Table, DiffFinder, Diff, Ratios +from .diff_finder import Table, DiffFinder, Diff, Ratios import phovea_server.dataset as dataset import timeit import json @@ -37,6 +36,7 @@ def create_cache_dir(): else: _log.info('use existing cache directory: ' + _cache_directory) + # run immediately! create_cache_dir() @@ -237,7 +237,7 @@ def create_hashname(id1, id2, bins, bins_col, direction, ops): def ratio_from_json(jsonobj): # idk - r = json.loads(jsonobj, object_hook=lambda d: namedtuple('X', d.keys())(*d.values())) + r = json.loads(jsonobj, object_hook=lambda d: namedtuple('X', list(d.keys()))(*list(d.values()))) # todo find a smarter way, really cr = 0 if not hasattr(r, "c_ratio") else r.c_ratio ar = 0 if not hasattr(r, "a_ratio") else r.a_ratio diff --git a/taco_server/src/diff_finder.py b/taco_server/src/diff_finder.py index 6470676..869dd4c 100644 --- a/taco_server/src/diff_finder.py +++ b/taco_server/src/diff_finder.py @@ -356,8 +356,8 @@ def reorder_rows_counts(self): :param height: :return: """ - ids = map(lambda r: r['id'], self.reorder['rows']) - filtered_content = filter(lambda r: r['row'] in ids, self.content) + ids = [r['id'] for r in self.reorder['rows']] + filtered_content = [r for r in self.content if r['row'] in ids] return float(len(filtered_content)) def reorder_cols_counts(self): @@ -367,8 +367,8 @@ def reorder_cols_counts(self): :param height: :return: """ - ids = map(lambda r: r['id'], self.reorder['cols']) - filtered_content = filter(lambda r: r['col'] in ids, self.content) + ids = [r['id'] for r in self.reorder['cols']] + filtered_content = [r for r in self.content if r['col'] in ids] return float(len(filtered_content)) def reorder_rows_cols_counts(self): @@ -378,9 +378,9 @@ def reorder_rows_cols_counts(self): :param height: :return: """ - row_ids = map(lambda r: r['id'], self.reorder['rows']) - col_ids = map(lambda r: r['id'], self.reorder['cols']) - filtered_content = filter(lambda r: r['col'] in col_ids and r['row'] in row_ids, self.content) + row_ids = [r['id'] for r in self.reorder['rows']] + col_ids = [r['id'] for r in self.reorder['cols']] + filtered_content = [r for r in self.content if r['col'] in 
col_ids and r['row'] in row_ids] return float(len(filtered_content)) def reorder_counts(self): @@ -479,7 +479,7 @@ def per_bin_ratios(self, bins, e_type): index2bin = np.digitize(indices, bin_range) # todo handle the error here when there's no row ! - pcontent = [[] for x in xrange(bins)] + pcontent = [[] for x in range(bins)] for c in self.content: ci = union_rows.index(c[row]) bin_index = index2bin[ci] @@ -489,7 +489,7 @@ def per_bin_ratios(self, bins, e_type): pcontent[bin_index] += [c] # for structure changes - pstructure = [{"added_" + e_type: [], "deleted_" + e_type: []} for x in xrange(bins)] + pstructure = [{"added_" + e_type: [], "deleted_" + e_type: []} for x in range(bins)] # filter for the structure changes, because once there's a structure change, there's no need to find content #what!! for a in self.structure["added_" + e_type]: ai = union_rows.index(a['id']) @@ -508,7 +508,7 @@ def per_bin_ratios(self, bins, e_type): # convert to np.array to use np.where union_rows = np.array(union_rows) - for i in xrange(bins): + for i in range(bins): temp = union_rows[np.where(index2bin == i)[0]] if dir == D_ROWS: punion = { @@ -565,18 +565,18 @@ def per_entity_ratios(self, dir): pstructure = {} # filter for the structure changes, because once there's a structure change, there's no need to find content # idk why but obj is Diff! - pstructure["added_" + e_type] = filter(lambda obj: obj['id'] == id, self.structure["added_" + e_type]) + pstructure["added_" + e_type] = [obj for obj in self.structure["added_" + e_type] if obj['id'] == id] if len(pstructure["added_" + e_type]) != 0: # create a ratio where it's only added ratio_counts = RatiosAndCounts(Ratios(0, 1, 0, 0), Counts(0, len(union_cols), 0, 0)) else: # find the deleted - pstructure["deleted_" + e_type] = filter(lambda obj: obj['id'] == id, self.structure["deleted_" + e_type]) + pstructure["deleted_" + e_type] = [obj for obj in self.structure["deleted_" + e_type] if obj['id'] == id] if len(pstructure["deleted_" + e_type]) != 0: ratio_counts = RatiosAndCounts(Ratios(0, 0, 1, 0), Counts(0, 0, len(union_cols), 0)) else: # find the content - pcontent = filter(lambda obj: obj[row_id] == id, self.content) + pcontent = [obj for obj in self.content if obj[row_id] == id] if len(pcontent) == 0: pcontent = None # more resonable in the case of subtable @@ -838,7 +838,7 @@ def _compare_ids(self, e_type, ids1, ids2, u_ids, has_merge, has_structure, merg merged_ids = str(j).split(merge_delimiter) for s in merged_ids: # delete the delete operations related to those IDs - deleted_log = filter(lambda obj: obj['id'] != s, deleted_log) + deleted_log = [obj for obj in deleted_log if obj['id'] != s] merged_log += [{"id": s, "pos": np.where(u_ids == s)[0][0], "merge_id": merge_id, "is_added": False}] merge_id += 1 # increment it # log @@ -937,7 +937,7 @@ def _compare_values(self): # fixing an ungly bug when there are NO unique ids! # ## warning! bug ### # this happens when one of the tables does NOT have unique ids and the sizes are different... couldn't fix - print("Oops! it seems that sizes are not matching", cids1.shape[0], cids2.shape[0]) + print(("Oops! 
it seems that sizes are not matching", cids1.shape[0], cids2.shape[0])) set_boolean = (np.array(list(set(cids1))) != np.array(list(set(cids2)))) cdis = cids1[set_boolean] # ignore and leave diff --git a/taco_server/src/generator.py b/taco_server/src/generator.py index eef2526..817ea19 100644 --- a/taco_server/src/generator.py +++ b/taco_server/src/generator.py @@ -6,12 +6,12 @@ # creates an array with random float values within a range with size def random_floats_array(low, high, size): - return [random.uniform(low, high) for _ in xrange(size)] + return [random.uniform(low, high) for _ in range(size)] # creates an array with random int values within a range with size def random_int_array(low, high, size): - return [random.randint(low, high) for _ in xrange(size)] + return [random.randint(low, high) for _ in range(size)] # creates a homogeneous table diff --git a/taco_server/src/modifier.py b/taco_server/src/modifier.py index f69f3dc..5a03ceb 100644 --- a/taco_server/src/modifier.py +++ b/taco_server/src/modifier.py @@ -1,7 +1,7 @@ import numpy as np import random -import generator as gen -import logger as log +from . import generator as gen +from . import logger as log __author__ = 'Reem' @@ -44,7 +44,7 @@ def del_row(my_array, index): array_length = len(my_array) # check if the table is empty if array_length == 0: - print("Error: list is empty, can't delete a row", index) + print(("Error: list is empty, can't delete a row", index)) return my_array else: if index < array_length: @@ -58,7 +58,7 @@ def del_col(my_array, index): array_length = len(my_array) # check if the table is empty if array_length == 0: - print("Error: list is empty, can't delete a col", index) + print(("Error: list is empty, can't delete a col", index)) return my_array else: row_length = len(my_array[0]) @@ -165,7 +165,7 @@ def merge_columns(full_table, merge_array): # update the IDs col_ids.insert(merge_array[0], merged_id) log.message("merge", "column", merged_id, merge_array) - print(merged_id, cols, merged_col, table) + print((merged_id, cols, merged_col, table)) return {"table": table, "col_ids": col_ids, "row_ids": row_ids} @@ -189,7 +189,7 @@ def merge_rows(full_table, merge_array): # update the IDs row_ids.insert(merge_array[0], merged_id) log.message("merge", "row", merged_id, merge_array) - print(merged_id, rows, merged_row, table) + print((merged_id, rows, merged_row, table)) return {"table": table, "col_ids": col_ids, "row_ids": row_ids} @@ -202,27 +202,27 @@ def change_table(full_table, min_data, max_data, operations): new_row_id = latest_row_id + 1 new_col_id = latest_col_id + 1 # first delete the rows - for r in xrange(operations['del_row']): + for r in range(operations['del_row']): full_table = randomly_change_table(full_table, min_data, max_data, DEL_ROW) # then delete the cols - for c in xrange(operations['del_col']): + for c in range(operations['del_col']): full_table = randomly_change_table(full_table, min_data, max_data, DEL_COL) # then add rows - for r in xrange(operations['add_row']): + for r in range(operations['add_row']): full_table = randomly_change_table(full_table, min_data, max_data, ADD_ROW, new_row_id) new_row_id += 1 # then add cols - for c in xrange(operations['add_col']): + for c in range(operations['add_col']): full_table = randomly_change_table(full_table, min_data, max_data, ADD_COL, new_col_id) new_col_id += 1 # finally change the cells - for c in xrange(operations['ch_cell']): + for c in range(operations['ch_cell']): full_table = randomly_change_table(full_table, min_data, 
max_data, CH_CELL) # merge operation # the order of this operation might change later for mc in operations['me_col']: # full_table = merge_col(full_table) - print ('merge col', mc) + print(('merge col', mc)) full_table = merge_columns(full_table, mc) for mr in operations['me_row']: full_table = merge_rows(full_table, mr) @@ -277,7 +277,7 @@ def change_table(full_table, min_data, max_data, operations): gen.save_table(result['table'], result['row_ids'], result['col_ids'], data_directory + file_name + str(i + 1) + '_out.csv') # just print the size to add it manually to index.json - print (result['table'].shape[0], result['table'].shape[1], i) + print((result['table'].shape[0], result['table'].shape[1], i)) # update the ... for next round operations_count = { 'del_row': random.randint(0, 25), diff --git a/taco_server/src/test1.py b/taco_server/src/test1.py index 9f632f0..d7523cb 100644 --- a/taco_server/src/test1.py +++ b/taco_server/src/test1.py @@ -39,7 +39,7 @@ def del_row(my_array, index): array_length = len(my_array) # check if the table is empty if array_length == 0: - print("Error: list is empty, can't delete a row", index) + print(("Error: list is empty, can't delete a row", index)) return my_array else: if index < array_length: @@ -53,7 +53,7 @@ def del_col(my_array, index): array_length = len(my_array) # check if the table is empty if array_length == 0: - print("Error: list is empty, can't delete a row", index) + print(("Error: list is empty, can't delete a row", index)) return my_array else: row_length = len(my_array[0]) @@ -80,37 +80,37 @@ def randomly_change_table(table): if change_type == ADD_ROW: index = random.randint(0, len(table)) if len(table) > 0: - new_row = random.sample(range(min_data, max_data), len(table[0])) + new_row = random.sample(list(range(min_data, max_data)), len(table[0])) else: # table is empty - new_row = random.sample(range(min_data, max_data), random.randint(1, largest_row)) - print("log: add a row in ", index, new_row) + new_row = random.sample(list(range(min_data, max_data)), random.randint(1, largest_row)) + print(("log: add a row in ", index, new_row)) table = add_row(table, index, new_row) elif change_type == ADD_COL: if len(table) > 0: index = random.randint(0, len(table[0])) - new_col = random.sample(range(min_data, max_data), len(table)) + new_col = random.sample(list(range(min_data, max_data)), len(table)) else: index = 0 - new_col = random.sample(range(min_data, max_data), random.randint(1, largest_col)) - print("log: add a col in ", index, new_col) + new_col = random.sample(list(range(min_data, max_data)), random.randint(1, largest_col)) + print(("log: add a col in ", index, new_col)) table = add_col(table, index, new_col) elif change_type == CH_CELL: if len(table) > 0: i = random.randint(0, len(table) - 1) j = random.randint(0, len(table[0]) - 1) new_value = random.uniform(min_data, max_data) - print("log: change something somewhere ", i, j, new_value) + print(("log: change something somewhere ", i, j, new_value)) table = change_cell(table, i, j, new_value) else: print("log: there's nothing to change") elif change_type == DEL_ROW: index = random.randint(0, len(table) - 1) - print("log: delete row ", index) + print(("log: delete row ", index)) table = del_row(table, index) elif change_type == DEL_COL: index = random.randint(0, len(table[0]) - 1) - print("log: delete col ", index) + print(("log: delete col ", index)) table = del_col(table, index) return table @@ -152,13 +152,13 @@ def randomly_change_table(table): # table_3 might be from a file 
as it has to be big input_file = '../../data/small_table_in.csv' my_date = np.genfromtxt(input_file, delimiter=',') -print("this is my data", my_date) +print(("this is my data", my_date)) output_file = "../../data/small_table_out.csv" random.seed(100) num_of_changes = random.randint(2, 20) -print("num of changes is ", num_of_changes - 1) -for i in xrange(1, num_of_changes): +print(("num of changes is ", num_of_changes - 1)) +for i in range(1, num_of_changes): my_date = randomly_change_table(my_date) print(my_date) # print(table_2) From 5ba520d24c0509795a58595cde73ee8f0eeea5ba Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Tue, 20 Aug 2019 13:24:14 +0200 Subject: [PATCH 08/40] updated dependencies according to phovea_python --- requirements_dev.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/requirements_dev.txt b/requirements_dev.txt index e6ef63b..a7a5dbf 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,6 +1,6 @@ -flake8==3.0.4 -pep8-naming==0.4.1 -pytest==3.0.3 -pytest-runner==2.9 -Sphinx==1.5.2 +flake8==3.5.0 +pep8-naming==0.5.0 +pytest==3.5.0 +pytest-runner==4.2 +Sphinx==1.7.2 recommonmark==0.6.0 From 9f1eab822e15951bcf0461ad9da7d20ede09858a Mon Sep 17 00:00:00 2001 From: Holger Stitz Date: Thu, 22 Aug 2019 12:10:50 +0200 Subject: [PATCH 09/40] Use node v8.16.1 in circleci docker image --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 0fcaf74..55be307 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -5,7 +5,7 @@ jobs: docker: - image: circleci/python:3.7-node-browsers environment: - NODE_VERSION: 10.16.3 + NODE_VERSION: 8.16.1 tags: - /v\d+.\d+.\d+.*/ steps: From 7f0d9fd4adceebbce114b2ca1b9f768d2c90865d Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Thu, 22 Aug 2019 12:20:37 +0200 Subject: [PATCH 10/40] removed list() from lib2to3 --- taco_server/src/test1.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/taco_server/src/test1.py b/taco_server/src/test1.py index d7523cb..ecb97ae 100644 --- a/taco_server/src/test1.py +++ b/taco_server/src/test1.py @@ -80,19 +80,19 @@ def randomly_change_table(table): if change_type == ADD_ROW: index = random.randint(0, len(table)) if len(table) > 0: - new_row = random.sample(list(range(min_data, max_data)), len(table[0])) + new_row = random.sample(range(min_data, max_data), len(table[0])) else: # table is empty - new_row = random.sample(list(range(min_data, max_data)), random.randint(1, largest_row)) + new_row = random.sample(range(min_data, max_data), random.randint(1, largest_row)) print(("log: add a row in ", index, new_row)) table = add_row(table, index, new_row) elif change_type == ADD_COL: if len(table) > 0: index = random.randint(0, len(table[0])) - new_col = random.sample(list(range(min_data, max_data)), len(table)) + new_col = random.sample(range(min_data, max_data), len(table)) else: index = 0 - new_col = random.sample(list(range(min_data, max_data)), random.randint(1, largest_col)) + new_col = random.sample(range(min_data, max_data), random.randint(1, largest_col)) print(("log: add a col in ", index, new_col)) table = add_col(table, index, new_col) elif change_type == CH_CELL: From 4eba7f62951adfd5ab3eceb1919e20a54b18a320 Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Thu, 22 Aug 2019 12:20:59 +0200 Subject: [PATCH 11/40] adapted pandas import --- taco_server/src/diff_cache.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 
deletions(-) diff --git a/taco_server/src/diff_cache.py b/taco_server/src/diff_cache.py index 971bc02..d60f742 100644 --- a/taco_server/src/diff_cache.py +++ b/taco_server/src/diff_cache.py @@ -7,7 +7,7 @@ import phovea_server.dataset as dataset import timeit import json -import pandas.json as ujson +import pandas as pd import os import hashlib from collections import namedtuple @@ -50,7 +50,7 @@ def get_diff_cache(filename): file_name = _cache_directory + filename + '.json' if os.path.isfile(file_name): with open(file_name) as data_file: - data = ujson.load(data_file) + data = pd.json.load(data_file) return data # if the file doesn't exist return None @@ -100,16 +100,16 @@ def get_diff_table(id1, id2, direction, ops, jsonit=True): if isinstance(diffobj, Diff): # log the detail - json_result = ujson.dumps(diffobj.serialize()) + json_result = pd.json.dumps(diffobj.serialize()) set_diff_cache(hash_name, json_result) else: # todo later find a way to send the error # e.g. there's no matching column in this case - json_result = ujson.dumps(diffobj) # which is {} for now! + json_result = pd.json.dumps(diffobj) # which is {} for now! set_diff_cache(hash_name, json_result) elif jsonit is False: - diffobj = Diff().unserialize(ujson.loads(json_result)) + diffobj = Diff().unserialize(pd.json.loads(json_result)) if jsonit: return json_result @@ -152,10 +152,10 @@ def get_ratios(id1, id2, direction, ops, bins=1, bins_col=1, jsonit=True): # bin == 1 -> timeline bar chart # bin == -1 -> 2d ratio plot if bins == 1 or bins == -1: - json_ratios = ujson.dumps(ratios.serialize()) + json_ratios = pd.json.dumps(ratios.serialize()) # bin > 1 -> 2d ratio histogram else: - json_ratios = ujson.dumps(ratios) + json_ratios = pd.json.dumps(ratios) # cache this as overview set_diff_cache(hashname, json_ratios) @@ -237,7 +237,7 @@ def create_hashname(id1, id2, bins, bins_col, direction, ops): def ratio_from_json(jsonobj): # idk - r = json.loads(jsonobj, object_hook=lambda d: namedtuple('X', list(d.keys()))(*list(d.values()))) + r = json.loads(jsonobj, object_hook=lambda d: namedtuple('X', d.keys())(*list(d.values()))) # todo find a smarter way, really cr = 0 if not hasattr(r, "c_ratio") else r.c_ratio ar = 0 if not hasattr(r, "a_ratio") else r.a_ratio From 62884394290025fcd203667dd665c806631796b9 Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Wed, 28 Aug 2019 11:39:49 +0200 Subject: [PATCH 12/40] fixed indentation --- taco_server/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/taco_server/__init__.py b/taco_server/__init__.py index 757a7b9..3204af7 100644 --- a/taco_server/__init__.py +++ b/taco_server/__init__.py @@ -12,9 +12,9 @@ def phovea(registry): """ # generator-phovea:begin registry.append('namespace', 'taco', 'taco_server.api', - { - 'namespace': '/api/taco' - }) + { + 'namespace': '/api/taco' + }) # generator-phovea:end pass From f49ba6690a750a90d7f8e57b7e75dcd2ae5e33ea Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Tue, 3 Sep 2019 12:34:46 +0200 Subject: [PATCH 13/40] created JsonEncoder avoid default usage of numpy types --- taco_server/src/json_encoder.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 taco_server/src/json_encoder.py diff --git a/taco_server/src/json_encoder.py b/taco_server/src/json_encoder.py new file mode 100644 index 0000000..c945c86 --- /dev/null +++ b/taco_server/src/json_encoder.py @@ -0,0 +1,21 @@ +""" +Serializer to avoid default usage of numpy integer/float/bytes/etc. 
+""" +import json +import numpy + + +class JsonEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, numpy.integer): + return int(obj) + elif isinstance(obj, numpy.int64): + return int(obj) + elif isinstance(obj, bytes): + return obj.decode('utf-8') + elif isinstance(obj, numpy.floating): + return float(obj) + elif isinstance(obj, numpy.ndarray): + return obj.tolist() + else: + return super(JsonEncoder, self).default(obj) From 568a56a6853d6ee84fe55d1b62cefbb05a2f5c3c Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Tue, 3 Sep 2019 12:35:12 +0200 Subject: [PATCH 14/40] remove pandas, use json and custom encoder instead --- taco_server/src/diff_cache.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/taco_server/src/diff_cache.py b/taco_server/src/diff_cache.py index d60f742..a106bde 100644 --- a/taco_server/src/diff_cache.py +++ b/taco_server/src/diff_cache.py @@ -7,7 +7,7 @@ import phovea_server.dataset as dataset import timeit import json -import pandas as pd +from . import json_encoder import os import hashlib from collections import namedtuple @@ -50,7 +50,7 @@ def get_diff_cache(filename): file_name = _cache_directory + filename + '.json' if os.path.isfile(file_name): with open(file_name) as data_file: - data = pd.json.load(data_file) + data = json.load(data_file) return data # if the file doesn't exist return None @@ -100,16 +100,17 @@ def get_diff_table(id1, id2, direction, ops, jsonit=True): if isinstance(diffobj, Diff): # log the detail - json_result = pd.json.dumps(diffobj.serialize()) + serialize = Diff.serialize + json_result = (json.dumps(diffobj.__dict__, cls=json_encoder.JsonEncoder)) set_diff_cache(hash_name, json_result) else: # todo later find a way to send the error # e.g. there's no matching column in this case - json_result = pd.json.dumps(diffobj) # which is {} for now! + json_result = json.dumps(diffobj) # which is {} for now! 
set_diff_cache(hash_name, json_result) elif jsonit is False: - diffobj = Diff().unserialize(pd.json.loads(json_result)) + diffobj = Diff().unserialize(json.loads(json_result)) if jsonit: return json_result @@ -152,10 +153,10 @@ def get_ratios(id1, id2, direction, ops, bins=1, bins_col=1, jsonit=True): # bin == 1 -> timeline bar chart # bin == -1 -> 2d ratio plot if bins == 1 or bins == -1: - json_ratios = pd.json.dumps(ratios.serialize()) + json_ratios = json.dumps(ratios.serialize()) # bin > 1 -> 2d ratio histogram else: - json_ratios = pd.json.dumps(ratios) + json_ratios = json.dumps(ratios) # cache this as overview set_diff_cache(hashname, json_ratios) @@ -232,7 +233,7 @@ def create_hashname(id1, id2, bins, bins_col, direction, ops): :return: """ name = str(id1) + '_' + str(id2) + '_' + str(bins) + '_' + str(bins_col) + '_' + str(direction) + '_' + str(ops) - return hashlib.md5(name).hexdigest() + return hashlib.md5(name.encode('utf-8')).hexdigest() def ratio_from_json(jsonobj): From c198a082ae0b1cff3b0757a03d87e7a408f664de Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Thu, 5 Sep 2019 18:08:11 +0200 Subject: [PATCH 15/40] change dtype of numpy.ndarray to string --- taco_server/src/diff_cache.py | 4 ++-- taco_server/src/json_encoder.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/taco_server/src/diff_cache.py b/taco_server/src/diff_cache.py index a106bde..6db01fe 100644 --- a/taco_server/src/diff_cache.py +++ b/taco_server/src/diff_cache.py @@ -199,12 +199,12 @@ def stratify_matrix(m): if row_strat is not None: rowids = list(m.rowids()) row_indices = [rowids.index(o) for o in row_strat.rowids()] - data = data[row_indices, ...] + data = data[row_indices, ...].astype('str') if col_strat is not None: colids = list(m.colids()) col_indices = [colids.index(o) for o in col_strat.rowids()] - data = data[..., col_indices] + data = data[..., col_indices].astype('str') return Table(rows, cols, data) diff --git a/taco_server/src/json_encoder.py b/taco_server/src/json_encoder.py index c945c86..0e084b7 100644 --- a/taco_server/src/json_encoder.py +++ b/taco_server/src/json_encoder.py @@ -13,6 +13,8 @@ def default(self, obj): return int(obj) elif isinstance(obj, bytes): return obj.decode('utf-8') + elif isinstance(obj, numpy.bytes_): + return obj.decode('utf-8') elif isinstance(obj, numpy.floating): return float(obj) elif isinstance(obj, numpy.ndarray): From c269808c72238beac4c22cefcef3a05dac43a48a Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Thu, 5 Sep 2019 18:12:57 +0200 Subject: [PATCH 16/40] removed IndexError - changed dtype of numpy.ndarray to string - ensure that index 'i' can be found in array ('disordered' can eventually be an encapsulated array) --- taco_server/src/diff_finder.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/taco_server/src/diff_finder.py b/taco_server/src/diff_finder.py index 869dd4c..a06732a 100644 --- a/taco_server/src/diff_finder.py +++ b/taco_server/src/diff_finder.py @@ -509,7 +509,7 @@ def per_bin_ratios(self, bins, e_type): # convert to np.array to use np.where union_rows = np.array(union_rows) for i in range(bins): - temp = union_rows[np.where(index2bin == i)[0]] + temp = union_rows[np.where(index2bin == i)[0]].astype('str').tolist() if dir == D_ROWS: punion = { "ur_ids": temp, @@ -872,6 +872,7 @@ def _compare_values1(self): # @disordered is an array of the IDs that are available in x and not in the matching position in y (or not available at all) # in case x and y are a 
result of the intersection then disordered is the list of disordered IDs in x def _find_reorder(self, ids1, ids2, x, y, disordered, direction): + import numpy # todo this should be as the size of the original ids not just the intesection ids # x shape or y shape should be the same # or the shape of the IDs in the second table (original y) @@ -879,20 +880,25 @@ def _find_reorder(self, ids1, ids2, x, y, disordered, direction): reordered = [] for i in disordered: # todo check this with more than 2 changes - pos_table1 = np.where(ids1 == i)[0][0] - pos_table2 = np.where(ids2 == i)[0][0] + if isinstance(i, numpy.ndarray): + i = i[0] + try: + pos_table1 = np.where(ids1 == i)[0][0] + pos_table2 = np.where(ids2 == i)[0][0] + except IndexError: + print('index error') # todo substitute this with the new one! reordered.append({'id': i, 'from': pos_table1, 'to': pos_table2, 'diff': pos_table2 - pos_table1}) old = np.where(x == i)[0][0] new = np.where(y == i)[0][0] np.put(indices, old, new) - # index = [] - # for i in x: - # if i != y[np.where(x == i)[0][0]]: - # index += [np.where(y == i)[0][0]] - # else: - # index += [np.where(x == i)[0][0]] + # index = [] + # for i in x: + # if i != y[np.where(x == i)[0][0]]: + # index += [np.where(y == i)[0][0]] + # else: + # index += [np.where(x == i)[0][0]] self._reorder_to_json(direction, reordered) return indices @@ -934,7 +940,7 @@ def _compare_values(self): try: cdis = cids1[cids1 != cids2] except ValueError: - # fixing an ungly bug when there are NO unique ids! + # fixing an ungly bug when there are NO unique ids! # ## warning! bug ### # this happens when one of the tables does NOT have unique ids and the sizes are different... couldn't fix print(("Oops! it seems that sizes are not matching", cids1.shape[0], cids2.shape[0])) @@ -949,7 +955,7 @@ def _compare_values(self): inter2 = inter2[:, c_indices] # at this point inter2 should look good hopefully! 
# diff work - diff = inter2 - inter1 + diff = inter2.astype('float') - inter1.astype('float') # done :) # normalization normalized_diff = normalize_float_11(diff) From 83617a0f4fd1bbcc1e3d38cc8d95daf7c8c0cff8 Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Mon, 14 Oct 2019 15:16:25 +0200 Subject: [PATCH 17/40] remove _travis.yml_ --- .travis.yml | 26 -------------------------- 1 file changed, 26 deletions(-) delete mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 5fcf10b..0000000 --- a/.travis.yml +++ /dev/null @@ -1,26 +0,0 @@ -language: python -sudo: required -env: - - TOXENV=py37 - -install: - - (!(test -f docker_packages.txt) || (cat docker_packages.txt | xargs sudo apt-get install -y)) - - pip install -r requirements_dev.txt - - pip install -r requirements.txt - -script: npm run dist - -deploy: - provider: releases - api_key: - secure: TK9/P34Bi3WuppiDrBCwVcn41yCBwmILaU8hXTBzUPbT7TbeFIwsC6/4CtH85Z+ZrUve4S5pTmWRNf2dQDxWw3uYu7+bJuemV2J1LHG76mognj+TNEiYxfLQUt3Gql4W7C7FcI4Rlx5/uMN9wY1wro8TWUBMwT6jjSrUWIvK3GXoojd5bHvJx07XpjWl9wCon4D0ruZiFoM2mdeP23lbc2GckETi32oEKswnQXxkMACmxbPzoWbvkxH4aK8Bt2Rj2sl2TbPhVkN6DAkHGkGAvLI+2/aRfG27+oo3OKsaDjbuGABct8TfZccJ970CbQ8kbnCjYxstvqkg1JWjF0W67sX/flBZZOEUA5l0OLWo6HqMGMxm7/lEQhIdPMsRmvXL+HVOxkMrB2dda58QzxVwiZp+rRqUaeabPZp8Kl5xodGrVxsBvxe6zAbJ5jCtCSumG6+kLyKI00/kYlghqQNrgUw0ZsYJlQ34h3lo/24QpaeyDpQoCkGWQgtgqiXGpeKSu7bCnOqIqAy3nbT9Utwj7K8gIasTG5idosEAz/THMampNbGDuyxxc340sYGNMg9Bhm1g2ILWRdtV470p5hwBtIDTKi3/PAizEO26+Wh0zI47Sg3ao57avcbCsTmzbZUeA5J4bojmchhJCHX8su9cSCGh/2fJA/1eBIgEvOQ8LNE= - file_glob: true - file: dist/taco_server*.egg - on: - tags: true - -notifications: - slack: - secure: E8/1UIdHSczUbN+6i6gd1d5LM4vmLdwLQ30tpyjvnM0wvfDce76oPxLJAy240WJ5ybXRZUtNrttpVpt4tEXCy8aLFCmxD7s77rVloH+q1J8R/ptTFWZGhFGEujk1awEmVbzcWxJkV9/JENQaeGBKxwv8/EQwWwEkAb7p/+AJb9owmH88b3wUZUGHBWtbMiyyaF4Rm1Wg1stJB8Z1Ga7PRF4cqufTgcDdsCPVv9gAY+VxOIGqX/Vfuc9UWpUH8vq8lHUE7Inn5QS78kuFfSgLWga3H6Mu/Gko1XNlWk0QWWQBUvEZ6ZC6Wuo68KzvUjJHDTnx8WyfHue2JNHIslcX+eJq2WHLeEgM24VeNkILCGo/H/60NGHiSjrIv/Y9h6bQ9FDjo6TUyE4nbdPYN1RN9FQ5UbI9Y4Gi753H9mqnHWlEywBOzHxdZCAuz9Wh03CCF/blsvJ+Obbyo6Jrfe+g44jyi9kQdBNQ78qG6v4EXws8FiYao6x3PpgIwFix42Cpr+soAh5FpA3C1zHSAyZZpXF65/lrDl5yPNofK7Wy0B9bw+0I6Z/u7ZKFNVZXvYPGYvtUVcsALGBdmYc61+LCta36Po0KZseWVAlJj6QnOJDYzv0wvV/zsuf9A5KpYFGiqV9Q7zmtiO5FYF5sBy+lE7O9tHVO4O18IRndhRQgxhs= - on_success: change - on_failure: always From 83d4ca1cbe8719d65cac8927de177d6ce7ae37ba Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Mon, 14 Oct 2019 15:17:04 +0200 Subject: [PATCH 18/40] switch branches in _requirements.txt_ --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 6d96a8f..4f95b4d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ --e git+https://github.com/phovea/phovea_server.git@develop#egg=phovea_server +-e git+https://github.com/phovea/phovea_server.git@python_3.7#egg=phovea_server sklearn==0.0 From 8c27fad8f0629d98142e2850677293763ddb147d Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Mon, 14 Oct 2019 15:21:09 +0200 Subject: [PATCH 19/40] update node environment variable in circleci config --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 55be307..0fcaf74 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -5,7 +5,7 @@ jobs: docker: - image: circleci/python:3.7-node-browsers environment: - NODE_VERSION: 8.16.1 + NODE_VERSION: 
10.16.3 tags: - /v\d+.\d+.\d+.*/ steps: From dc06b458f9c763aeed1c74682f3823a5a5c8b8dd Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Fri, 18 Oct 2019 10:03:19 +0200 Subject: [PATCH 20/40] switch branches in _requirements.txt_ --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 4f95b4d..b10f8b8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ --e git+https://github.com/phovea/phovea_server.git@python_3.7#egg=phovea_server +-e git+https://github.com/phovea/phovea_server.git@flask_1.1.1#egg=phovea_server sklearn==0.0 From 5fd73c5fdf35033e708bc1238e994622363237e7 Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Wed, 23 Oct 2019 07:54:54 +0200 Subject: [PATCH 21/40] unify _package.json_ --- package.json | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/package.json b/package.json index b00a772..e903cd4 100644 --- a/package.json +++ b/package.json @@ -1,4 +1,21 @@ { + "name": "taco_server", + "description": "The server part for comparing large tabular data using Phovea", + "version": "1.0.0-SNAPSHOT", + "author": { + "name": "The Caleydo Team", + "email": "contact@caleydo.org", + "url": "https://caleydo.org" + }, + "license": "BSD-3-Clause", + "homepage": "http://caleydo.org", + "bugs": { + "url": "https://github.com/caleydo/taco_server/issues" + }, + "repository": { + "type": "git", + "url": "https://github.com/caleydo/taco_server.git" + }, "files": [ "taco_server", "__init__.py", @@ -17,22 +34,5 @@ "predist": "npm run build && npm run docs", "dist": "python setup.py bdist_egg && cd build && tar cvzf ../dist/taco_server.tar.gz *", "docs": "sphinx-apidoc -o docs -f ./taco_server && sphinx-build ./docs build/docs" - }, - "name": "taco_server", - "description": "The server part for comparing large tabular data using Phovea", - "homepage": "http://caleydo.org", - "version": "1.0.0-SNAPSHOT", - "author": { - "name": "The Caleydo Team", - "email": "contact@caleydo.org", - "url": "https://caleydo.org" - }, - "license": "BSD-3-Clause", - "bugs": { - "url": "https://github.com/caleydo/taco_server/issues" - }, - "repository": { - "type": "git", - "url": "https://github.com/caleydo/taco_server.git" } } From 056c5a78589269234dbe0eac9e229448635de247 Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Mon, 28 Oct 2019 10:37:20 +0100 Subject: [PATCH 22/40] fix problems with JSON encoder - remove underscore - change type of ndarray --- taco_server/src/json_encoder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/taco_server/src/json_encoder.py b/taco_server/src/json_encoder.py index 0e084b7..124878c 100644 --- a/taco_server/src/json_encoder.py +++ b/taco_server/src/json_encoder.py @@ -13,11 +13,11 @@ def default(self, obj): return int(obj) elif isinstance(obj, bytes): return obj.decode('utf-8') - elif isinstance(obj, numpy.bytes_): + elif isinstance(obj, numpy.bytes): return obj.decode('utf-8') elif isinstance(obj, numpy.floating): return float(obj) elif isinstance(obj, numpy.ndarray): - return obj.tolist() + return obj.astype(str) else: return super(JsonEncoder, self).default(obj) From 981193d2bb7b3b56cec301c23e1edcd64ed3fab6 Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Wed, 30 Oct 2019 08:01:39 +0100 Subject: [PATCH 23/40] use `destination` instead of invalid `prefix` in _config.yml_ --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml 
b/.circleci/config.yml
index 0fcaf74..3c9ab59 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -38,4 +38,4 @@ jobs:
             npm run dist
       - store_artifacts:
           path: dist
-          prefix: dist
+          destination: dist

From dc56e50ae8337d4d6f5c9bc7724761ee64179e7f Mon Sep 17 00:00:00 2001
From: dvvanessastoiber
Date: Wed, 30 Oct 2019 08:02:15 +0100
Subject: [PATCH 24/40] update node version to "12.13" and remove environment
 variable (not a frontend-only repo)

---
 .circleci/config.yml | 2 --
 .gitlab-ci.yml       | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 3c9ab59..c021421 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -4,8 +4,6 @@ jobs:
     working_directory: ~/phovea
     docker:
       - image: circleci/python:3.7-node-browsers
-        environment:
-          NODE_VERSION: 10.16.3
     tags:
       - /v\d+.\d+.\d+.*/
     steps:

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 50b4bd1..1da2330 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,7 +1,7 @@
 image: circleci/python:3.7-node-browsers
 
 variables:
-  NODE_VERSION: "10.16.3"
+  NODE_VERSION: "12.13"
   GIT_DEPTH: "1"
 
 cache:

From fbdb8aa01646525212d8fd34ead6ff8a20c7340e Mon Sep 17 00:00:00 2001
From: dvvanessastoiber
Date: Thu, 31 Oct 2019 07:59:58 +0100
Subject: [PATCH 25/40] change type of numpy array to avoid error with `bytes`
 and `string`

---
 taco_server/src/diff_finder.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/taco_server/src/diff_finder.py b/taco_server/src/diff_finder.py
index a06732a..bc5cb24 100644
--- a/taco_server/src/diff_finder.py
+++ b/taco_server/src/diff_finder.py
@@ -740,7 +740,7 @@ def __init__(self, t1, t2, rowtype, coltype, direction):
     self.diff = Diff(direction=self._direction)
     self.union = {}
     self.intersection = {}  # we only need this for rows when we have content changes
-    self.intersection["ic_ids"] = get_intersection(self._table1.col_ids, self._table2.col_ids)
+    self.intersection["ic_ids"] = get_intersection(self._table1.col_ids, self._table2.col_ids.astype(str))
     if self.intersection["ic_ids"].shape[0] > 0:
       # there's at least one common column between the tables
       # otherwise there's no need to calculate the unions

From 46f81ce37fed9e7fe7438fdc33b5d2c58f3450d7 Mon Sep 17 00:00:00 2001
From: dvvanessastoiber
Date: Thu, 31 Oct 2019 11:13:02 +0100
Subject: [PATCH 26/40] fix check whether key exists

---
 taco_server/src/diff_finder.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/taco_server/src/diff_finder.py b/taco_server/src/diff_finder.py
index 6470676..2b63905 100644
--- a/taco_server/src/diff_finder.py
+++ b/taco_server/src/diff_finder.py
@@ -172,11 +172,11 @@ def serialize(self):
     }
 
   def unserialize(self, json_obj):
-    self.content = [] if json_obj['content'] is None else json_obj['content']
-    self.structure = {} if json_obj['structure'] is None else json_obj['structure']
-    self.merge = {} if json_obj['merge'] is None else json_obj['merge']
-    self.reorder = {'rows': [], 'cols': []} if json_obj['reorder'] is None else json_obj['reorder']
-    self.union = {} if json_obj['union'] is None else json_obj['union']
+    self.content = json_obj['content'] if 'content' in list(json_obj.keys()) else []
+    self.structure = json_obj['structure'] if 'structure' in list(json_obj.keys()) else {}
+    self.merge = json_obj['merge'] if 'merge' in list(json_obj.keys()) else {}
+    self.reorder = json_obj['reorder'] if 'reorder' in list(json_obj.keys()) else {'rows': [], 'cols': []}
+    self.union = json_obj['union'] if 'union' in
list(json_obj.keys()) else {} return self def content_counts_percell(self): @@ -421,7 +421,7 @@ def aggregate(self, bins, bins_col=2): # it's the case of histogram or bar plot result = {} if self._direction == D_ROWS_COLS or self._direction == D_ROWS: - union_rows = self.union['ur_ids'] + union_rows = self.union['ur_ids'] if 'ur_ids' in list(self.union.keys()) else [] max_height = len(union_rows) if bins >= max_height: # this is the case of bar plot @@ -437,7 +437,7 @@ def aggregate(self, bins, bins_col=2): # todo the rows might have different bins number than the cols if self._direction == D_ROWS_COLS or self._direction == D_COLS: # if it's the cols not the rows then switch - union_cols = self.union['uc_ids'] + union_cols = self.union['uc_ids'] if 'uc_ids' in list(self.union.keys()) else [] max_width = len(union_cols) if bins_col >= max_width: # todo handle the > alone or? @@ -540,15 +540,15 @@ def per_entity_ratios(self, dir): # get a partial diff where every row is a diff # 1. Partition # get the direction - union_rows = self.union['ur_ids'] - union_cols = self.union['uc_ids'] + union_rows = self.union['ur_ids'] if 'ur_ids' in list(self.union.keys()) else [] + union_cols = self.union['uc_ids'] if 'uc_ids' in list(self.union.keys()) else [] e_type = "rows" row_id = "row" if dir == D_COLS: # if it's the cols not the rows then switch - union_rows = self.union['uc_ids'] - union_cols = self.union['ur_ids'] + union_rows = self.union['uc_ids'] if 'uc_ids' in list(self.union.keys()) else [] + union_cols = self.union['ur_ids'] if 'ur_ids' in list(self.union.keys()) else [] # todo handle the case of both rows and columns e_type = "cols" row_id = "col" From 1e812f9993c4be5d1184f3f9ba54059f2ec7614d Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Thu, 31 Oct 2019 15:10:08 +0100 Subject: [PATCH 27/40] switch branches in _requirements.txt_ --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index b10f8b8..4f95b4d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ --e git+https://github.com/phovea/phovea_server.git@flask_1.1.1#egg=phovea_server +-e git+https://github.com/phovea/phovea_server.git@python_3.7#egg=phovea_server sklearn==0.0 From dab80fe7047214847f04f75b0ae535c2888ba0b3 Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Thu, 31 Oct 2019 15:17:21 +0100 Subject: [PATCH 28/40] fix error with indentation and unused variables --- taco_server/__init__.py | 7 +++---- taco_server/src/diff_cache.py | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/taco_server/__init__.py b/taco_server/__init__.py index 3204af7..c905365 100644 --- a/taco_server/__init__.py +++ b/taco_server/__init__.py @@ -11,10 +11,9 @@ def phovea(registry): :param registry: """ # generator-phovea:begin - registry.append('namespace', 'taco', 'taco_server.api', - { - 'namespace': '/api/taco' - }) + registry.append('namespace', 'taco', 'taco_server.api', { + 'namespace': '/api/taco' + }) # generator-phovea:end pass diff --git a/taco_server/src/diff_cache.py b/taco_server/src/diff_cache.py index 6db01fe..c027926 100644 --- a/taco_server/src/diff_cache.py +++ b/taco_server/src/diff_cache.py @@ -100,7 +100,7 @@ def get_diff_table(id1, id2, direction, ops, jsonit=True): if isinstance(diffobj, Diff): # log the detail - serialize = Diff.serialize + serialize = Diff.serialize # noqa E121 json_result = (json.dumps(diffobj.__dict__, cls=json_encoder.JsonEncoder)) set_diff_cache(hash_name, json_result) else: From 
eff794df87762d762cfd80a9e2a5f3307f21edcc Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Thu, 31 Oct 2019 15:20:06 +0100 Subject: [PATCH 29/40] add spaces --- taco_server/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/taco_server/__init__.py b/taco_server/__init__.py index c905365..5beac01 100644 --- a/taco_server/__init__.py +++ b/taco_server/__init__.py @@ -12,7 +12,7 @@ def phovea(registry): """ # generator-phovea:begin registry.append('namespace', 'taco', 'taco_server.api', { - 'namespace': '/api/taco' + 'namespace': '/api/taco' }) # generator-phovea:end pass From 9889a8643d26a521e9dc16ce881bb07c48bb042e Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Thu, 31 Oct 2019 15:32:18 +0100 Subject: [PATCH 30/40] add decoding --- build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.py b/build.py index 2a12e00..63fa1ef 100644 --- a/build.py +++ b/build.py @@ -19,7 +19,7 @@ def _resolve_plugin(repo, version): if os.path.isdir('.git') and repo: if repo.endswith('.git'): repo = repo[0:-4] - return repo + '/commit/' + _git_head('.') + return repo + '/commit/' + _git_head('.').decode('utf-8') # not a git repo return version From 06f6f0cd6add8ca204ff90acf8756275f9416d15 Mon Sep 17 00:00:00 2001 From: Holger Stitz Date: Wed, 18 Dec 2019 21:25:37 +0100 Subject: [PATCH 31/40] Update template files and dependencies Using generator-phovea v3.0.0 - Update CircleCI config - Update dev dependencies - Update buildPython.js --- .circleci/config.yml | 28 +++++++++---- .yo-rc.json | 8 +++- buildPython.js | 69 +++++++++++++++++++++++++++++++ deploy/docker-compose.partial.yml | 1 + package.json | 8 ++-- requirements.txt | 2 +- setup.py | 4 +- tox.ini | 2 +- 8 files changed, 107 insertions(+), 15 deletions(-) create mode 100644 buildPython.js create mode 100644 deploy/docker-compose.partial.yml diff --git a/.circleci/config.yml b/.circleci/config.yml index 93e6342..5f59cc4 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -3,16 +3,27 @@ jobs: build: working_directory: ~/phovea docker: - - image: caleydo/phovea_circleci_python:v2.0 + - image: caleydo/phovea_circleci_python:v3.0 steps: - checkout - run: + name: Show Node.js and npm version + command: | + node -v + npm -v + - run: + name: Show Python and pip version + command: | + python --version + pip --version + - run: + name: Install Docker packages from docker_packages.txt command: | (!(test -f docker_packages.txt) || (cat docker_packages.txt | xargs sudo apt-get install -y)) - restore_cache: key: deps1-{{ .Branch }}-{{ checksum "requirements.txt" }}-{{ checksum "requirements_dev.txt" }} - run: - name: install-pip-wee + name: Install pip requirements command: | virtualenv ~/venv . ~/venv/bin/activate @@ -22,13 +33,16 @@ jobs: key: deps1-{{ .Branch }}-{{ checksum "requirements.txt" }}-{{ checksum "requirements_dev.txt" }} paths: - ~/venv - - run: #force update of VCS dependencies? - name: update-pip-vcs-dependencies + - run: + name: Force an update of pip dependencies from git repositories # not sure if this is working ? command: | . ~/venv/bin/activate pip install --upgrade --upgrade-strategy=only-if-needed -r requirements.txt - run: - name: dist + name: Show installed pip packages + command: pip list || true + - run: + name: Build command: | . 
~/venv/bin/activate npm run dist @@ -39,8 +53,8 @@ workflows: version: 2 # build-nightly: # triggers: -# - schedule: # nightly build during weekday -# cron: "15 1 * * 1-5" +# - schedule: +# cron: "15 1 * * 1-5" # "At 01:15 on every day-of-week from Monday through Friday.”, see: https://crontab.guru/#15_1_*_*_1-5 # filters: # branches: # only: diff --git a/.yo-rc.json b/.yo-rc.json index 896b193..9c31be8 100644 --- a/.yo-rc.json +++ b/.yo-rc.json @@ -27,6 +27,12 @@ "debianPackages": [], "redhatPackages": [] }, - "today": "Tue, 08 Nov 2016 08:36:05 GMT" + "today": "Tue, 08 Nov 2016 08:36:05 GMT", + "promptValues": { + "authorName": "The Caleydo Team", + "authorEmail": "contact@caleydo.org", + "authorUrl": "https://caleydo.org", + "githubAccount": "caleydo" + } } } \ No newline at end of file diff --git a/buildPython.js b/buildPython.js new file mode 100644 index 0000000..c30f2e8 --- /dev/null +++ b/buildPython.js @@ -0,0 +1,69 @@ +/** + * Created by sam on 13.11.2016. + */ + +const spawnSync = require('child_process').spawnSync; +const fs = require('fs'); + +function gitHead(cwd) { + const r = spawnSync('git', ['rev-parse', '--verify', 'HEAD'], { + cwd: cwd + }); + if (!r.stdout) { + console.error(cwd, r.error); + return 'error'; + } + return r.stdout.toString().trim(); +} + +function resolvePlugin(repo, version) { + if (fs.lstatSync('.git').isDirectory() && repo) { + if (repo.endsWith('.git')) { + repo = repo.slice(0, repo.length - 4); + return repo + '/commit/' + gitHead('.'); + } + } + // not a git repo + return version; +} + +function toVersion(v) { + const now = new Date().toISOString(); + // %Y%m%d-%H%M%S + const fmt = now + .replace(/T/, ' ') + .replace(/\..+/, '') + .replace(/[-:]/, '') + .replace(' ', '-'); + return v.replace('SNAPSHOT', fmt); +} + +function _main() { + const pkg = require('./package.json'); + const name = pkg.name; + const version = toVersion(pkg.version); + const resolved = resolvePlugin((pkg.repository || {}).url, version); + + const buildInfo = { + name, + version, + resolved, + description: pkg.description, + homepage: pkg.homepage, + repository: (pkg.repository || {}).url + }; + + const l = ('build/source/' + name.toLowerCase()).split('/'); + l.forEach((_, i) => { + const path = l.slice(0, i + 1).join('/'); + if (!fs.existsSync(path)) { + fs.mkdirSync(path); + } + }); + + fs.writeFileSync('build/source/' + name.toLowerCase() + '/buildInfo.json', JSON.stringify(buildInfo, null, ' ')); +} + +if (require.main === module) { + _main(); +} diff --git a/deploy/docker-compose.partial.yml b/deploy/docker-compose.partial.yml new file mode 100644 index 0000000..745b1fd --- /dev/null +++ b/deploy/docker-compose.partial.yml @@ -0,0 +1 @@ +version: '2.0' diff --git a/package.json b/package.json index e903cd4..7cad7c8 100644 --- a/package.json +++ b/package.json @@ -14,7 +14,7 @@ }, "repository": { "type": "git", - "url": "https://github.com/caleydo/taco_server.git" + "url": "https://github.com/Caleydo/taco_server.git" }, "files": [ "taco_server", @@ -26,13 +26,13 @@ "docker_packages.txt" ], "scripts": { - "check": "flake8", + "check": "flake8 --exclude=.git,venv,deploy,docs,__pycache__,node_modules", "pretest": "npm run check", "test": "test ! -d tests || python setup.py test", "prebuild": "node -e \"process.exit(process.env.PHOVEA_SKIP_TESTS === undefined?1:0)\" || npm run test", - "build": "python build.py", + "build": "rm -rf build/source && find . 
-name '*.pyc' -delete && node buildPython.js && cp -r ./taco_server build/source/", "predist": "npm run build && npm run docs", - "dist": "python setup.py bdist_egg && cd build && tar cvzf ../dist/taco_server.tar.gz *", + "dist": "python setup.py sdist bdist_wheel && cd build && tar cvzf ../dist/taco_server.tar.gz *", "docs": "sphinx-apidoc -o docs -f ./taco_server && sphinx-build ./docs build/docs" } } diff --git a/requirements.txt b/requirements.txt index 97dc403..0cdf3da 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -e git+https://github.com/phovea/phovea_server.git@develop#egg=phovea_server enum==0.4.6 -sklearn==0.0 +sklearn==0.0 \ No newline at end of file diff --git a/setup.py b/setup.py index 18e10f0..b4f30f1 100644 --- a/setup.py +++ b/setup.py @@ -39,12 +39,14 @@ def to_version(v): now = datetime.datetime.utcnow() return v.replace('SNAPSHOT', now.strftime('%Y%m%d-%H%M%S')) + setup( - name=pkg['name'], + name=pkg['name'].lower(), version=to_version(pkg['version']), url=pkg['homepage'], description=pkg['description'], long_description=read_it('README.md'), + long_description_content_type='text/markdown', keywords=pkg.get('keywords', ''), author=pkg['author']['name'], author_email=pkg['author']['email'], diff --git a/tox.ini b/tox.ini index f2734b2..a349028 100644 --- a/tox.ini +++ b/tox.ini @@ -21,7 +21,7 @@ commands = py.test tests [flake8] -ignore=E111,E114,E501 +ignore=E111,E114,E501,E121,E123,E126,E226,E24,E704 exclude = .tox,*.egg,build,data,.git,__pycache__,docs,node_modules [pytest] From 1002ad54c6f342eb8d23ca3f78fa1affbe23d3a3 Mon Sep 17 00:00:00 2001 From: Holger Stitz Date: Wed, 18 Dec 2019 21:31:05 +0100 Subject: [PATCH 32/40] Fix flake8 --- data/olympics_generator/count_by_year.py | 59 +++++++++++++----------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/data/olympics_generator/count_by_year.py b/data/olympics_generator/count_by_year.py index 229b97d..ebef3c0 100644 --- a/data/olympics_generator/count_by_year.py +++ b/data/olympics_generator/count_by_year.py @@ -3,21 +3,22 @@ createdCSVs = [] -def writeIndexJson(): + +def write_index_json(): with open('../index.json', 'w') as outfile: json.dump(createdCSVs, outfile) -def writeCSV(year, medalType, fieldnames, medalsPerCountry): +def write_csv(year, medal_type, fieldnames, medals_per_country): if year is None: print('Invalid year -> file not written') return - name = 'Olympic Games ' + year + ' (' + medalType + ' Medals)' - filename = 'olympics_' + year + '_' + medalType.lower() + '.csv' + name = 'Olympic Games ' + year + ' (' + medal_type + ' Medals)' + filename = 'olympics_' + year + '_' + medal_type.lower() + '.csv' # sort countries by sum of all medals - sortedBySum = sorted(medalsPerCountry.items(), key=lambda x: sum(x[1].values()), reverse=True) + sortedBySum = sorted(medals_per_country.items(), key=lambda x: sum(x[1].values()), reverse=True) print('----------------') print('Write ' + filename) @@ -26,7 +27,7 @@ def writeCSV(year, medalType, fieldnames, medalsPerCountry): # get min and max value of the whole csv for the range maxValue = float('-inf') - #minValue = float('inf') # does not work, because we fill empty cells with 0 by default + # minValue = float('inf') # does not work, because we fill empty cells with 0 by default with open('../' + filename, 'wb') as output: writer = csv.DictWriter(output, fieldnames=fieldnames, restval='0', dialect='excel') @@ -34,7 +35,7 @@ def writeCSV(year, medalType, fieldnames, medalsPerCountry): for k, v in sortedBySum: values = 
list(v.values()) maxValue = max(maxValue, max(values)) - #minValue = min(minValue, min(values)) + # minValue = min(minValue, min(values)) v['CountryCode'] = k writer.writerow(v) @@ -43,7 +44,7 @@ def writeCSV(year, medalType, fieldnames, medalsPerCountry): stats['name'] = name stats['path'] = filename stats['type'] = 'matrix' - stats['size'] = [len(sortedBySum), len(fieldnames)-1] # -1 = CountryCode fieldname + stats['size'] = [len(sortedBySum), len(fieldnames)-1] # -1 = CountryCode fieldname stats['rowtype'] = 'Country' stats['coltype'] = 'Discipline' stats['value'] = dict(type='real', range=[0, maxValue]) @@ -52,48 +53,50 @@ def writeCSV(year, medalType, fieldnames, medalsPerCountry): print('----------------') -def readCSV(medalType = 'Total'): + +def read_csv(medal_type='Total'): with open('./MedalData1.csv', 'rb') as csvfile: - reader = csv.DictReader(csvfile, fieldnames=['Games','Sport','Event','Athlete(s)','CountryCode','CountryName','Medal','ResultInSeconds'], dialect='excel-tab') + reader = csv.DictReader(csvfile, fieldnames=['Games', 'Sport', 'Event', 'Athlete(s)', 'CountryCode', 'CountryName', 'Medal', 'ResultInSeconds'], dialect='excel-tab') next(reader) lastGames = None fieldnames = ['CountryCode'] - medalsPerCountry = dict() + medals_per_country = dict() for row in reader: if row['Games'] != lastGames: # write old year when a new year is detected - writeCSV(lastGames, medalType, fieldnames, medalsPerCountry) + write_csv(lastGames, medal_type, fieldnames, medals_per_country) # clean up variables fieldnames = ['CountryCode'] - medalsPerCountry = dict() + medals_per_country = dict() lastGames = row['Games'] - country = row['CountryCode'] # short-cut + country = row['CountryCode'] # short-cut if row['Event'] not in fieldnames: fieldnames.append(row['Event']) - if row['Medal'] == medalType or medalType is 'Total': - if country not in medalsPerCountry: - medalsPerCountry[country] = dict() - #medalsPerCountry[country]['CountryCode'] = country + if row['Medal'] == medal_type or medal_type == 'Total': + if country not in medals_per_country: + medals_per_country[country] = dict() + # medals_per_country[country]['CountryCode'] = country - if row['Event'] not in medalsPerCountry[country]: - medalsPerCountry[country][row['Event']] = 0 + if row['Event'] not in medals_per_country[country]: + medals_per_country[country][row['Event']] = 0 - medalsPerCountry[country][row['Event']] += 1 + medals_per_country[country][row['Event']] += 1 - #print(row['Games'], row['Event'], country, row['Medal']) + # print(row['Games'], row['Event'], country, row['Medal']) # write the last file - writeCSV(lastGames, medalType, fieldnames, medalsPerCountry) + write_csv(lastGames, medal_type, fieldnames, medals_per_country) + -readCSV('Total') -readCSV('Bronze') -readCSV('Silver') -readCSV('Gold') +read_csv('Total') +read_csv('Bronze') +read_csv('Silver') +read_csv('Gold') -writeIndexJson() +write_index_json() From f82ae49384f92d764af17d7bf9b9442241934ade Mon Sep 17 00:00:00 2001 From: Holger Stitz Date: Wed, 18 Dec 2019 21:33:57 +0100 Subject: [PATCH 33/40] Fix flake8 --- data/olympics_generator/count_by_year.py | 34 ++++++++++++------------ 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/data/olympics_generator/count_by_year.py b/data/olympics_generator/count_by_year.py index ebef3c0..797f8b2 100644 --- a/data/olympics_generator/count_by_year.py +++ b/data/olympics_generator/count_by_year.py @@ -1,12 +1,12 @@ import csv import json -createdCSVs = [] +created_cvs_list = [] def write_index_json(): with 
open('../index.json', 'w') as outfile: - json.dump(createdCSVs, outfile) + json.dump(created_cvs_list, outfile) def write_csv(year, medal_type, fieldnames, medals_per_country): @@ -18,24 +18,24 @@ def write_csv(year, medal_type, fieldnames, medals_per_country): filename = 'olympics_' + year + '_' + medal_type.lower() + '.csv' # sort countries by sum of all medals - sortedBySum = sorted(medals_per_country.items(), key=lambda x: sum(x[1].values()), reverse=True) + sorted_by_sum = sorted(medals_per_country.items(), key=lambda x: sum(x[1].values()), reverse=True) print('----------------') print('Write ' + filename) print(fieldnames) - print(sortedBySum) + print(sorted_by_sum) # get min and max value of the whole csv for the range - maxValue = float('-inf') - # minValue = float('inf') # does not work, because we fill empty cells with 0 by default + max_value = float('-inf') + # min_value = float('inf') # does not work, because we fill empty cells with 0 by default with open('../' + filename, 'wb') as output: writer = csv.DictWriter(output, fieldnames=fieldnames, restval='0', dialect='excel') writer.writeheader() - for k, v in sortedBySum: + for k, v in sorted_by_sum: values = list(v.values()) - maxValue = max(maxValue, max(values)) - # minValue = min(minValue, min(values)) + max_value = max(max_value, max(values)) + # min_value = min(min_value, min(values)) v['CountryCode'] = k writer.writerow(v) @@ -44,12 +44,12 @@ def write_csv(year, medal_type, fieldnames, medals_per_country): stats['name'] = name stats['path'] = filename stats['type'] = 'matrix' - stats['size'] = [len(sortedBySum), len(fieldnames)-1] # -1 = CountryCode fieldname + stats['size'] = [len(sorted_by_sum), len(fieldnames)-1] # -1 = CountryCode fieldname stats['rowtype'] = 'Country' stats['coltype'] = 'Discipline' - stats['value'] = dict(type='real', range=[0, maxValue]) + stats['value'] = dict(type='real', range=[0, max_value]) - createdCSVs.append(stats) + created_cvs_list.append(stats) print('----------------') @@ -59,20 +59,20 @@ def read_csv(medal_type='Total'): reader = csv.DictReader(csvfile, fieldnames=['Games', 'Sport', 'Event', 'Athlete(s)', 'CountryCode', 'CountryName', 'Medal', 'ResultInSeconds'], dialect='excel-tab') next(reader) - lastGames = None + last_games = None fieldnames = ['CountryCode'] medals_per_country = dict() for row in reader: - if row['Games'] != lastGames: + if row['Games'] != last_games: # write old year when a new year is detected - write_csv(lastGames, medal_type, fieldnames, medals_per_country) + write_csv(last_games, medal_type, fieldnames, medals_per_country) # clean up variables fieldnames = ['CountryCode'] medals_per_country = dict() - lastGames = row['Games'] + last_games = row['Games'] country = row['CountryCode'] # short-cut if row['Event'] not in fieldnames: @@ -91,7 +91,7 @@ def read_csv(medal_type='Total'): # print(row['Games'], row['Event'], country, row['Medal']) # write the last file - write_csv(lastGames, medal_type, fieldnames, medals_per_country) + write_csv(last_games, medal_type, fieldnames, medals_per_country) read_csv('Total') From 2db80c811ae1fba55b09432d83bf281bc65861ff Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Thu, 19 Dec 2019 09:24:22 +0100 Subject: [PATCH 34/40] change type of array in `Table` to string --- taco_server/src/diff_finder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/taco_server/src/diff_finder.py b/taco_server/src/diff_finder.py index 0542f5a..13fc5db 100644 --- a/taco_server/src/diff_finder.py +++ 
b/taco_server/src/diff_finder.py @@ -139,8 +139,8 @@ def generate_diff_from_files(file1, file2): # Table data structure class Table: def __init__(self, rows, cols, content): - self.row_ids = np.asarray(rows, 'object') - self.col_ids = np.asarray(cols, 'object') + self.row_ids = np.asarray(rows, 'object').astype(str) + self.col_ids = np.asarray(cols, 'object').astype(str) self.content = content From f081ed5440950303c8242e7de871b534f1db2b04 Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Thu, 19 Dec 2019 09:24:34 +0100 Subject: [PATCH 35/40] use custom JsonEncoder --- taco_server/src/diff_cache.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/taco_server/src/diff_cache.py b/taco_server/src/diff_cache.py index c027926..5b5039b 100644 --- a/taco_server/src/diff_cache.py +++ b/taco_server/src/diff_cache.py @@ -106,7 +106,7 @@ def get_diff_table(id1, id2, direction, ops, jsonit=True): else: # todo later find a way to send the error # e.g. there's no matching column in this case - json_result = json.dumps(diffobj) # which is {} for now! + json_result = json.dumps(diffobj, cls=json_encoder.JsonEncoder) # which is {} for now! set_diff_cache(hash_name, json_result) elif jsonit is False: @@ -153,10 +153,10 @@ def get_ratios(id1, id2, direction, ops, bins=1, bins_col=1, jsonit=True): # bin == 1 -> timeline bar chart # bin == -1 -> 2d ratio plot if bins == 1 or bins == -1: - json_ratios = json.dumps(ratios.serialize()) + json_ratios = json.dumps(ratios.serialize(), cls=json_encoder.JsonEncoder) # bin > 1 -> 2d ratio histogram else: - json_ratios = json.dumps(ratios) + json_ratios = json.dumps(ratios, cls=json_encoder.JsonEncoder) # cache this as overview set_diff_cache(hashname, json_ratios) From 5dd80ecff3d317f30057f284e6ae8e77930b95f8 Mon Sep 17 00:00:00 2001 From: Holger Stitz Date: Thu, 2 Jan 2020 14:25:24 +0100 Subject: [PATCH 36/40] Use Debian `buster` as CircleCI Docker image --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 991be3d..bc4de12 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -3,7 +3,7 @@ jobs: build: working_directory: ~/phovea docker: - - image: circleci/python:3.7-node-browsers + - image: circleci/python:3.7-buster-node-browsers # for node version see Dockerfile on https://hub.docker.com/r/circleci/python steps: - checkout - run: From 041a240b1d8968f6889bb43c05e847fe24b96939 Mon Sep 17 00:00:00 2001 From: Holger Stitz Date: Thu, 2 Jan 2020 14:25:44 +0100 Subject: [PATCH 37/40] Remove .gitlab-ci.yml --- .gitlab-ci.yml | 52 -------------------------------------------------- 1 file changed, 52 deletions(-) delete mode 100644 .gitlab-ci.yml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml deleted file mode 100644 index 1da2330..0000000 --- a/.gitlab-ci.yml +++ /dev/null @@ -1,52 +0,0 @@ -image: circleci/python:3.7-node-browsers - -variables: - NODE_VERSION: "12.13" - GIT_DEPTH: "1" - -cache: - key: "$CI_REPOSITORY_URL-$CI_COMMIT_REF_NAME" - paths: - - ~/venv - -before_script: - # Install ssh-agent if not already installed, it is required by Docker. - # (change apt-get to yum if you use a CentOS-based image) - - 'which ssh-agent || ( apt-get update -y && apt-get install openssh-client -y )' - - # Run ssh-agent (inside the build environment) - - eval $(ssh-agent -s) - - # Add the SSH key stored in SSH_PRIVATE_KEY variable to the agent store - - ssh-add <(echo "$SSH_PRIVATE_KEY") - - # For Docker builds disable host key checking. 
Be aware that by adding that - # you are suspectible to man-in-the-middle attacks. - # WARNING: Use this only with the Docker executor, if you use it with shell - # you will overwrite your user's SSH config. - - mkdir -p ~/.ssh - - '[[ -f /.dockerenv ]] && echo -e "Host *\n\tStrictHostKeyChecking no\n\n" > ~/.ssh/config' - -stages: - - install - - build - -install-pip-wee: - stage: install - script: | - (!(test -f docker_packages.txt) || (cat docker_packages.txt | xargs sudo apt-get install -y)) - virtualenv ~/venv - . ~/venv/bin/activate - pip install --upgrade --upgrade-strategy=only-if-needed -r requirements_dev.txt - pip install --upgrade --upgrade-strategy=only-if-needed -r requirements.txt - -dist: - stage: build - script: | - . ~/venv/bin/activate - npm run dist - allow_failure: false - artifacts: - expire_in: 1 week - paths: - - dist From c94221e9a331012a7124541d2521d8ceae39487d Mon Sep 17 00:00:00 2001 From: rumersdorfer <45141967+rumersdorfer@users.noreply.github.com> Date: Tue, 7 Jan 2020 17:57:52 +0100 Subject: [PATCH 38/40] Ignore package-lock.json --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index a00b0c0..d283702 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,4 @@ __pycache__/ # due to using tox and pytest .tox .cache +package-lock.json From e0308e80fb67cf49966b18f62026755f3365a113 Mon Sep 17 00:00:00 2001 From: dvvanessastoiber Date: Thu, 16 Jan 2020 14:47:33 +0100 Subject: [PATCH 39/40] Prepare release 3.0.0 --- package.json | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index 7cad7c8..6e0d647 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "taco_server", "description": "The server part for comparing large tabular data using Phovea", - "version": "1.0.0-SNAPSHOT", + "version": "3.0.0", "author": { "name": "The Caleydo Team", "email": "contact@caleydo.org", diff --git a/requirements.txt b/requirements.txt index 6d96a8f..143e6c9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ --e git+https://github.com/phovea/phovea_server.git@develop#egg=phovea_server +phovea_server>=4.0.0,<5.0.0 sklearn==0.0 From 25b95722b336a322b0388930814fde1280992aa6 Mon Sep 17 00:00:00 2001 From: Holger Stitz Date: Thu, 16 Jan 2020 14:53:32 +0100 Subject: [PATCH 40/40] Update package.json --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 6e0d647..5d5085e 100644 --- a/package.json +++ b/package.json @@ -32,7 +32,7 @@ "prebuild": "node -e \"process.exit(process.env.PHOVEA_SKIP_TESTS === undefined?1:0)\" || npm run test", "build": "rm -rf build/source && find . -name '*.pyc' -delete && node buildPython.js && cp -r ./taco_server build/source/", "predist": "npm run build && npm run docs", - "dist": "python setup.py sdist bdist_wheel && cd build && tar cvzf ../dist/taco_server.tar.gz *", + "dist": "python setup.py sdist bdist_wheel", "docs": "sphinx-apidoc -o docs -f ./taco_server && sphinx-build ./docs build/docs" } }
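
---

Editorial note on patches 34 and 35: the `json_encoder.JsonEncoder` passed to `json.dumps` in diff_cache.py is imported from phovea_server, and its definition is not part of this patch series. A minimal sketch of what such an encoder typically has to do here — converting numpy scalars and arrays (e.g. the `row_ids`/`col_ids` arrays that patch 34 coerces to strings) into plain Python types that `json.dumps` accepts — could look like the following; the class body is an assumption for illustration, not the actual phovea_server implementation:

import json

import numpy as np


class JsonEncoder(json.JSONEncoder):
  # Hypothetical stand-in for phovea_server's json_encoder.JsonEncoder:
  # convert numpy types that the default encoder rejects.
  def default(self, obj):
    if isinstance(obj, np.integer):
      return int(obj)       # numpy integer scalars -> native int
    if isinstance(obj, np.floating):
      return float(obj)     # numpy float scalars -> native float
    if isinstance(obj, np.ndarray):
      return obj.tolist()   # e.g. row_ids/col_ids from diff_finder.Table
    return super().default(obj)  # anything else: raise TypeError as usual


# Usage, mirroring diff_cache.py after patch 35:
# json_result = json.dumps(diffobj, cls=JsonEncoder)

Without such an encoder, `json.dumps` fails on numpy values with `TypeError: Object of type ndarray is not JSON serializable`, which is presumably why patches 34 and 35 appear together in this series.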