Commit: Add tox and pytest to gensim, integration with Travis and Appveyor. Fix #1613, 1644 (#1721)

* remove flake8 config from setup.cfg

* create distinct test_env for win

* ignore stuff from tox

* basic tox config

* add global env vars for full test run

* force-recreate for envs

* show top20 slowest tests

* add upload/download wheels/docs

* fix E501 [1]

* fix E501 [2]

* fix E501 [3]

* fix E501 [4]

* fix E501 [5]

* fix E501 [6]

* travis + tox

* Install tox for travis

* simplify travis file

* more verbosity with tox

* Fix numpy scipy versions

* Try to avoid pip install hang

* Fix tox

* Add build_ext

* Fix dtm test

* remove install/run sh

* Fix imports & indentation

* remove flake-diff

* Add docs building to Travis

* join flake8 and docs to one job

* add re-run for failed tests (to avoid FP) + calculate code coverage

* fix WR segfault (veeeery buggy implementation)

* attempt to make multiOS configuration

* fix mistake with cython

* Try to fix appveyor wheels problem

* Remove commented parts & add cache for travis
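The squashed commits above describe a tox-driven matrix (py27/py35/py36 crossed with linux/win, plus flake8 and docs environments, the top-20 slowest-test report, and re-runs for flaky failures). A minimal `tox.ini` sketch of that layout, purely illustrative: the env names follow the `TOXENV` values in the diffs, but the `deps` and `commands` here are assumptions, not gensim's actual configuration.

```ini
[tox]
# env names matching the TOXENV values used by Travis and Appveyor
envlist = {py27,py35,py36}-{linux,win}, flake8, docs

[testenv]
recreate = True
deps =
    pytest
    pytest-rerunfailures
commands =
    # report the 20 slowest tests; re-run failures to avoid flaky FPs
    pytest --durations=20 --reruns 2 gensim

[testenv:flake8]
deps = flake8
commands = flake8 gensim

[testenv:docs]
# hypothetical docs-build command for the "join flake8 and docs" job
commands = python setup.py build_sphinx
```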
menshikh-iv authored Nov 20, 2017
1 parent fff6529 commit 8766edc
Showing 81 changed files with 1,070 additions and 601 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -40,6 +40,8 @@ Thumbs.db

# Other #
#########
.tox/
.cache/
.project
.pydevproject
.ropeproject
20 changes: 13 additions & 7 deletions .travis.yml
@@ -5,18 +5,24 @@ cache:
directories:
- $HOME/.cache/pip
- $HOME/.ccache

- $HOME/.pip-cache
dist: trusty
language: python


matrix:
include:
- env: PYTHON_VERSION="2.7" NUMPY_VERSION="1.11.3" SCIPY_VERSION="0.18.1" ONLY_CODESTYLE="yes"
- env: PYTHON_VERSION="2.7" NUMPY_VERSION="1.11.3" SCIPY_VERSION="0.18.1" ONLY_CODESTYLE="no"
- env: PYTHON_VERSION="3.5" NUMPY_VERSION="1.11.3" SCIPY_VERSION="0.18.1" ONLY_CODESTYLE="no"
- env: PYTHON_VERSION="3.6" NUMPY_VERSION="1.11.3" SCIPY_VERSION="0.18.1" ONLY_CODESTYLE="no"
- python: '2.7'
env: TOXENV="flake8, docs"

- python: '2.7'
env: TOXENV="py27-linux"

- python: '3.5'
env: TOXENV="py35-linux"

- python: '3.6'
env: TOXENV="py36-linux"

install: source continuous_integration/travis/install.sh
script: bash continuous_integration/travis/run.sh
install: pip install tox
script: tox -vv
51 changes: 5 additions & 46 deletions appveyor.yml
@@ -13,29 +13,20 @@ environment:
secure: qXqY3dFmLOqvxa3Om2gQi/BjotTOK+EP2IPLolBNo0c61yDtNWxbmE4wH3up72Be

matrix:
# - PYTHON: "C:\\Python27"
# PYTHON_VERSION: "2.7.12"
# PYTHON_ARCH: "32"

- PYTHON: "C:\\Python27-x64"
PYTHON_VERSION: "2.7.12"
PYTHON_ARCH: "64"

# - PYTHON: "C:\\Python35"
# PYTHON_VERSION: "3.5.2"
# PYTHON_ARCH: "32"
TOXENV: "py27-win"

- PYTHON: "C:\\Python35-x64"
PYTHON_VERSION: "3.5.2"
PYTHON_ARCH: "64"

# - PYTHON: "C:\\Python36"
# PYTHON_VERSION: "3.6.0"
# PYTHON_ARCH: "32"
TOXENV: "py35-win"

- PYTHON: "C:\\Python36-x64"
PYTHON_VERSION: "3.6.0"
PYTHON_ARCH: "64"
TOXENV: "py36-win"

init:
- "ECHO %PYTHON% %PYTHON_VERSION% %PYTHON_ARCH%"
@@ -57,48 +48,16 @@ install:
# not already installed.
- "powershell ./continuous_integration/appveyor/install.ps1"
- "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%"
- "python -m pip install -U pip"
- "python -m pip install -U pip tox"

# Check that we have the expected version and architecture for Python
- "python --version"
- "python -c \"import struct; print(struct.calcsize('P') * 8)\""

# Install the build and runtime dependencies of the project.
- "%CMD_IN_ENV% pip install --timeout=60 --trusted-host 28daf2247a33ed269873-7b1aad3fab3cc330e1fd9d109892382a.r6.cf2.rackcdn.com -r continuous_integration/appveyor/requirements.txt"
- "%CMD_IN_ENV% python setup.py bdist_wheel bdist_wininst"
- ps: "ls dist"

# Install the genreated wheel package to test it
- "pip install --pre --no-index --find-links dist/ gensim"

# Not a .NET project, we build scikit-learn in the install step instead
build: false

test_script:
# Change to a non-source folder to make sure we run the tests on the
# installed library.
- "mkdir empty_folder"
- "cd empty_folder"
- "pip install pyemd testfixtures sklearn Morfessor==2.0.2a4"
- "pip freeze"
- "python -c \"import nose; nose.main()\" -s -v gensim"
# Move back to the project folder
- "cd .."

artifacts:
# Archive the generated wheel package in the ci.appveyor.com build report.
- path: dist\*
on_success:
# Upload the generated wheel package to Rackspace
# On Windows, Apache Libcloud cannot find a standard CA cert bundle so we
# disable the ssl checks.
- "python -m wheelhouse_uploader upload --no-ssl-check --local-folder=dist gensim-windows-wheels"

notifications:
- provider: Webhook
url: https://webhooks.gitter.im/e/62c44ad26933cd7ed7e8
on_build_success: false
on_build_failure: True
- tox -vv

cache:
# Use the appveyor cache to avoid re-downloading large archives such
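The appveyor.yml install step above verifies the interpreter architecture with `python -c "import struct; print(struct.calcsize('P') * 8)"`. As a standalone illustration of why that one-liner works:

```python
import struct

# struct.calcsize('P') is the size of a C pointer in bytes, so multiplying
# by 8 gives the interpreter's bitness -- the same check appveyor.yml runs
# to confirm the expected 64-bit Python was installed.
bitness = struct.calcsize('P') * 8
print(bitness)  # 64 on a 64-bit interpreter, 32 on a 32-bit one
```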
159 changes: 0 additions & 159 deletions continuous_integration/travis/flake8_diff.sh

This file was deleted.

13 changes: 0 additions & 13 deletions continuous_integration/travis/install.sh

This file was deleted.

11 changes: 0 additions & 11 deletions continuous_integration/travis/run.sh

This file was deleted.

7 changes: 5 additions & 2 deletions gensim/corpora/indexedcorpus.py
@@ -56,7 +56,8 @@ def __init__(self, fname, index_fname=None):
self.length = None

@classmethod
def serialize(serializer, fname, corpus, id2word=None, index_fname=None, progress_cnt=None, labels=None, metadata=False):
def serialize(serializer, fname, corpus, id2word=None, index_fname=None,
progress_cnt=None, labels=None, metadata=False):
"""
Iterate through the document stream `corpus`, saving the documents to `fname`
and recording byte offset of each document. Save the resulting index
@@ -93,7 +94,9 @@ def serialize(serializer, fname, corpus, id2word=None, index_fname=None, progres
offsets = serializer.save_corpus(fname, corpus, id2word, **kwargs)

if offsets is None:
raise NotImplementedError("Called serialize on class %s which doesn't support indexing!" % serializer.__name__)
raise NotImplementedError(
"Called serialize on class %s which doesn't support indexing!" % serializer.__name__
)

# store offsets persistently, using pickle
# we shouldn't have to worry about self.index being a numpy.ndarray as the serializer will return
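The `IndexedCorpus.serialize` signature reflowed above records the byte offset of each saved document so documents can later be fetched by index. A toy sketch of that offset-index idea, for illustration only (the function names and file format here are invented, not gensim's implementation):

```python
import os
import tempfile

def serialize(fname, docs):
    """Write one document per line, remembering each document's byte offset."""
    offsets = []
    with open(fname, 'w') as f:
        for doc in docs:
            offsets.append(f.tell())   # byte offset where this document starts
            f.write(doc + '\n')
    return offsets

def get(fname, offsets, i):
    """Random access: seek straight to document i instead of scanning."""
    with open(fname) as f:
        f.seek(offsets[i])
        return f.readline().rstrip('\n')

path = os.path.join(tempfile.mkdtemp(), 'corpus.txt')
offsets = serialize(path, ['first doc', 'second doc', 'third doc'])
print(get(path, offsets, 1))  # -> second doc
```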
3 changes: 2 additions & 1 deletion gensim/corpora/lowcorpus.py
@@ -77,7 +77,8 @@ def __init__(self, fname, id2word=None, line2words=split_on_space):
for doc in self:
all_terms.update(word for word, wordCnt in doc)
all_terms = sorted(all_terms) # sort the list of all words; rank in that list = word's integer id
self.id2word = dict(izip(xrange(len(all_terms)), all_terms)) # build a mapping of word id(int) -> word (string)
# build a mapping of word id(int) -> word (string)
self.id2word = dict(izip(xrange(len(all_terms)), all_terms))
else:
logger.info("using provided word mapping (%i ids)", len(id2word))
self.id2word = id2word
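The lowcorpus.py line wrapped above builds the id-to-word mapping with `dict(izip(xrange(...), ...))`; the same result comes from `enumerate` in a form that runs on modern Python, shown here with a tiny made-up vocabulary:

```python
# Rank in the sorted vocabulary = the word's integer id.
all_terms = sorted({'computer', 'human', 'interface'})
id2word = dict(enumerate(all_terms))  # word id (int) -> word (string)
print(id2word)  # -> {0: 'computer', 1: 'human', 2: 'interface'}
```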
18 changes: 14 additions & 4 deletions gensim/corpora/sharded_corpus.py
@@ -456,7 +456,10 @@ def resize_shards(self, shardsize):
for old_shard_n, old_shard_name in enumerate(old_shard_names):
os.remove(old_shard_name)
except Exception as e:
logger.error('Exception occurred during old shard no. %d removal: %s.\nAttempting to at least move new shards in.', old_shard_n, str(e))
logger.error(
'Exception occurred during old shard no. %d removal: %s.\nAttempting to at least move new shards in.',
old_shard_n, str(e)
)
finally:
# If something happens with cleaning up - try to at least get the
# new guys in.
@@ -673,7 +676,10 @@ def __add_to_slice(self, s_result, result_start, result_stop, start, stop):
Returns the resulting s_result.
"""
if (result_stop - result_start) != (stop - start):
raise ValueError('Result start/stop range different than stop/start range (%d - %d vs. %d - %d)'.format(result_start, result_stop, start, stop))
raise ValueError(
'Result start/stop range different than stop/start range (%d - %d vs. %d - %d)'
.format(result_start, result_stop, start, stop)
)

# Dense data: just copy using numpy's slice notation
if not self.sparse_serialization:
@@ -685,7 +691,10 @@ def __add_to_slice(self, s_result, result_start, result_stop, start, stop):
# result.
else:
if s_result.shape != (result_start, self.dim):
raise ValueError('Assuption about sparse s_result shape invalid: {0} expected rows, {1} real rows.'.format(result_start, s_result.shape[0]))
raise ValueError(
'Assuption about sparse s_result shape invalid: {0} expected rows, {1} real rows.'
.format(result_start, s_result.shape[0])
)

tmp_matrix = self.current_shard[start:stop]
s_result = sparse.vstack([s_result, tmp_matrix])
@@ -786,7 +795,8 @@ def save_corpus(fname, corpus, id2word=None, progress_cnt=1000, metadata=False,
ShardedCorpus(fname, corpus, **kwargs)

@classmethod
def serialize(serializer, fname, corpus, id2word=None, index_fname=None, progress_cnt=None, labels=None, metadata=False, **kwargs):
def serialize(serializer, fname, corpus, id2word=None, index_fname=None, progress_cnt=None,
labels=None, metadata=False, **kwargs):
"""
Iterate through the document stream `corpus`, saving the documents
as a ShardedCorpus to `fname`.
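Worth noting: the `ValueError` messages re-wrapped in sharded_corpus.py call `.format()` on templates that use `%d` placeholders, so the placeholders are never filled in (a pre-existing quirk; this commit only changes the line breaks). A small demonstration of the mismatch, with invented message text:

```python
# A '%d'-style template passed to str.format() is returned unchanged,
# because .format() only substitutes {}-style fields.
broken = 'range mismatch (%d - %d vs. %d - %d)'.format(1, 2, 3, 4)
print(broken)  # the %d placeholders survive untouched

# Either style works when used consistently:
percent = 'range mismatch (%d - %d vs. %d - %d)' % (1, 2, 3, 4)
braces = 'range mismatch ({0} - {1} vs. {2} - {3})'.format(1, 2, 3, 4)
print(percent == braces)  # -> True
```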
3 changes: 2 additions & 1 deletion gensim/corpora/svmlightcorpus.py
@@ -119,7 +119,8 @@ def line2doc(self, line):
if not parts:
raise ValueError('invalid line format in %s' % self.fname)
target, fields = parts[0], [part.rsplit(':', 1) for part in parts[1:]]
doc = [(int(p1) - 1, float(p2)) for p1, p2 in fields if p1 != 'qid'] # ignore 'qid' features, convert 1-based feature ids to 0-based
# ignore 'qid' features, convert 1-based feature ids to 0-based
doc = [(int(p1) - 1, float(p2)) for p1, p2 in fields if p1 != 'qid']
return doc, target

@staticmethod
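The svmlightcorpus.py parsing step reflowed above splits an SVMlight line into a target and `feature:value` pairs, drops the `qid` metadata field, and shifts SVMlight's 1-based feature ids to 0-based. A self-contained sketch of that logic (simplified from the method shown in the diff):

```python
def line2doc(line):
    """Parse one SVMlight line: '<target> <feat>:<val> ...'."""
    parts = line.split()
    target = parts[0]
    fields = [part.rsplit(':', 1) for part in parts[1:]]
    # ignore 'qid' entries (query metadata, not features);
    # convert 1-based SVMlight feature ids to 0-based
    doc = [(int(p1) - 1, float(p2)) for p1, p2 in fields if p1 != 'qid']
    return doc, target

print(line2doc('1 qid:3 1:0.5 4:2.0'))  # -> ([(0, 0.5), (3, 2.0)], '1')
```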