Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/develop' into lda_tutorials_te…
Browse files Browse the repository at this point in the history
…xt_fix
  • Loading branch information
mpenkov committed Mar 18, 2022
2 parents 00b54dc + a936521 commit f3e1671
Show file tree
Hide file tree
Showing 11 changed files with 119 additions and 43 deletions.
85 changes: 64 additions & 21 deletions .github/workflows/build-wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: [3.6, 3.7, 3.8, 3.9]
python-version: ['3.7', '3.8', '3.9', '3.10']
os: [ubuntu-latest, macos-latest, windows-latest]
platform: [x64]
include:
Expand All @@ -43,11 +43,6 @@ jobs:
# https://github.com/scipy/oldest-supported-numpy/blob/master/setup.cfg
# with the exception that we enforce the minimum version to be 1.17.0.
#
- os: ubuntu-latest
manylinux-version: 2010
python-version: 3.6
build-depends: numpy==1.17.0

- os: ubuntu-latest
manylinux-version: 2010
python-version: 3.7
Expand All @@ -63,11 +58,10 @@ jobs:
python-version: 3.9
build-depends: numpy==1.19.3

- os: macos-latest
travis-os-name: osx
manylinux-version: 1
python-version: 3.6
build-depends: numpy==1.17.0
- os: ubuntu-latest
manylinux-version: 2014
python-version: "3.10"
build-depends: numpy==1.22.2 scipy==1.8.0

- os: macos-latest
travis-os-name: osx
Expand All @@ -87,10 +81,11 @@ jobs:
python-version: 3.9
build-depends: numpy==1.19.3

- os: windows-latest
manylinux-version: 2010
python-version: 3.6
build-depends: numpy==1.17.0
- os: macos-latest
travis-os-name: osx
manylinux-version: 1
python-version: "3.10"
build-depends: numpy==1.22.2 scipy==1.8.0

- os: windows-latest
manylinux-version: 2010
Expand All @@ -107,14 +102,19 @@ jobs:
python-version: 3.9
build-depends: numpy==1.19.3

- os: windows-latest
manylinux-version: 2010
python-version: "3.10"
build-depends: numpy==1.22.2 scipy==1.8.0

env:
PKG_NAME: gensim
REPO_DIR: gensim
BUILD_COMMIT: HEAD
PLAT: x86_64
UNICODE_WIDTH: 32
MB_PYTHON_VERSION: ${{ matrix.python-version }} # MB_PYTHON_VERSION is needed by Multibuild
TEST_DEPENDS: Morfessor==2.0.2a4 python-levenshtein==0.12.0 visdom==0.1.8.9 pytest pytest-cov mock cython nmslib pyemd testfixtures scikit-learn pyemd
TEST_DEPENDS: pytest mock testfixtures
DOCKER_TEST_IMAGE: multibuild/xenial_x86_64
TRAVIS_OS_NAME: ${{ matrix.travis-os-name }}
SKIP_NETWORK_TESTS: 1
Expand Down Expand Up @@ -144,7 +144,7 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install virtualenv
- name: Build and Install Wheels (Multibuild)
- name: Build Wheel (Multibuild)
if: matrix.os != 'windows-latest'
run: |
echo ::group::Set up Multibuild
Expand All @@ -156,17 +156,16 @@ jobs:
before_install
echo ::endgroup::
echo ::group::Build wheel
find . -type f -name "*.egg" -exec rm -v {} \;
build_wheel $REPO_DIR ${{ matrix.PLAT }}
echo ::endgroup::
echo ::group::Install run
install_run ${{ matrix.PLAT }}
echo ::endgroup::
#
# We can't use multibuild on Windows, so we have to roll our own build script.
# Adapted from
# https://github.com/RaRe-Technologies/gensim-wheels/commit/084b863390edee05bbe15d4ec05d1ab726e52202
#
- name: Build and Install Wheels (Windows)
- name: Build Wheel (Windows)
if: matrix.os == 'windows-latest'
run: |
echo ::group::Set up dependencies
Expand All @@ -190,6 +189,50 @@ jobs:
#
mv dist wheelhouse
# Deletes any *.egg files found under the workspace and creates the
# `test_environment` virtual environment.
- name: Prepare for testing
run: |
#
# FIXME: Why are these eggs here?
#
# These eggs prevent the wheel from building and running on Py3.10
#
find . -type f -name "*.egg" -exec rm -v {} \;
python -m venv test_environment
#
# Multibuild has a test step but it essentially just installs the wheel
# and runs the test, and requires a lot of magic to get it working.
# It also does not work under Windows.
# So, we create our own simple test step here.
#
# Sources the virtual environment's activate script, installs the test tools
# and the wheel(s) from wheelhouse/, then runs the gensim test suite from
# inside the test_environment directory.
- name: Install and Test Wheel (Linux, MacOS)
if: matrix.os != 'windows-latest'
run: |
. test_environment/bin/activate
pip install pytest testfixtures mock
pip install wheelhouse/*.whl
cd test_environment
python -c 'import gensim;print(gensim.__version__)'
#
# This part relies on the wheel containing tests and required data.
# If we remove that from the wheel, we'll need to rewrite this step.
#
pytest -rfxEXs --durations=20 --disable-warnings --showlocals --pyargs gensim
#
# Windows needs its own testing step because a virtual environment keeps its
# executables under Scripts/ there.
#
# We invoke the environment's interpreter by path instead of running
# activate.bat: a batch file executes in a child cmd.exe process, so the
# variables it sets never reach the shell that runs the remaining commands,
# and pip/python/pytest would resolve to whatever is already on PATH rather
# than to test_environment.
#
- name: Install and Test Wheel (Windows)
  if: matrix.os == 'windows-latest'
  run: |
    ./test_environment/Scripts/python -m pip install pytest testfixtures mock
    ./test_environment/Scripts/python -m pip install wheelhouse/*.whl
    cd test_environment
    ./Scripts/python -c 'import gensim;print(gensim.__version__)'
    #
    # This part relies on the wheel containing tests and required data.
    #
    ./Scripts/python -m pytest -rfxEXs --durations=20 --disable-warnings --showlocals --pyargs gensim
- name: Upload wheels to s3://gensim-wheels
#
# Only do this if the credentials are set.
Expand Down
12 changes: 7 additions & 5 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,15 @@ jobs:
fail-fast: false
matrix:
include:
- {name: Linux, python: 3.6, os: ubuntu-20.04, tox: 'flake8,flake8-docs'}
- {name: Linux, python: 3.6, os: ubuntu-20.04, tox: 'py36-linux'}
- {name: Linux, python: 3.7, os: ubuntu-20.04, tox: 'flake8,flake8-docs'}
- {name: Linux, python: 3.7, os: ubuntu-20.04, tox: 'py37-linux'}
- {name: Linux, python: 3.8, os: ubuntu-20.04, tox: 'py38-linux'}
- {name: Windows, python: 3.6, os: windows-2019, tox: 'py36-win'}
- {name: Linux, python: 3.8, os: ubuntu-20.04, tox: 'py38-linux-cov'}
- {name: Linux, python: 3.9, os: ubuntu-20.04, tox: 'py39-linux'}
- {name: Linux, python: '3.10', os: ubuntu-20.04, tox: 'py310-linux'}
- {name: Windows, python: 3.7, os: windows-2019, tox: 'py37-win'}
- {name: Windows, python: 3.8, os: windows-2019, tox: 'py38-win'}
- {name: Windows, python: 3.9, os: windows-2019, tox: 'py39-win'}
- {name: Windows, python: '3.10', os: windows-2019, tox: 'py310-win'}
env:
TOX_PARALLEL_NO_SPINNER: 1

Expand Down Expand Up @@ -72,4 +74,4 @@ jobs:
run: |
pwd
COREFILE=$(find . -maxdepth 1 -name "core*" | head -n 1)
if [[ -f "$COREFILE" ]]; then EXECFILE=$(gdb -c "$COREFILE" -batch | grep "Core was generated" | tr -d "\`" | cut -d' ' -f5); file "$COREFILE"; gdb -c "$COREFILE" "$EXECFILE" -x continuous_integration/debug.gdb -batch; fi
if [[ -f "$COREFILE" ]]; then EXECFILE=$(gdb -c "$COREFILE" -batch | grep "Core was generated" | tr -d "\`" | cut -d' ' -f5); file "$COREFILE"; gdb -c "$COREFILE" "$EXECFILE" -x continuous_integration/debug.gdb -batch; fi
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ Changes
* [#3250](https://github.com/RaRe-Technologies/gensim/pull/3250): Make negative ns_exponent work correctly, by [@menshikh-iv](https://github.com/menshikh-iv)
* [#3258](https://github.com/RaRe-Technologies/gensim/pull/3258): Adding another check to _check_corpus_sanity for compressed files, adding test, by [@dchaplinsky](https://github.com/dchaplinsky)
* [#3274](https://github.com/RaRe-Technologies/gensim/pull/3274): Migrate setup.py from distutils to setuptools, by [@geojacobm6](https://github.com/geojacobm6)
* [#3286](https://github.com/RaRe-Technologies/gensim/pull/3286): Fixes 'not enough arguments for format string' error, by [@gilbertfrancois](https://github.com/gilbertfrancois)

## 4.1.2, 2021-09-17

Expand Down
14 changes: 13 additions & 1 deletion config.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,20 @@ function build_wheel_cmd {
# Runs the gensim test suite with pytest, printing the Python version and the
# installed packages first.
function run_tests {
# Runs tests on installed distribution from an empty directory
# Trace each command into the log while the tests run.
set -x
# Record the interpreter version and the installed package versions in the log.
python --version
pip freeze
# --pyargs makes pytest treat `gensim` as a Python package name to import.
pytest -rfxEXs --durations=20 --disable-warnings --showlocals --pyargs gensim
# Turn command tracing back off.
set +x
}

#
# We do this here because we want to upgrade pip before the wheel gets installed.
# docker_test_wrap.sh sources this file before the wheel install. The sourcing
# happens from multiple places, and some of the Python versions can be really
# ancient (e.g. when working outside a virtual environment, using the default
# Python install).
#
# We don't use pip to do the actual upgrade because something appears broken
# with the default pip on the Python 3.10 multibuild image. This is really
# dodgy, but I couldn't work out a better way to get this done.
#
python continuous_integration/upgrade_pip_py310.py
10 changes: 10 additions & 0 deletions continuous_integration/upgrade_pip_py310.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# This script needs to be able to run under both Python 2 and 3 without crashing.
# It only achieves the desired effect under Py3.10 on Linux and macOS; on any
# other interpreter or platform it does nothing.
import subprocess
import sys
import tempfile

if sys.platform in ('linux', 'darwin') and sys.version_info[:2] == (3, 10):
    # Imported here rather than at the top of the file: urllib.request does
    # not exist under Python 2, so a top-level import would crash there.
    import urllib.request

    # Download get-pip.py into a temporary file; the file is deleted
    # automatically when the `with` block exits.
    with tempfile.NamedTemporaryFile(suffix='.py') as fout:
        urllib.request.urlretrieve("https://bootstrap.pypa.io/get-pip.py", fout.name)
        # Run the bootstrap script with the interpreter that is executing
        # this script. The exit status is not checked.
        subprocess.call([sys.executable, fout.name])
4 changes: 3 additions & 1 deletion gensim/models/doc2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -993,7 +993,9 @@ def _scan_vocab(self, corpus_iterable, progress_per, trim_rule):
logger.warning(
"Highest int doctag (%i) larger than count of documents (%i). This means "
"at least %i excess, unused slots (%i bytes) will be allocated for vectors.",
max_rawint, corpus_count, ((max_rawint - corpus_count) * self.vector_size * 4))
max_rawint, corpus_count, max_rawint - corpus_count,
(max_rawint - corpus_count) * self.vector_size * dtype(REAL).itemsize,
)
if max_rawint > -1:
# adjust indexes/list to account for range of pure-int keyed doctags
for key in doctags_list:
Expand Down
8 changes: 5 additions & 3 deletions gensim/test/test_fasttext.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import logging
import unittest
import os
import shutil
import subprocess
import struct
import sys
Expand Down Expand Up @@ -44,7 +45,8 @@
BUCKET = 10000

FT_HOME = os.environ.get("FT_HOME")
FT_CMD = os.path.join(FT_HOME, "fasttext") if FT_HOME else None
FT_CMD = shutil.which("fasttext", path=FT_HOME) or \
shutil.which("fasttext")


new_sentences = [
Expand Down Expand Up @@ -1661,7 +1663,7 @@ def _save_test_model(out_base_fname, model_params):
subprocess.check_call(cmd)


@unittest.skipIf(not FT_HOME, "FT_HOME env variable not set, skipping test")
@unittest.skipIf(not FT_CMD, "fasttext not in FT_HOME or PATH, skipping test")
class SaveFacebookByteIdentityTest(unittest.TestCase):
"""
This class contains tests that check the following scenario:
Expand Down Expand Up @@ -1708,7 +1710,7 @@ def line_to_array(line):
return np.array([line_to_array(line) for line in out.splitlines()], dtype=np.float32)


@unittest.skipIf(not os.environ.get("FT_HOME", None), "FT_HOME env variable not set, skipping test")
@unittest.skipIf(not FT_CMD, "fasttext not in FT_HOME or PATH, skipping test")
class SaveFacebookFormatReadingTest(unittest.TestCase):
"""
This class contains tests that check the following scenario:
Expand Down
7 changes: 4 additions & 3 deletions gensim/test/test_translation_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import logging

import numpy as np
import pytest

from scipy.spatial.distance import cosine
from gensim.models.doc2vec import Doc2Vec
Expand Down Expand Up @@ -61,9 +62,9 @@ def test_translate_nn(self):
for idx, item in enumerate(self.test_word_pairs):
self.assertTrue(item[1] in translated_words[item[0]])

@unittest.skipIf(
(sys.version_info.major == 3) and (sys.version_info.minor == 9) and (sys.platform == 'darwin'),
'blinking test, can be related to <https://github.com/RaRe-Technologies/gensim/issues/2977>'
@pytest.mark.xfail(
sys.platform == 'darwin',
reason='blinking test, can be related to <https://github.com/RaRe-Technologies/gensim/issues/2977>'
)
def test_translate_gc(self):
# Test globally corrected neighbour retrieval method
Expand Down
2 changes: 1 addition & 1 deletion gensim/test/test_word2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -834,7 +834,7 @@ def test_parallel(self):
# the exact vectors and therefore similarities may differ, due to different thread collisions/randomization
# so let's only check that the expected neighbor ranks near the top of the results
neighbor_rank = [word for word, sim in sims].index(expected_neighbor)
self.assertLess(neighbor_rank, 2)
self.assertLess(neighbor_rank, 3)

def test_r_n_g(self):
"""Test word2vec results identical with identical RNG seed."""
Expand Down
8 changes: 4 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,11 +267,9 @@ def run(self):
core_testenv = [
'pytest',
'pytest-cov',
# 'pytest-rerunfailures', # disabled 2020-08-28 for <https://github.com/pytest-dev/pytest-rerunfailures/issues/128>
'mock',
'cython',
'testfixtures',
'Morfessor>=2.0.2a4',
]

if not (sys.platform.lower().startswith("win") and sys.version_info[:2] >= (3, 9)):
Expand Down Expand Up @@ -320,13 +318,12 @@ def run(self):
# to build with any sane version of Cython, so we should update this pin
# periodically.
#
CYTHON_STR = 'Cython==0.29.23'
CYTHON_STR = 'Cython==0.29.28'

install_requires = [
NUMPY_STR,
'scipy >= 0.18.1',
'smart_open >= 1.8.1',
"dataclasses; python_version < '3.7'", # pre-py3.7 needs `dataclasses` backport for use of `dataclass` in doc2vec.py
]

setup_requires = [NUMPY_STR]
Expand All @@ -349,6 +346,9 @@ def run(self):
author_email='me@radimrehurek.com',

url='http://radimrehurek.com/gensim',
project_urls={
'Source': 'https://github.com/RaRe-Technologies/gensim',
},
download_url='http://pypi.python.org/pypi/gensim',

license='LGPL-2.1-only',
Expand Down
11 changes: 7 additions & 4 deletions tox.ini
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tox]
minversion = 2.0
envlist = {py36,py37,py38, py39}-{win,linux}, flake8, docs, docs-upload, download-wheels, upload-wheels, test-pypi
envlist = {py37,py38,py39,py310}-{win,linux}, py38-linux-cov, flake8, docs, docs-upload, download-wheels, upload-wheels, test-pypi
skipsdist = True
platform = linux: linux
win: win64
Expand Down Expand Up @@ -42,9 +42,11 @@ exclude_lines =

ignore_errors = True

#
# Conditional factors https://tox.wiki/en/latest/config.html#factors
#
[pytest]
addopts = -rfxEXs --durations=20 --showlocals --cov=gensim/ --cov-report=xml

addopts = -rfxEXs --durations=20 --showlocals

[testenv]
recreate = True
Expand Down Expand Up @@ -72,7 +74,8 @@ commands =
python --version
pip --version
python setup.py build_ext --inplace
pytest {posargs:gensim/test}
cov: pytest {posargs:gensim/test} --cov=gensim/ --cov-report=xml
!cov: pytest {posargs:gensim/test}


[testenv:flake8]
Expand Down

0 comments on commit f3e1671

Please sign in to comment.