From 3db9c639d198f52b232b1d118db38cd464243a9e Mon Sep 17 00:00:00 2001 From: Johannes Baiter Date: Wed, 3 Jan 2018 20:38:57 +0100 Subject: [PATCH 01/20] Cythonize fasttext.ft_hash for 100x performance improvement --- gensim/models/utils_any2vec.py | 53 +- gensim/models/utils_any2vec_fast.c | 2015 ++++++++++++++++++++++++++ gensim/models/utils_any2vec_fast.pyx | 12 + 3 files changed, 2055 insertions(+), 25 deletions(-) create mode 100644 gensim/models/utils_any2vec_fast.c create mode 100644 gensim/models/utils_any2vec_fast.pyx diff --git a/gensim/models/utils_any2vec.py b/gensim/models/utils_any2vec.py index f0bc9077ed..416fe89166 100644 --- a/gensim/models/utils_any2vec.py +++ b/gensim/models/utils_any2vec.py @@ -17,6 +17,34 @@ logger = logging.getLogger(__name__) +try: + from gensim.models.utils_any2vec_fast import ft_hash as _ft_hash +except ImportError: + # failed... fall back to plain python (~100x slower than the above) + def _ft_hash(string): + """Reproduces [hash method](https://github.com/facebookresearch/fastText/blob/master/src/dictionary.cc) + used in [1]_. + + Parameter + --------- + string : str + The string whose hash needs to be calculated + + Returns + ------- + int + The hash of the string + + """ + # Runtime warnings for integer overflow are raised, this is expected behaviour. These warnings are suppressed. + old_settings = np.seterr(all='ignore') + h = np.uint32(2166136261) + for c in string: + h = h ^ np.uint32(ord(c)) + h = h * np.uint32(16777619) + np.seterr(**old_settings) + return h + def _compute_ngrams(word, min_n, max_n): """Returns the list of all possible ngrams for a given word. @@ -45,31 +73,6 @@ def _compute_ngrams(word, min_n, max_n): return ngrams -def _ft_hash(string): - """Reproduces [hash method](https://github.com/facebookresearch/fastText/blob/master/src/dictionary.cc) - used in [1]_. - - Parameter - --------- - string : str - The string whose hash needs to be calculated - - Returns - ------- - int - The hash of the string - - """ - # Runtime warnings for integer overflow are raised, this is expected behaviour. These warnings are suppressed. - old_settings = np.seterr(all='ignore') - h = np.uint32(2166136261) - for c in string: - h = h ^ np.uint32(ord(c)) - h = h * np.uint32(16777619) - np.seterr(**old_settings) - return h - - def _save_word2vec_format(fname, vocab, vectors, fvocab=None, binary=False, total_vec=None): """Store the input-hidden weight matrix in the same format used by the original C word2vec-tool, for compatibility. diff --git a/gensim/models/utils_any2vec_fast.c b/gensim/models/utils_any2vec_fast.c new file mode 100644 index 0000000000..3980e8462b --- /dev/null +++ b/gensim/models/utils_any2vec_fast.c @@ -0,0 +1,2015 @@ +/* Generated by Cython 0.25.2 */ + +/* BEGIN: Cython Metadata +{ + "distutils": {}, + "module_name": "gensim.models.utils_any2vec_fast" +} +END: Cython Metadata */ + +#define PY_SSIZE_T_CLEAN +#include "Python.h" +#ifndef Py_PYTHON_H + #error Python headers needed to compile C extensions, please install development version of Python. +#elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03020000) + #error Cython requires Python 2.6+ or Python 3.2+. +#else +#define CYTHON_ABI "0_25_2" +#include +#ifndef offsetof + #define offsetof(type, member) ( (size_t) & ((type*)0) -> member ) +#endif +#if !defined(WIN32) && !defined(MS_WINDOWS) + #ifndef __stdcall + #define __stdcall + #endif + #ifndef __cdecl + #define __cdecl + #endif + #ifndef __fastcall + #define __fastcall + #endif +#endif +#ifndef DL_IMPORT + #define DL_IMPORT(t) t +#endif +#ifndef DL_EXPORT + #define DL_EXPORT(t) t +#endif +#ifndef HAVE_LONG_LONG + #if PY_VERSION_HEX >= 0x03030000 || (PY_MAJOR_VERSION == 2 && PY_VERSION_HEX >= 0x02070000) + #define HAVE_LONG_LONG + #endif +#endif +#ifndef PY_LONG_LONG + #define PY_LONG_LONG LONG_LONG +#endif +#ifndef Py_HUGE_VAL + #define Py_HUGE_VAL HUGE_VAL +#endif +#ifdef PYPY_VERSION + #define CYTHON_COMPILING_IN_PYPY 1 + #define CYTHON_COMPILING_IN_PYSTON 0 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #undef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 0 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #undef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 0 + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #undef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 1 + #undef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 0 + #undef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 0 + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 +#elif defined(PYSTON_VERSION) + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_PYSTON 1 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #ifndef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 1 + #endif + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #ifndef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 1 + #endif + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #ifndef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 0 + #endif + #ifndef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 1 + #endif + #ifndef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 1 + #endif + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 +#else + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_PYSTON 0 + #define CYTHON_COMPILING_IN_CPYTHON 1 + #ifndef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 1 + #endif + #if PY_MAJOR_VERSION < 3 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #elif !defined(CYTHON_USE_ASYNC_SLOTS) + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #if PY_VERSION_HEX < 0x02070000 + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #elif !defined(CYTHON_USE_PYLONG_INTERNALS) + #define CYTHON_USE_PYLONG_INTERNALS 1 + #endif + #ifndef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 1 + #endif + #ifndef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 1 + #endif + #if PY_VERSION_HEX < 0x030300F0 + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #elif !defined(CYTHON_USE_UNICODE_WRITER) + #define CYTHON_USE_UNICODE_WRITER 1 + #endif + #ifndef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 0 + #endif + #ifndef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 1 + #endif + #ifndef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 1 + #endif + #ifndef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 1 + #endif + #ifndef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 1 + #endif +#endif +#if !defined(CYTHON_FAST_PYCCALL) +#define CYTHON_FAST_PYCCALL (CYTHON_FAST_PYCALL && PY_VERSION_HEX >= 0x030600B1) +#endif +#if CYTHON_USE_PYLONG_INTERNALS + #include "longintrepr.h" + #undef SHIFT + #undef BASE + #undef MASK +#endif +#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX < 0x02070600 && !defined(Py_OptimizeFlag) + #define Py_OptimizeFlag 0 +#endif +#define __PYX_BUILD_PY_SSIZE_T "n" +#define CYTHON_FORMAT_SSIZE_T "z" +#if PY_MAJOR_VERSION < 3 + #define __Pyx_BUILTIN_MODULE_NAME "__builtin__" + #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) + #define __Pyx_DefaultClassType PyClass_Type +#else + #define __Pyx_BUILTIN_MODULE_NAME "builtins" + #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) + #define __Pyx_DefaultClassType PyType_Type +#endif +#ifndef Py_TPFLAGS_CHECKTYPES + #define Py_TPFLAGS_CHECKTYPES 0 +#endif +#ifndef Py_TPFLAGS_HAVE_INDEX + #define Py_TPFLAGS_HAVE_INDEX 0 +#endif +#ifndef Py_TPFLAGS_HAVE_NEWBUFFER + #define Py_TPFLAGS_HAVE_NEWBUFFER 0 +#endif +#ifndef Py_TPFLAGS_HAVE_FINALIZE + #define Py_TPFLAGS_HAVE_FINALIZE 0 +#endif +#ifndef METH_FASTCALL + #define METH_FASTCALL 0x80 + typedef PyObject *(*__Pyx_PyCFunctionFast) (PyObject *self, PyObject **args, + Py_ssize_t nargs, PyObject *kwnames); +#else + #define __Pyx_PyCFunctionFast _PyCFunctionFast +#endif +#if CYTHON_FAST_PYCCALL +#define __Pyx_PyFastCFunction_Check(func)\ + ((PyCFunction_Check(func) && (METH_FASTCALL == (PyCFunction_GET_FLAGS(func) & ~(METH_CLASS | METH_STATIC | METH_COEXIST))))) +#else +#define __Pyx_PyFastCFunction_Check(func) 0 +#endif +#if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND) + #define CYTHON_PEP393_ENABLED 1 + #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\ + 0 : _PyUnicode_Ready((PyObject *)(op))) + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i) + #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) PyUnicode_MAX_CHAR_VALUE(u) + #define __Pyx_PyUnicode_KIND(u) PyUnicode_KIND(u) + #define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u) + #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i) + #define __Pyx_PyUnicode_WRITE(k, d, i, ch) PyUnicode_WRITE(k, d, i, ch) + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u))) +#else + #define CYTHON_PEP393_ENABLED 0 + #define PyUnicode_1BYTE_KIND 1 + #define PyUnicode_2BYTE_KIND 2 + #define PyUnicode_4BYTE_KIND 4 + #define __Pyx_PyUnicode_READY(op) (0) + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i])) + #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) ((sizeof(Py_UNICODE) == 2) ? 65535 : 1114111) + #define __Pyx_PyUnicode_KIND(u) (sizeof(Py_UNICODE)) + #define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u)) + #define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i])) + #define __Pyx_PyUnicode_WRITE(k, d, i, ch) (((void)(k)), ((Py_UNICODE*)d)[i] = ch) + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_SIZE(u)) +#endif +#if CYTHON_COMPILING_IN_PYPY + #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b) + #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b) +#else + #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b) + #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ?\ + PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b)) +#endif +#if CYTHON_COMPILING_IN_PYPY && !defined(PyUnicode_Contains) + #define PyUnicode_Contains(u, s) PySequence_Contains(u, s) +#endif +#if CYTHON_COMPILING_IN_PYPY && !defined(PyByteArray_Check) + #define PyByteArray_Check(obj) PyObject_TypeCheck(obj, &PyByteArray_Type) +#endif +#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Format) + #define PyObject_Format(obj, fmt) PyObject_CallMethod(obj, "__format__", "O", fmt) +#endif +#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Malloc) + #define PyObject_Malloc(s) PyMem_Malloc(s) + #define PyObject_Free(p) PyMem_Free(p) + #define PyObject_Realloc(p) PyMem_Realloc(p) +#endif +#if CYTHON_COMPILING_IN_PYSTON + #define __Pyx_PyCode_HasFreeVars(co) PyCode_HasFreeVars(co) + #define __Pyx_PyFrame_SetLineNumber(frame, lineno) PyFrame_SetLineNumber(frame, lineno) +#else + #define __Pyx_PyCode_HasFreeVars(co) (PyCode_GetNumFree(co) > 0) + #define __Pyx_PyFrame_SetLineNumber(frame, lineno) (frame)->f_lineno = (lineno) +#endif +#define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b)) +#define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : PyUnicode_Format(a, b)) +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b) +#else + #define __Pyx_PyString_Format(a, b) PyString_Format(a, b) +#endif +#if PY_MAJOR_VERSION < 3 && !defined(PyObject_ASCII) + #define PyObject_ASCII(o) PyObject_Repr(o) +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyBaseString_Type PyUnicode_Type + #define PyStringObject PyUnicodeObject + #define PyString_Type PyUnicode_Type + #define PyString_Check PyUnicode_Check + #define PyString_CheckExact PyUnicode_CheckExact +#endif +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj) + #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj) +#else + #define __Pyx_PyBaseString_Check(obj) (PyString_Check(obj) || PyUnicode_Check(obj)) + #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj)) +#endif +#ifndef PySet_CheckExact + #define PySet_CheckExact(obj) (Py_TYPE(obj) == &PySet_Type) +#endif +#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type) +#define __Pyx_PyException_Check(obj) __Pyx_TypeCheck(obj, PyExc_Exception) +#if PY_MAJOR_VERSION >= 3 + #define PyIntObject PyLongObject + #define PyInt_Type PyLong_Type + #define PyInt_Check(op) PyLong_Check(op) + #define PyInt_CheckExact(op) PyLong_CheckExact(op) + #define PyInt_FromString PyLong_FromString + #define PyInt_FromUnicode PyLong_FromUnicode + #define PyInt_FromLong PyLong_FromLong + #define PyInt_FromSize_t PyLong_FromSize_t + #define PyInt_FromSsize_t PyLong_FromSsize_t + #define PyInt_AsLong PyLong_AsLong + #define PyInt_AS_LONG PyLong_AS_LONG + #define PyInt_AsSsize_t PyLong_AsSsize_t + #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask + #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask + #define PyNumber_Int PyNumber_Long +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyBoolObject PyLongObject +#endif +#if PY_MAJOR_VERSION >= 3 && CYTHON_COMPILING_IN_PYPY + #ifndef PyUnicode_InternFromString + #define PyUnicode_InternFromString(s) PyUnicode_FromString(s) + #endif +#endif +#if PY_VERSION_HEX < 0x030200A4 + typedef long Py_hash_t; + #define __Pyx_PyInt_FromHash_t PyInt_FromLong + #define __Pyx_PyInt_AsHash_t PyInt_AsLong +#else + #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t + #define __Pyx_PyInt_AsHash_t PyInt_AsSsize_t +#endif +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyMethod_New(func, self, klass) ((self) ? PyMethod_New(func, self) : PyInstanceMethod_New(func)) +#else + #define __Pyx_PyMethod_New(func, self, klass) PyMethod_New(func, self, klass) +#endif +#if CYTHON_USE_ASYNC_SLOTS + #if PY_VERSION_HEX >= 0x030500B1 + #define __Pyx_PyAsyncMethodsStruct PyAsyncMethods + #define __Pyx_PyType_AsAsync(obj) (Py_TYPE(obj)->tp_as_async) + #else + typedef struct { + unaryfunc am_await; + unaryfunc am_aiter; + unaryfunc am_anext; + } __Pyx_PyAsyncMethodsStruct; + #define __Pyx_PyType_AsAsync(obj) ((__Pyx_PyAsyncMethodsStruct*) (Py_TYPE(obj)->tp_reserved)) + #endif +#else + #define __Pyx_PyType_AsAsync(obj) NULL +#endif +#ifndef CYTHON_RESTRICT + #if defined(__GNUC__) + #define CYTHON_RESTRICT __restrict__ + #elif defined(_MSC_VER) && _MSC_VER >= 1400 + #define CYTHON_RESTRICT __restrict + #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define CYTHON_RESTRICT restrict + #else + #define CYTHON_RESTRICT + #endif +#endif +#ifndef CYTHON_UNUSED +# if defined(__GNUC__) +# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) +# define CYTHON_UNUSED __attribute__ ((__unused__)) +# else +# define CYTHON_UNUSED +# endif +# elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER)) +# define CYTHON_UNUSED __attribute__ ((__unused__)) +# else +# define CYTHON_UNUSED +# endif +#endif +#ifndef CYTHON_MAYBE_UNUSED_VAR +# if defined(__cplusplus) + template void CYTHON_MAYBE_UNUSED_VAR( const T& ) { } +# else +# define CYTHON_MAYBE_UNUSED_VAR(x) (void)(x) +# endif +#endif +#ifndef CYTHON_NCP_UNUSED +# if CYTHON_COMPILING_IN_CPYTHON +# define CYTHON_NCP_UNUSED +# else +# define CYTHON_NCP_UNUSED CYTHON_UNUSED +# endif +#endif +#define __Pyx_void_to_None(void_result) ((void)(void_result), Py_INCREF(Py_None), Py_None) + +#ifndef CYTHON_INLINE + #if defined(__clang__) + #define CYTHON_INLINE __inline__ __attribute__ ((__unused__)) + #elif defined(__GNUC__) + #define CYTHON_INLINE __inline__ + #elif defined(_MSC_VER) + #define CYTHON_INLINE __inline + #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define CYTHON_INLINE inline + #else + #define CYTHON_INLINE + #endif +#endif + +#if defined(WIN32) || defined(MS_WINDOWS) + #define _USE_MATH_DEFINES +#endif +#include +#ifdef NAN +#define __PYX_NAN() ((float) NAN) +#else +static CYTHON_INLINE float __PYX_NAN() { + float value; + memset(&value, 0xFF, sizeof(value)); + return value; +} +#endif +#if defined(__CYGWIN__) && defined(_LDBL_EQ_DBL) +#define __Pyx_truncl trunc +#else +#define __Pyx_truncl truncl +#endif + + +#define __PYX_ERR(f_index, lineno, Ln_error) \ +{ \ + __pyx_filename = __pyx_f[f_index]; __pyx_lineno = lineno; __pyx_clineno = __LINE__; goto Ln_error; \ +} + +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y) + #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y) +#else + #define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y) + #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y) +#endif + +#ifndef __PYX_EXTERN_C + #ifdef __cplusplus + #define __PYX_EXTERN_C extern "C" + #else + #define __PYX_EXTERN_C extern + #endif +#endif + +#define __PYX_HAVE__gensim__models__utils_any2vec_fast +#define __PYX_HAVE_API__gensim__models__utils_any2vec_fast +#ifdef _OPENMP +#include +#endif /* _OPENMP */ + +#ifdef PYREX_WITHOUT_ASSERTIONS +#define CYTHON_WITHOUT_ASSERTIONS +#endif + +typedef struct {PyObject **p; const char *s; const Py_ssize_t n; const char* encoding; + const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry; + +#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0 +#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 0 +#define __PYX_DEFAULT_STRING_ENCODING "" +#define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString +#define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize +#define __Pyx_uchar_cast(c) ((unsigned char)c) +#define __Pyx_long_cast(x) ((long)x) +#define __Pyx_fits_Py_ssize_t(v, type, is_signed) (\ + (sizeof(type) < sizeof(Py_ssize_t)) ||\ + (sizeof(type) > sizeof(Py_ssize_t) &&\ + likely(v < (type)PY_SSIZE_T_MAX ||\ + v == (type)PY_SSIZE_T_MAX) &&\ + (!is_signed || likely(v > (type)PY_SSIZE_T_MIN ||\ + v == (type)PY_SSIZE_T_MIN))) ||\ + (sizeof(type) == sizeof(Py_ssize_t) &&\ + (is_signed || likely(v < (type)PY_SSIZE_T_MAX ||\ + v == (type)PY_SSIZE_T_MAX))) ) +#if defined (__cplusplus) && __cplusplus >= 201103L + #include + #define __Pyx_sst_abs(value) std::abs(value) +#elif SIZEOF_INT >= SIZEOF_SIZE_T + #define __Pyx_sst_abs(value) abs(value) +#elif SIZEOF_LONG >= SIZEOF_SIZE_T + #define __Pyx_sst_abs(value) labs(value) +#elif defined (_MSC_VER) && defined (_M_X64) + #define __Pyx_sst_abs(value) _abs64(value) +#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define __Pyx_sst_abs(value) llabs(value) +#elif defined (__GNUC__) + #define __Pyx_sst_abs(value) __builtin_llabs(value) +#else + #define __Pyx_sst_abs(value) ((value<0) ? -value : value) +#endif +static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject*); +static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length); +#define __Pyx_PyByteArray_FromString(s) PyByteArray_FromStringAndSize((const char*)s, strlen((const char*)s)) +#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l) +#define __Pyx_PyBytes_FromString PyBytes_FromString +#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*); +#if PY_MAJOR_VERSION < 3 + #define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString + #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize +#else + #define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString + #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize +#endif +#define __Pyx_PyObject_AsSString(s) ((signed char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsUString(s) ((unsigned char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_FromCString(s) __Pyx_PyObject_FromString((const char*)s) +#define __Pyx_PyBytes_FromCString(s) __Pyx_PyBytes_FromString((const char*)s) +#define __Pyx_PyByteArray_FromCString(s) __Pyx_PyByteArray_FromString((const char*)s) +#define __Pyx_PyStr_FromCString(s) __Pyx_PyStr_FromString((const char*)s) +#define __Pyx_PyUnicode_FromCString(s) __Pyx_PyUnicode_FromString((const char*)s) +#if PY_MAJOR_VERSION < 3 +static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u) +{ + const Py_UNICODE *u_end = u; + while (*u_end++) ; + return (size_t)(u_end - u - 1); +} +#else +#define __Pyx_Py_UNICODE_strlen Py_UNICODE_strlen +#endif +#define __Pyx_PyUnicode_FromUnicode(u) PyUnicode_FromUnicode(u, __Pyx_Py_UNICODE_strlen(u)) +#define __Pyx_PyUnicode_FromUnicodeAndLength PyUnicode_FromUnicode +#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode +#define __Pyx_NewRef(obj) (Py_INCREF(obj), obj) +#define __Pyx_Owned_Py_None(b) __Pyx_NewRef(Py_None) +#define __Pyx_PyBool_FromLong(b) ((b) ? __Pyx_NewRef(Py_True) : __Pyx_NewRef(Py_False)) +static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*); +static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x); +static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*); +static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t); +#if CYTHON_ASSUME_SAFE_MACROS +#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x)) +#else +#define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x) +#endif +#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x)) +#if PY_MAJOR_VERSION >= 3 +#define __Pyx_PyNumber_Int(x) (PyLong_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Long(x)) +#else +#define __Pyx_PyNumber_Int(x) (PyInt_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Int(x)) +#endif +#define __Pyx_PyNumber_Float(x) (PyFloat_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Float(x)) +#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII +static int __Pyx_sys_getdefaultencoding_not_ascii; +static int __Pyx_init_sys_getdefaultencoding_params(void) { + PyObject* sys; + PyObject* default_encoding = NULL; + PyObject* ascii_chars_u = NULL; + PyObject* ascii_chars_b = NULL; + const char* default_encoding_c; + sys = PyImport_ImportModule("sys"); + if (!sys) goto bad; + default_encoding = PyObject_CallMethod(sys, (char*) "getdefaultencoding", NULL); + Py_DECREF(sys); + if (!default_encoding) goto bad; + default_encoding_c = PyBytes_AsString(default_encoding); + if (!default_encoding_c) goto bad; + if (strcmp(default_encoding_c, "ascii") == 0) { + __Pyx_sys_getdefaultencoding_not_ascii = 0; + } else { + char ascii_chars[128]; + int c; + for (c = 0; c < 128; c++) { + ascii_chars[c] = c; + } + __Pyx_sys_getdefaultencoding_not_ascii = 1; + ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL); + if (!ascii_chars_u) goto bad; + ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL); + if (!ascii_chars_b || !PyBytes_Check(ascii_chars_b) || memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) { + PyErr_Format( + PyExc_ValueError, + "This module compiled with c_string_encoding=ascii, but default encoding '%.200s' is not a superset of ascii.", + default_encoding_c); + goto bad; + } + Py_DECREF(ascii_chars_u); + Py_DECREF(ascii_chars_b); + } + Py_DECREF(default_encoding); + return 0; +bad: + Py_XDECREF(default_encoding); + Py_XDECREF(ascii_chars_u); + Py_XDECREF(ascii_chars_b); + return -1; +} +#endif +#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3 +#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL) +#else +#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL) +#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT +static char* __PYX_DEFAULT_STRING_ENCODING; +static int __Pyx_init_sys_getdefaultencoding_params(void) { + PyObject* sys; + PyObject* default_encoding = NULL; + char* default_encoding_c; + sys = PyImport_ImportModule("sys"); + if (!sys) goto bad; + default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL); + Py_DECREF(sys); + if (!default_encoding) goto bad; + default_encoding_c = PyBytes_AsString(default_encoding); + if (!default_encoding_c) goto bad; + __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c)); + if (!__PYX_DEFAULT_STRING_ENCODING) goto bad; + strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c); + Py_DECREF(default_encoding); + return 0; +bad: + Py_XDECREF(default_encoding); + return -1; +} +#endif +#endif + + +/* Test for GCC > 2.95 */ +#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))) + #define likely(x) __builtin_expect(!!(x), 1) + #define unlikely(x) __builtin_expect(!!(x), 0) +#else /* !__GNUC__ or GCC < 2.95 */ + #define likely(x) (x) + #define unlikely(x) (x) +#endif /* __GNUC__ */ + +static PyObject *__pyx_m; +static PyObject *__pyx_d; +static PyObject *__pyx_b; +static PyObject *__pyx_empty_tuple; +static PyObject *__pyx_empty_bytes; +static PyObject *__pyx_empty_unicode; +static int __pyx_lineno; +static int __pyx_clineno = 0; +static const char * __pyx_cfilenm= __FILE__; +static const char *__pyx_filename; + + +static const char *__pyx_f[] = { + "gensim/models/utils_any2vec_fast.pyx", +}; + +/*--- Type declarations ---*/ + +/* --- Runtime support code (head) --- */ +/* Refnanny.proto */ +#ifndef CYTHON_REFNANNY + #define CYTHON_REFNANNY 0 +#endif +#if CYTHON_REFNANNY + typedef struct { + void (*INCREF)(void*, PyObject*, int); + void (*DECREF)(void*, PyObject*, int); + void (*GOTREF)(void*, PyObject*, int); + void (*GIVEREF)(void*, PyObject*, int); + void* (*SetupContext)(const char*, int, const char*); + void (*FinishContext)(void**); + } __Pyx_RefNannyAPIStruct; + static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL; + static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname); + #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL; +#ifdef WITH_THREAD + #define __Pyx_RefNannySetupContext(name, acquire_gil)\ + if (acquire_gil) {\ + PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\ + PyGILState_Release(__pyx_gilstate_save);\ + } else {\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\ + } +#else + #define __Pyx_RefNannySetupContext(name, acquire_gil)\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__) +#endif + #define __Pyx_RefNannyFinishContext()\ + __Pyx_RefNanny->FinishContext(&__pyx_refnanny) + #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), __LINE__) + #define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), __LINE__) + #define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), __LINE__) + #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), __LINE__) + #define __Pyx_XINCREF(r) do { if((r) != NULL) {__Pyx_INCREF(r); }} while(0) + #define __Pyx_XDECREF(r) do { if((r) != NULL) {__Pyx_DECREF(r); }} while(0) + #define __Pyx_XGOTREF(r) do { if((r) != NULL) {__Pyx_GOTREF(r); }} while(0) + #define __Pyx_XGIVEREF(r) do { if((r) != NULL) {__Pyx_GIVEREF(r);}} while(0) +#else + #define __Pyx_RefNannyDeclarations + #define __Pyx_RefNannySetupContext(name, acquire_gil) + #define __Pyx_RefNannyFinishContext() + #define __Pyx_INCREF(r) Py_INCREF(r) + #define __Pyx_DECREF(r) Py_DECREF(r) + #define __Pyx_GOTREF(r) + #define __Pyx_GIVEREF(r) + #define __Pyx_XINCREF(r) Py_XINCREF(r) + #define __Pyx_XDECREF(r) Py_XDECREF(r) + #define __Pyx_XGOTREF(r) + #define __Pyx_XGIVEREF(r) +#endif +#define __Pyx_XDECREF_SET(r, v) do {\ + PyObject *tmp = (PyObject *) r;\ + r = v; __Pyx_XDECREF(tmp);\ + } while (0) +#define __Pyx_DECREF_SET(r, v) do {\ + PyObject *tmp = (PyObject *) r;\ + r = v; __Pyx_DECREF(tmp);\ + } while (0) +#define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0) +#define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0) + +/* ArgTypeTest.proto */ +static CYTHON_INLINE int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, + const char *name, int exact); + +/* unicode_iter.proto */ +static CYTHON_INLINE int __Pyx_init_unicode_iteration( + PyObject* ustring, Py_ssize_t *length, void** data, int *kind); + +/* CodeObjectCache.proto */ +typedef struct { + PyCodeObject* code_object; + int code_line; +} __Pyx_CodeObjectCacheEntry; +struct __Pyx_CodeObjectCache { + int count; + int max_count; + __Pyx_CodeObjectCacheEntry* entries; +}; +static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL}; +static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line); +static PyCodeObject *__pyx_find_code_object(int code_line); +static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object); + +/* AddTraceback.proto */ +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename); + +/* CIntToPy.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_int(unsigned int value); + +/* CIntToPy.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value); + +/* CIntFromPy.proto */ +static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *); + +/* CIntFromPy.proto */ +static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *); + +/* CheckBinaryVersion.proto */ +static int __Pyx_check_binary_version(void); + +/* InitStrings.proto */ +static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); + + +/* Module declarations from 'gensim.models.utils_any2vec_fast' */ +#define __Pyx_MODULE_NAME "gensim.models.utils_any2vec_fast" +int __pyx_module_is_main_gensim__models__utils_any2vec_fast = 0; + +/* Implementation of 'gensim.models.utils_any2vec_fast' */ +static const char __pyx_k_c[] = "c"; +static const char __pyx_k_h[] = "h"; +static const char __pyx_k_main[] = "__main__"; +static const char __pyx_k_test[] = "__test__"; +static const char __pyx_k_string[] = "string"; +static const char __pyx_k_ft_hash[] = "ft_hash"; +static const char __pyx_k_home_jojo_projekte_gensim_gensi[] = "/home/jojo/projekte/gensim/gensim/models/utils_any2vec_fast.pyx"; +static const char __pyx_k_gensim_models_utils_any2vec_fast[] = "gensim.models.utils_any2vec_fast"; +static PyObject *__pyx_n_s_c; +static PyObject *__pyx_n_s_ft_hash; +static PyObject *__pyx_n_s_gensim_models_utils_any2vec_fast; +static PyObject *__pyx_n_s_h; +static PyObject *__pyx_kp_s_home_jojo_projekte_gensim_gensi; +static PyObject *__pyx_n_s_main; +static PyObject *__pyx_n_s_string; +static PyObject *__pyx_n_s_test; +static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_ft_hash(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_string); /* proto */ +static PyObject *__pyx_tuple_; +static PyObject *__pyx_codeobj__2; + +/* "gensim/models/utils_any2vec_fast.pyx":7 + * # coding: utf-8 + * + * def ft_hash(unicode string): # <<<<<<<<<<<<<< + * cdef unsigned int h = 2166136261 + * for c in string: + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_6gensim_6models_18utils_any2vec_fast_1ft_hash(PyObject *__pyx_self, PyObject *__pyx_v_string); /*proto*/ +static PyMethodDef __pyx_mdef_6gensim_6models_18utils_any2vec_fast_1ft_hash = {"ft_hash", (PyCFunction)__pyx_pw_6gensim_6models_18utils_any2vec_fast_1ft_hash, METH_O, 0}; +static PyObject *__pyx_pw_6gensim_6models_18utils_any2vec_fast_1ft_hash(PyObject *__pyx_self, PyObject *__pyx_v_string) { + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("ft_hash (wrapper)", 0); + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_string), (&PyUnicode_Type), 1, "string", 1))) __PYX_ERR(0, 7, __pyx_L1_error) + __pyx_r = __pyx_pf_6gensim_6models_18utils_any2vec_fast_ft_hash(__pyx_self, ((PyObject*)__pyx_v_string)); + + /* function exit code */ + goto __pyx_L0; + __pyx_L1_error:; + __pyx_r = NULL; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_ft_hash(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_string) { + unsigned int __pyx_v_h; + Py_UCS4 __pyx_v_c; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + Py_ssize_t __pyx_t_2; + Py_ssize_t __pyx_t_3; + void *__pyx_t_4; + int __pyx_t_5; + int __pyx_t_6; + Py_ssize_t __pyx_t_7; + PyObject *__pyx_t_8 = NULL; + __Pyx_RefNannySetupContext("ft_hash", 0); + + /* "gensim/models/utils_any2vec_fast.pyx":8 + * + * def ft_hash(unicode string): + * cdef unsigned int h = 2166136261 # <<<<<<<<<<<<<< + * for c in string: + * h ^= ord(c) + */ + __pyx_v_h = 0x811C9DC5; + + /* "gensim/models/utils_any2vec_fast.pyx":9 + * def ft_hash(unicode string): + * cdef unsigned int h = 2166136261 + * for c in string: # <<<<<<<<<<<<<< + * h ^= ord(c) + * h *= 16777619 + */ + if (unlikely(__pyx_v_string == Py_None)) { + PyErr_SetString(PyExc_TypeError, "'NoneType' is not iterable"); + __PYX_ERR(0, 9, __pyx_L1_error) + } + __Pyx_INCREF(__pyx_v_string); + __pyx_t_1 = __pyx_v_string; + __pyx_t_6 = __Pyx_init_unicode_iteration(__pyx_t_1, (&__pyx_t_3), (&__pyx_t_4), (&__pyx_t_5)); if (unlikely(__pyx_t_6 == -1)) __PYX_ERR(0, 9, __pyx_L1_error) + for (__pyx_t_7 = 0; __pyx_t_7 < __pyx_t_3; __pyx_t_7++) { + __pyx_t_2 = __pyx_t_7; + __pyx_v_c = __Pyx_PyUnicode_READ(__pyx_t_5, __pyx_t_4, __pyx_t_2); + + /* "gensim/models/utils_any2vec_fast.pyx":10 + * cdef unsigned int h = 2166136261 + * for c in string: + * h ^= ord(c) # <<<<<<<<<<<<<< + * h *= 16777619 + * return h + */ + __pyx_v_h = (__pyx_v_h ^ ((long)__pyx_v_c)); + + /* "gensim/models/utils_any2vec_fast.pyx":11 + * for c in string: + * h ^= ord(c) + * h *= 16777619 # <<<<<<<<<<<<<< + * return h + */ + __pyx_v_h = (__pyx_v_h * 0x1000193); + } + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "gensim/models/utils_any2vec_fast.pyx":12 + * h ^= ord(c) + * h *= 16777619 + * return h # <<<<<<<<<<<<<< + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_8 = __Pyx_PyInt_From_unsigned_int(__pyx_v_h); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 12, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_8); + __pyx_r = __pyx_t_8; + __pyx_t_8 = 0; + goto __pyx_L0; + + /* "gensim/models/utils_any2vec_fast.pyx":7 + * # coding: utf-8 + * + * def ft_hash(unicode string): # <<<<<<<<<<<<<< + * cdef unsigned int h = 2166136261 + * for c in string: + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_8); + __Pyx_AddTraceback("gensim.models.utils_any2vec_fast.ft_hash", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyMethodDef __pyx_methods[] = { + {0, 0, 0, 0} +}; + +#if PY_MAJOR_VERSION >= 3 +static struct PyModuleDef __pyx_moduledef = { + #if PY_VERSION_HEX < 0x03020000 + { PyObject_HEAD_INIT(NULL) NULL, 0, NULL }, + #else + PyModuleDef_HEAD_INIT, + #endif + "utils_any2vec_fast", + 0, /* m_doc */ + -1, /* m_size */ + __pyx_methods /* m_methods */, + NULL, /* m_reload */ + NULL, /* m_traverse */ + NULL, /* m_clear */ + NULL /* m_free */ +}; +#endif + +static __Pyx_StringTabEntry __pyx_string_tab[] = { + {&__pyx_n_s_c, __pyx_k_c, sizeof(__pyx_k_c), 0, 0, 1, 1}, + {&__pyx_n_s_ft_hash, __pyx_k_ft_hash, sizeof(__pyx_k_ft_hash), 0, 0, 1, 1}, + {&__pyx_n_s_gensim_models_utils_any2vec_fast, __pyx_k_gensim_models_utils_any2vec_fast, sizeof(__pyx_k_gensim_models_utils_any2vec_fast), 0, 0, 1, 1}, + {&__pyx_n_s_h, __pyx_k_h, sizeof(__pyx_k_h), 0, 0, 1, 1}, + {&__pyx_kp_s_home_jojo_projekte_gensim_gensi, __pyx_k_home_jojo_projekte_gensim_gensi, sizeof(__pyx_k_home_jojo_projekte_gensim_gensi), 0, 0, 1, 0}, + {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1}, + {&__pyx_n_s_string, __pyx_k_string, sizeof(__pyx_k_string), 0, 0, 1, 1}, + {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, + {0, 0, 0, 0, 0, 0, 0} +}; +static int __Pyx_InitCachedBuiltins(void) { + return 0; +} + +static int __Pyx_InitCachedConstants(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); + + /* "gensim/models/utils_any2vec_fast.pyx":7 + * # coding: utf-8 + * + * def ft_hash(unicode string): # <<<<<<<<<<<<<< + * cdef unsigned int h = 2166136261 + * for c in string: + */ + __pyx_tuple_ = PyTuple_Pack(3, __pyx_n_s_string, __pyx_n_s_h, __pyx_n_s_c); if (unlikely(!__pyx_tuple_)) __PYX_ERR(0, 7, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple_); + __Pyx_GIVEREF(__pyx_tuple_); + __pyx_codeobj__2 = (PyObject*)__Pyx_PyCode_New(1, 0, 3, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple_, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_jojo_projekte_gensim_gensi, __pyx_n_s_ft_hash, 7, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__2)) __PYX_ERR(0, 7, __pyx_L1_error) + __Pyx_RefNannyFinishContext(); + return 0; + __pyx_L1_error:; + __Pyx_RefNannyFinishContext(); + return -1; +} + +static int __Pyx_InitGlobals(void) { + if (__Pyx_InitStrings(__pyx_string_tab) < 0) __PYX_ERR(0, 1, __pyx_L1_error); + return 0; + __pyx_L1_error:; + return -1; +} + +#if PY_MAJOR_VERSION < 3 +PyMODINIT_FUNC initutils_any2vec_fast(void); /*proto*/ +PyMODINIT_FUNC initutils_any2vec_fast(void) +#else +PyMODINIT_FUNC PyInit_utils_any2vec_fast(void); /*proto*/ +PyMODINIT_FUNC PyInit_utils_any2vec_fast(void) +#endif +{ + PyObject *__pyx_t_1 = NULL; + __Pyx_RefNannyDeclarations + #if CYTHON_REFNANNY + __Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny"); + if (!__Pyx_RefNanny) { + PyErr_Clear(); + __Pyx_RefNanny = __Pyx_RefNannyImportAPI("Cython.Runtime.refnanny"); + if (!__Pyx_RefNanny) + Py_FatalError("failed to import 'refnanny' module"); + } + #endif + __Pyx_RefNannySetupContext("PyMODINIT_FUNC PyInit_utils_any2vec_fast(void)", 0); + if (__Pyx_check_binary_version() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_empty_unicode = PyUnicode_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_unicode)) __PYX_ERR(0, 1, __pyx_L1_error) + #ifdef __Pyx_CyFunction_USED + if (__pyx_CyFunction_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_FusedFunction_USED + if (__pyx_FusedFunction_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_Coroutine_USED + if (__pyx_Coroutine_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_Generator_USED + if (__pyx_Generator_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_StopAsyncIteration_USED + if (__pyx_StopAsyncIteration_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + /*--- Library function declarations ---*/ + /*--- Threads initialization code ---*/ + #if defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS + #ifdef WITH_THREAD /* Python build with threading support? */ + PyEval_InitThreads(); + #endif + #endif + /*--- Module creation code ---*/ + #if PY_MAJOR_VERSION < 3 + __pyx_m = Py_InitModule4("utils_any2vec_fast", __pyx_methods, 0, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m); + #else + __pyx_m = PyModule_Create(&__pyx_moduledef); + #endif + if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_d = PyModule_GetDict(__pyx_m); if (unlikely(!__pyx_d)) __PYX_ERR(0, 1, __pyx_L1_error) + Py_INCREF(__pyx_d); + __pyx_b = PyImport_AddModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_b)) __PYX_ERR(0, 1, __pyx_L1_error) + #if CYTHON_COMPILING_IN_PYPY + Py_INCREF(__pyx_b); + #endif + if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) __PYX_ERR(0, 1, __pyx_L1_error); + /*--- Initialize various global constants etc. ---*/ + if (__Pyx_InitGlobals() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) + if (__Pyx_init_sys_getdefaultencoding_params() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + if (__pyx_module_is_main_gensim__models__utils_any2vec_fast) { + if (PyObject_SetAttrString(__pyx_m, "__name__", __pyx_n_s_main) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + } + #if PY_MAJOR_VERSION >= 3 + { + PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) __PYX_ERR(0, 1, __pyx_L1_error) + if (!PyDict_GetItemString(modules, "gensim.models.utils_any2vec_fast")) { + if (unlikely(PyDict_SetItemString(modules, "gensim.models.utils_any2vec_fast", __pyx_m) < 0)) __PYX_ERR(0, 1, __pyx_L1_error) + } + } + #endif + /*--- Builtin init code ---*/ + if (__Pyx_InitCachedBuiltins() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + /*--- Constants init code ---*/ + if (__Pyx_InitCachedConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + /*--- Global init code ---*/ + /*--- Variable export code ---*/ + /*--- Function export code ---*/ + /*--- Type init code ---*/ + /*--- Type import code ---*/ + /*--- Variable import code ---*/ + /*--- Function import code ---*/ + /*--- Execution code ---*/ + #if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) + if (__Pyx_patch_abc() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + + /* "gensim/models/utils_any2vec_fast.pyx":7 + * # coding: utf-8 + * + * def ft_hash(unicode string): # <<<<<<<<<<<<<< + * cdef unsigned int h = 2166136261 + * for c in string: + */ + __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_18utils_any2vec_fast_1ft_hash, NULL, __pyx_n_s_gensim_models_utils_any2vec_fast); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 7, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_ft_hash, __pyx_t_1) < 0) __PYX_ERR(0, 7, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "gensim/models/utils_any2vec_fast.pyx":1 + * #!/usr/bin/env cython # <<<<<<<<<<<<<< + * # cython: boundscheck=False + * # cython: wraparound=False + */ + __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 1, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /*--- Wrapped vars code ---*/ + + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + if (__pyx_m) { + if (__pyx_d) { + __Pyx_AddTraceback("init gensim.models.utils_any2vec_fast", __pyx_clineno, __pyx_lineno, __pyx_filename); + } + Py_DECREF(__pyx_m); __pyx_m = 0; + } else if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ImportError, "init gensim.models.utils_any2vec_fast"); + } + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + #if PY_MAJOR_VERSION < 3 + return; + #else + return __pyx_m; + #endif +} + +/* --- Runtime support code --- */ +/* Refnanny */ +#if CYTHON_REFNANNY +static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) { + PyObject *m = NULL, *p = NULL; + void *r = NULL; + m = PyImport_ImportModule((char *)modname); + if (!m) goto end; + p = PyObject_GetAttrString(m, (char *)"RefNannyAPI"); + if (!p) goto end; + r = PyLong_AsVoidPtr(p); +end: + Py_XDECREF(p); + Py_XDECREF(m); + return (__Pyx_RefNannyAPIStruct *)r; +} +#endif + +/* ArgTypeTest */ +static void __Pyx_RaiseArgumentTypeInvalid(const char* name, PyObject *obj, PyTypeObject *type) { + PyErr_Format(PyExc_TypeError, + "Argument '%.200s' has incorrect type (expected %.200s, got %.200s)", + name, type->tp_name, Py_TYPE(obj)->tp_name); +} +static CYTHON_INLINE int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, + const char *name, int exact) +{ + if (unlikely(!type)) { + PyErr_SetString(PyExc_SystemError, "Missing type object"); + return 0; + } + if (none_allowed && obj == Py_None) return 1; + else if (exact) { + if (likely(Py_TYPE(obj) == type)) return 1; + #if PY_MAJOR_VERSION == 2 + else if ((type == &PyBaseString_Type) && likely(__Pyx_PyBaseString_CheckExact(obj))) return 1; + #endif + } + else { + if (likely(PyObject_TypeCheck(obj, type))) return 1; + } + __Pyx_RaiseArgumentTypeInvalid(name, obj, type); + return 0; +} + +/* unicode_iter */ +static CYTHON_INLINE int __Pyx_init_unicode_iteration( + PyObject* ustring, Py_ssize_t *length, void** data, int *kind) { +#if CYTHON_PEP393_ENABLED + if (unlikely(__Pyx_PyUnicode_READY(ustring) < 0)) return -1; + *kind = PyUnicode_KIND(ustring); + *length = PyUnicode_GET_LENGTH(ustring); + *data = PyUnicode_DATA(ustring); +#else + *kind = 0; + *length = PyUnicode_GET_SIZE(ustring); + *data = (void*)PyUnicode_AS_UNICODE(ustring); +#endif + return 0; +} + +/* CodeObjectCache */ +static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) { + int start = 0, mid = 0, end = count - 1; + if (end >= 0 && code_line > entries[end].code_line) { + return count; + } + while (start < end) { + mid = start + (end - start) / 2; + if (code_line < entries[mid].code_line) { + end = mid; + } else if (code_line > entries[mid].code_line) { + start = mid + 1; + } else { + return mid; + } + } + if (code_line <= entries[mid].code_line) { + return mid; + } else { + return mid + 1; + } +} +static PyCodeObject *__pyx_find_code_object(int code_line) { + PyCodeObject* code_object; + int pos; + if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) { + return NULL; + } + pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); + if (unlikely(pos >= __pyx_code_cache.count) || unlikely(__pyx_code_cache.entries[pos].code_line != code_line)) { + return NULL; + } + code_object = __pyx_code_cache.entries[pos].code_object; + Py_INCREF(code_object); + return code_object; +} +static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) { + int pos, i; + __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries; + if (unlikely(!code_line)) { + return; + } + if (unlikely(!entries)) { + entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry)); + if (likely(entries)) { + __pyx_code_cache.entries = entries; + __pyx_code_cache.max_count = 64; + __pyx_code_cache.count = 1; + entries[0].code_line = code_line; + entries[0].code_object = code_object; + Py_INCREF(code_object); + } + return; + } + pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); + if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) { + PyCodeObject* tmp = entries[pos].code_object; + entries[pos].code_object = code_object; + Py_DECREF(tmp); + return; + } + if (__pyx_code_cache.count == __pyx_code_cache.max_count) { + int new_max = __pyx_code_cache.max_count + 64; + entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc( + __pyx_code_cache.entries, (size_t)new_max*sizeof(__Pyx_CodeObjectCacheEntry)); + if (unlikely(!entries)) { + return; + } + __pyx_code_cache.entries = entries; + __pyx_code_cache.max_count = new_max; + } + for (i=__pyx_code_cache.count; i>pos; i--) { + entries[i] = entries[i-1]; + } + entries[pos].code_line = code_line; + entries[pos].code_object = code_object; + __pyx_code_cache.count++; + Py_INCREF(code_object); +} + +/* AddTraceback */ +#include "compile.h" +#include "frameobject.h" +#include "traceback.h" +static PyCodeObject* __Pyx_CreateCodeObjectForTraceback( + const char *funcname, int c_line, + int py_line, const char *filename) { + PyCodeObject *py_code = 0; + PyObject *py_srcfile = 0; + PyObject *py_funcname = 0; + #if PY_MAJOR_VERSION < 3 + py_srcfile = PyString_FromString(filename); + #else + py_srcfile = PyUnicode_FromString(filename); + #endif + if (!py_srcfile) goto bad; + if (c_line) { + #if PY_MAJOR_VERSION < 3 + py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); + #else + py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); + #endif + } + else { + #if PY_MAJOR_VERSION < 3 + py_funcname = PyString_FromString(funcname); + #else + py_funcname = PyUnicode_FromString(funcname); + #endif + } + if (!py_funcname) goto bad; + py_code = __Pyx_PyCode_New( + 0, + 0, + 0, + 0, + 0, + __pyx_empty_bytes, /*PyObject *code,*/ + __pyx_empty_tuple, /*PyObject *consts,*/ + __pyx_empty_tuple, /*PyObject *names,*/ + __pyx_empty_tuple, /*PyObject *varnames,*/ + __pyx_empty_tuple, /*PyObject *freevars,*/ + __pyx_empty_tuple, /*PyObject *cellvars,*/ + py_srcfile, /*PyObject *filename,*/ + py_funcname, /*PyObject *name,*/ + py_line, + __pyx_empty_bytes /*PyObject *lnotab*/ + ); + Py_DECREF(py_srcfile); + Py_DECREF(py_funcname); + return py_code; +bad: + Py_XDECREF(py_srcfile); + Py_XDECREF(py_funcname); + return NULL; +} +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename) { + PyCodeObject *py_code = 0; + PyFrameObject *py_frame = 0; + py_code = __pyx_find_code_object(c_line ? c_line : py_line); + if (!py_code) { + py_code = __Pyx_CreateCodeObjectForTraceback( + funcname, c_line, py_line, filename); + if (!py_code) goto bad; + __pyx_insert_code_object(c_line ? c_line : py_line, py_code); + } + py_frame = PyFrame_New( + PyThreadState_GET(), /*PyThreadState *tstate,*/ + py_code, /*PyCodeObject *code,*/ + __pyx_d, /*PyObject *globals,*/ + 0 /*PyObject *locals*/ + ); + if (!py_frame) goto bad; + __Pyx_PyFrame_SetLineNumber(py_frame, py_line); + PyTraceBack_Here(py_frame); +bad: + Py_XDECREF(py_code); + Py_XDECREF(py_frame); +} + +/* CIntToPy */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_int(unsigned int value) { + const unsigned int neg_one = (unsigned int) -1, const_zero = (unsigned int) 0; + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(unsigned int) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(unsigned int) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(unsigned int) <= sizeof(unsigned PY_LONG_LONG)) { + return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); +#endif + } + } else { + if (sizeof(unsigned int) <= sizeof(long)) { + return PyInt_FromLong((long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(unsigned int) <= sizeof(PY_LONG_LONG)) { + return PyLong_FromLongLong((PY_LONG_LONG) value); +#endif + } + } + { + int one = 1; int little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&value; + return _PyLong_FromByteArray(bytes, sizeof(unsigned int), + little, !is_unsigned); + } +} + +/* CIntToPy */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) { + const long neg_one = (long) -1, const_zero = (long) 0; + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(long) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(long) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) { + return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); +#endif + } + } else { + if (sizeof(long) <= sizeof(long)) { + return PyInt_FromLong((long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) { + return PyLong_FromLongLong((PY_LONG_LONG) value); +#endif + } + } + { + int one = 1; int little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&value; + return _PyLong_FromByteArray(bytes, sizeof(long), + little, !is_unsigned); + } +} + +/* CIntFromPyVerify */ +#define __PYX_VERIFY_RETURN_INT(target_type, func_type, func_value)\ + __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 0) +#define __PYX_VERIFY_RETURN_INT_EXC(target_type, func_type, func_value)\ + __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 1) +#define __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, exc)\ + {\ + func_type value = func_value;\ + if (sizeof(target_type) < sizeof(func_type)) {\ + if (unlikely(value != (func_type) (target_type) value)) {\ + func_type zero = 0;\ + if (exc && unlikely(value == (func_type)-1 && PyErr_Occurred()))\ + return (target_type) -1;\ + if (is_unsigned && unlikely(value < zero))\ + goto raise_neg_overflow;\ + else\ + goto raise_overflow;\ + }\ + }\ + return (target_type) value;\ + } + +/* CIntFromPy */ +static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) { + const long neg_one = (long) -1, const_zero = (long) 0; + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if (sizeof(long) < sizeof(long)) { + __PYX_VERIFY_RETURN_INT(long, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (long) val; + } + } else +#endif + if (likely(PyLong_Check(x))) { + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (long) 0; + case 1: __PYX_VERIFY_RETURN_INT(long, digit, digits[0]) + case 2: + if (8 * sizeof(long) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) >= 2 * PyLong_SHIFT) { + return (long) (((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + case 3: + if (8 * sizeof(long) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) >= 3 * PyLong_SHIFT) { + return (long) (((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + case 4: + if (8 * sizeof(long) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) >= 4 * PyLong_SHIFT) { + return (long) (((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (long) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if (sizeof(long) <= sizeof(unsigned long)) { + __PYX_VERIFY_RETURN_INT_EXC(long, unsigned long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(long, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (long) 0; + case -1: __PYX_VERIFY_RETURN_INT(long, sdigit, (sdigit) (-(sdigit)digits[0])) + case 1: __PYX_VERIFY_RETURN_INT(long, digit, +digits[0]) + case -2: + if (8 * sizeof(long) - 1 > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { + return (long) (((long)-1)*(((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 2: + if (8 * sizeof(long) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { + return (long) ((((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case -3: + if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { + return (long) (((long)-1)*(((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 3: + if (8 * sizeof(long) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { + return (long) ((((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case -4: + if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) { + return (long) (((long)-1)*(((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 4: + if (8 * sizeof(long) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) { + return (long) ((((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + } +#endif + if (sizeof(long) <= sizeof(long)) { + __PYX_VERIFY_RETURN_INT_EXC(long, long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(long, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { +#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) + PyErr_SetString(PyExc_RuntimeError, + "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); +#else + long val; + PyObject *v = __Pyx_PyNumber_IntOrLong(x); + #if PY_MAJOR_VERSION < 3 + if (likely(v) && !PyLong_Check(v)) { + PyObject *tmp = v; + v = PyNumber_Long(tmp); + Py_DECREF(tmp); + } + #endif + if (likely(v)) { + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + int ret = _PyLong_AsByteArray((PyLongObject *)v, + bytes, sizeof(val), + is_little, !is_unsigned); + Py_DECREF(v); + if (likely(!ret)) + return val; + } +#endif + return (long) -1; + } + } else { + long val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (long) -1; + val = __Pyx_PyInt_As_long(tmp); + Py_DECREF(tmp); + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to long"); + return (long) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to long"); + return (long) -1; +} + +/* CIntFromPy */ +static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) { + const int neg_one = (int) -1, const_zero = (int) 0; + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if (sizeof(int) < sizeof(long)) { + __PYX_VERIFY_RETURN_INT(int, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (int) val; + } + } else +#endif + if (likely(PyLong_Check(x))) { + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (int) 0; + case 1: __PYX_VERIFY_RETURN_INT(int, digit, digits[0]) + case 2: + if (8 * sizeof(int) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) >= 2 * PyLong_SHIFT) { + return (int) (((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + case 3: + if (8 * sizeof(int) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) >= 3 * PyLong_SHIFT) { + return (int) (((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + case 4: + if (8 * sizeof(int) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) >= 4 * PyLong_SHIFT) { + return (int) (((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (int) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if (sizeof(int) <= sizeof(unsigned long)) { + __PYX_VERIFY_RETURN_INT_EXC(int, unsigned long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if (sizeof(int) <= sizeof(unsigned PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(int, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (int) 0; + case -1: __PYX_VERIFY_RETURN_INT(int, sdigit, (sdigit) (-(sdigit)digits[0])) + case 1: __PYX_VERIFY_RETURN_INT(int, digit, +digits[0]) + case -2: + if (8 * sizeof(int) - 1 > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) { + return (int) (((int)-1)*(((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 2: + if (8 * sizeof(int) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) { + return (int) ((((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case -3: + if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) { + return (int) (((int)-1)*(((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 3: + if (8 * sizeof(int) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) { + return (int) ((((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case -4: + if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 4 * PyLong_SHIFT) { + return (int) (((int)-1)*(((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 4: + if (8 * sizeof(int) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 4 * PyLong_SHIFT) { + return (int) ((((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + } +#endif + if (sizeof(int) <= sizeof(long)) { + __PYX_VERIFY_RETURN_INT_EXC(int, long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if (sizeof(int) <= sizeof(PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(int, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { +#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) + PyErr_SetString(PyExc_RuntimeError, + "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); +#else + int val; + PyObject *v = __Pyx_PyNumber_IntOrLong(x); + #if PY_MAJOR_VERSION < 3 + if (likely(v) && !PyLong_Check(v)) { + PyObject *tmp = v; + v = PyNumber_Long(tmp); + Py_DECREF(tmp); + } + #endif + if (likely(v)) { + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + int ret = _PyLong_AsByteArray((PyLongObject *)v, + bytes, sizeof(val), + is_little, !is_unsigned); + Py_DECREF(v); + if (likely(!ret)) + return val; + } +#endif + return (int) -1; + } + } else { + int val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (int) -1; + val = __Pyx_PyInt_As_int(tmp); + Py_DECREF(tmp); + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to int"); + return (int) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to int"); + return (int) -1; +} + +/* CheckBinaryVersion */ +static int __Pyx_check_binary_version(void) { + char ctversion[4], rtversion[4]; + PyOS_snprintf(ctversion, 4, "%d.%d", PY_MAJOR_VERSION, PY_MINOR_VERSION); + PyOS_snprintf(rtversion, 4, "%s", Py_GetVersion()); + if (ctversion[0] != rtversion[0] || ctversion[2] != rtversion[2]) { + char message[200]; + PyOS_snprintf(message, sizeof(message), + "compiletime version %s of module '%.100s' " + "does not match runtime version %s", + ctversion, __Pyx_MODULE_NAME, rtversion); + return PyErr_WarnEx(NULL, message, 1); + } + return 0; +} + +/* InitStrings */ +static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) { + while (t->p) { + #if PY_MAJOR_VERSION < 3 + if (t->is_unicode) { + *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL); + } else if (t->intern) { + *t->p = PyString_InternFromString(t->s); + } else { + *t->p = PyString_FromStringAndSize(t->s, t->n - 1); + } + #else + if (t->is_unicode | t->is_str) { + if (t->intern) { + *t->p = PyUnicode_InternFromString(t->s); + } else if (t->encoding) { + *t->p = PyUnicode_Decode(t->s, t->n - 1, t->encoding, NULL); + } else { + *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1); + } + } else { + *t->p = PyBytes_FromStringAndSize(t->s, t->n - 1); + } + #endif + if (!*t->p) + return -1; + ++t; + } + return 0; +} + +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) { + return __Pyx_PyUnicode_FromStringAndSize(c_str, (Py_ssize_t)strlen(c_str)); +} +static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject* o) { + Py_ssize_t ignore; + return __Pyx_PyObject_AsStringAndSize(o, &ignore); +} +static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) { +#if CYTHON_COMPILING_IN_CPYTHON && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) + if ( +#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + __Pyx_sys_getdefaultencoding_not_ascii && +#endif + PyUnicode_Check(o)) { +#if PY_VERSION_HEX < 0x03030000 + char* defenc_c; + PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL); + if (!defenc) return NULL; + defenc_c = PyBytes_AS_STRING(defenc); +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + { + char* end = defenc_c + PyBytes_GET_SIZE(defenc); + char* c; + for (c = defenc_c; c < end; c++) { + if ((unsigned char) (*c) >= 128) { + PyUnicode_AsASCIIString(o); + return NULL; + } + } + } +#endif + *length = PyBytes_GET_SIZE(defenc); + return defenc_c; +#else + if (__Pyx_PyUnicode_READY(o) == -1) return NULL; +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + if (PyUnicode_IS_ASCII(o)) { + *length = PyUnicode_GET_LENGTH(o); + return PyUnicode_AsUTF8(o); + } else { + PyUnicode_AsASCIIString(o); + return NULL; + } +#else + return PyUnicode_AsUTF8AndSize(o, length); +#endif +#endif + } else +#endif +#if (!CYTHON_COMPILING_IN_PYPY) || (defined(PyByteArray_AS_STRING) && defined(PyByteArray_GET_SIZE)) + if (PyByteArray_Check(o)) { + *length = PyByteArray_GET_SIZE(o); + return PyByteArray_AS_STRING(o); + } else +#endif + { + char* result; + int r = PyBytes_AsStringAndSize(o, &result, length); + if (unlikely(r < 0)) { + return NULL; + } else { + return result; + } + } +} +static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) { + int is_true = x == Py_True; + if (is_true | (x == Py_False) | (x == Py_None)) return is_true; + else return PyObject_IsTrue(x); +} +static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x) { +#if CYTHON_USE_TYPE_SLOTS + PyNumberMethods *m; +#endif + const char *name = NULL; + PyObject *res = NULL; +#if PY_MAJOR_VERSION < 3 + if (PyInt_Check(x) || PyLong_Check(x)) +#else + if (PyLong_Check(x)) +#endif + return __Pyx_NewRef(x); +#if CYTHON_USE_TYPE_SLOTS + m = Py_TYPE(x)->tp_as_number; + #if PY_MAJOR_VERSION < 3 + if (m && m->nb_int) { + name = "int"; + res = PyNumber_Int(x); + } + else if (m && m->nb_long) { + name = "long"; + res = PyNumber_Long(x); + } + #else + if (m && m->nb_int) { + name = "int"; + res = PyNumber_Long(x); + } + #endif +#else + res = PyNumber_Int(x); +#endif + if (res) { +#if PY_MAJOR_VERSION < 3 + if (!PyInt_Check(res) && !PyLong_Check(res)) { +#else + if (!PyLong_Check(res)) { +#endif + PyErr_Format(PyExc_TypeError, + "__%.4s__ returned non-%.4s (type %.200s)", + name, name, Py_TYPE(res)->tp_name); + Py_DECREF(res); + return NULL; + } + } + else if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_TypeError, + "an integer is required"); + } + return res; +} +static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) { + Py_ssize_t ival; + PyObject *x; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_CheckExact(b))) { + if (sizeof(Py_ssize_t) >= sizeof(long)) + return PyInt_AS_LONG(b); + else + return PyInt_AsSsize_t(x); + } +#endif + if (likely(PyLong_CheckExact(b))) { + #if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)b)->ob_digit; + const Py_ssize_t size = Py_SIZE(b); + if (likely(__Pyx_sst_abs(size) <= 1)) { + ival = likely(size) ? digits[0] : 0; + if (size == -1) ival = -ival; + return ival; + } else { + switch (size) { + case 2: + if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { + return (Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -2: + if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case 3: + if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { + return (Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -3: + if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case 4: + if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { + return (Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -4: + if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + } + } + #endif + return PyLong_AsSsize_t(b); + } + x = PyNumber_Index(b); + if (!x) return -1; + ival = PyInt_AsSsize_t(x); + Py_DECREF(x); + return ival; +} +static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) { + return PyInt_FromSize_t(ival); +} + + +#endif /* Py_PYTHON_H */ diff --git a/gensim/models/utils_any2vec_fast.pyx b/gensim/models/utils_any2vec_fast.pyx new file mode 100644 index 0000000000..703f661d3e --- /dev/null +++ b/gensim/models/utils_any2vec_fast.pyx @@ -0,0 +1,12 @@ +#!/usr/bin/env cython +# cython: boundscheck=False +# cython: wraparound=False +# cython: cdivision=True +# coding: utf-8 + +def ft_hash(unicode string): + cdef unsigned int h = 2166136261 + for c in string: + h ^= ord(c) + h *= 16777619 + return h From 9f3428a3b004113530d512542258f9941dd77241 Mon Sep 17 00:00:00 2001 From: Johannes Baiter Date: Wed, 3 Jan 2018 21:02:59 +0100 Subject: [PATCH 02/20] Cythonize fasttext.compute_ngrams for 2x performance improvement --- gensim/models/utils_any2vec.py | 50 +- gensim/models/utils_any2vec_fast.c | 1157 +++++++++++++++++++++++++- gensim/models/utils_any2vec_fast.pyx | 11 + 3 files changed, 1166 insertions(+), 52 deletions(-) diff --git a/gensim/models/utils_any2vec.py b/gensim/models/utils_any2vec.py index 416fe89166..05d50c07a4 100644 --- a/gensim/models/utils_any2vec.py +++ b/gensim/models/utils_any2vec.py @@ -18,7 +18,9 @@ logger = logging.getLogger(__name__) try: - from gensim.models.utils_any2vec_fast import ft_hash as _ft_hash + from gensim.models.utils_any2vec_fast import ( + ft_hash as _ft_hash, + compute_ngrams as _compute_ngrams) except ImportError: # failed... fall back to plain python (~100x slower than the above) def _ft_hash(string): @@ -36,6 +38,7 @@ def _ft_hash(string): The hash of the string """ + raise Exception("slow") # Runtime warnings for integer overflow are raised, this is expected behaviour. These warnings are suppressed. old_settings = np.seterr(all='ignore') h = np.uint32(2166136261) @@ -45,32 +48,31 @@ def _ft_hash(string): np.seterr(**old_settings) return h + def _compute_ngrams(word, min_n, max_n): + """Returns the list of all possible ngrams for a given word. -def _compute_ngrams(word, min_n, max_n): - """Returns the list of all possible ngrams for a given word. - - Parameters - ---------- - word : str - The word whose ngrams need to be computed - min_n : int - minimum character length of the ngrams - max_n : int - maximum character length of the ngrams + Parameters + ---------- + word : str + The word whose ngrams need to be computed + min_n : int + minimum character length of the ngrams + max_n : int + maximum character length of the ngrams - Returns - ------- - :obj:`list` of :obj:`str` - List of character ngrams + Returns + ------- + :obj:`list` of :obj:`str` + List of character ngrams - """ - BOW, EOW = ('<', '>') # Used by FastText to attach to all words as prefix and suffix - extended_word = BOW + word + EOW - ngrams = [] - for ngram_length in range(min_n, min(len(extended_word), max_n) + 1): - for i in range(0, len(extended_word) - ngram_length + 1): - ngrams.append(extended_word[i:i + ngram_length]) - return ngrams + """ + BOW, EOW = ('<', '>') # Used by FastText to attach to all words as prefix and suffix + extended_word = BOW + word + EOW + ngrams = [] + for ngram_length in range(min_n, min(len(extended_word), max_n) + 1): + for i in range(0, len(extended_word) - ngram_length + 1): + ngrams.append(extended_word[i:i + ngram_length]) + return ngrams def _save_word2vec_format(fname, vocab, vectors, fvocab=None, binary=False, total_vec=None): diff --git a/gensim/models/utils_any2vec_fast.c b/gensim/models/utils_any2vec_fast.c index 3980e8462b..e48741f295 100644 --- a/gensim/models/utils_any2vec_fast.c +++ b/gensim/models/utils_any2vec_fast.c @@ -700,6 +700,25 @@ static const char *__pyx_f[] = { #define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0) #define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0) +/* PyObjectGetAttrStr.proto */ +#if CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name) { + PyTypeObject* tp = Py_TYPE(obj); + if (likely(tp->tp_getattro)) + return tp->tp_getattro(obj, attr_name); +#if PY_MAJOR_VERSION < 3 + if (likely(tp->tp_getattr)) + return tp->tp_getattr(obj, PyString_AS_STRING(attr_name)); +#endif + return PyObject_GetAttr(obj, attr_name); +} +#else +#define __Pyx_PyObject_GetAttrStr(o,n) PyObject_GetAttr(o,n) +#endif + +/* GetBuiltinName.proto */ +static PyObject *__Pyx_GetBuiltinName(PyObject *name); + /* ArgTypeTest.proto */ static CYTHON_INLINE int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, const char *name, int exact); @@ -708,6 +727,83 @@ static CYTHON_INLINE int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, in static CYTHON_INLINE int __Pyx_init_unicode_iteration( PyObject* ustring, Py_ssize_t *length, void** data, int *kind); +/* RaiseArgTupleInvalid.proto */ +static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact, + Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found); + +/* RaiseDoubleKeywords.proto */ +static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name); + +/* ParseKeywords.proto */ +static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject **argnames[],\ + PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args,\ + const char* function_name); + +/* PyObjectFormatSimple.proto */ +#if CYTHON_COMPILING_IN_PYPY + #define __Pyx_PyObject_FormatSimple(s, f) (\ + likely(PyUnicode_CheckExact(s)) ? (Py_INCREF(s), s) :\ + PyObject_Format(s, f)) +#elif PY_MAJOR_VERSION < 3 + #define __Pyx_PyObject_FormatSimple(s, f) (\ + likely(PyUnicode_CheckExact(s)) ? (Py_INCREF(s), s) :\ + likely(PyString_CheckExact(s)) ? PyUnicode_FromEncodedObject(s, NULL, "strict") :\ + PyObject_Format(s, f)) +#elif CYTHON_USE_TYPE_SLOTS + #define __Pyx_PyObject_FormatSimple(s, f) (\ + likely(PyUnicode_CheckExact(s)) ? (Py_INCREF(s), s) :\ + likely(PyLong_CheckExact(s)) ? PyLong_Type.tp_str(s) :\ + likely(PyFloat_CheckExact(s)) ? PyFloat_Type.tp_str(s) :\ + PyObject_Format(s, f)) +#else + #define __Pyx_PyObject_FormatSimple(s, f) (\ + likely(PyUnicode_CheckExact(s)) ? (Py_INCREF(s), s) :\ + PyObject_Format(s, f)) +#endif + +/* IncludeStringH.proto */ +#include + +/* JoinPyUnicode.proto */ +static PyObject* __Pyx_PyUnicode_Join(PyObject* value_tuple, Py_ssize_t value_count, Py_ssize_t result_ulength, + Py_UCS4 max_char); + +/* PyObjectCall.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw); +#else +#define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw) +#endif + +/* PyIntBinop.proto */ +#if !CYTHON_COMPILING_IN_PYPY +static PyObject* __Pyx_PyInt_AddObjC(PyObject *op1, PyObject *op2, long intval, int inplace); +#else +#define __Pyx_PyInt_AddObjC(op1, op2, intval, inplace)\ + (inplace ? PyNumber_InPlaceAdd(op1, op2) : PyNumber_Add(op1, op2)) +#endif + +/* ListAppend.proto */ +#if CYTHON_USE_PYLIST_INTERNALS && CYTHON_ASSUME_SAFE_MACROS +static CYTHON_INLINE int __Pyx_PyList_Append(PyObject* list, PyObject* x) { + PyListObject* L = (PyListObject*) list; + Py_ssize_t len = Py_SIZE(list); + if (likely(L->allocated > len) & likely(len > (L->allocated >> 1))) { + Py_INCREF(x); + PyList_SET_ITEM(list, len, x); + Py_SIZE(list) = len+1; + return 0; + } + return PyList_Append(list, x); +} +#else +#define __Pyx_PyList_Append(L,x) PyList_Append(L,x) +#endif + +/* PyUnicode_Substring.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Substring( + PyObject* text, Py_ssize_t start, Py_ssize_t stop); + /* CodeObjectCache.proto */ typedef struct { PyCodeObject* code_object; @@ -730,6 +826,9 @@ static void __Pyx_AddTraceback(const char *funcname, int c_line, /* CIntToPy.proto */ static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_int(unsigned int value); +/* CIntFromPy.proto */ +static CYTHON_INLINE unsigned int __Pyx_PyInt_As_unsigned_int(PyObject *); + /* CIntToPy.proto */ static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value); @@ -751,25 +850,53 @@ static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); int __pyx_module_is_main_gensim__models__utils_any2vec_fast = 0; /* Implementation of 'gensim.models.utils_any2vec_fast' */ +static PyObject *__pyx_builtin_range; +static const char __pyx_k_[] = "<"; static const char __pyx_k_c[] = "c"; static const char __pyx_k_h[] = "h"; +static const char __pyx_k_i[] = "i"; +static const char __pyx_k__2[] = ">"; static const char __pyx_k_main[] = "__main__"; static const char __pyx_k_test[] = "__test__"; +static const char __pyx_k_word[] = "word"; +static const char __pyx_k_max_n[] = "max_n"; +static const char __pyx_k_min_n[] = "min_n"; +static const char __pyx_k_range[] = "range"; +static const char __pyx_k_ngrams[] = "ngrams"; static const char __pyx_k_string[] = "string"; static const char __pyx_k_ft_hash[] = "ft_hash"; +static const char __pyx_k_ngram_length[] = "ngram_length"; +static const char __pyx_k_extended_word[] = "extended_word"; +static const char __pyx_k_compute_ngrams[] = "compute_ngrams"; static const char __pyx_k_home_jojo_projekte_gensim_gensi[] = "/home/jojo/projekte/gensim/gensim/models/utils_any2vec_fast.pyx"; static const char __pyx_k_gensim_models_utils_any2vec_fast[] = "gensim.models.utils_any2vec_fast"; +static PyObject *__pyx_kp_u_; +static PyObject *__pyx_kp_u__2; static PyObject *__pyx_n_s_c; +static PyObject *__pyx_n_s_compute_ngrams; +static PyObject *__pyx_n_s_extended_word; static PyObject *__pyx_n_s_ft_hash; static PyObject *__pyx_n_s_gensim_models_utils_any2vec_fast; static PyObject *__pyx_n_s_h; static PyObject *__pyx_kp_s_home_jojo_projekte_gensim_gensi; +static PyObject *__pyx_n_s_i; static PyObject *__pyx_n_s_main; +static PyObject *__pyx_n_s_max_n; +static PyObject *__pyx_n_s_min_n; +static PyObject *__pyx_n_s_ngram_length; +static PyObject *__pyx_n_s_ngrams; +static PyObject *__pyx_n_s_range; static PyObject *__pyx_n_s_string; static PyObject *__pyx_n_s_test; +static PyObject *__pyx_n_s_word; static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_ft_hash(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_string); /* proto */ -static PyObject *__pyx_tuple_; -static PyObject *__pyx_codeobj__2; +static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_2compute_ngrams(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_word, unsigned int __pyx_v_min_n, unsigned int __pyx_v_max_n); /* proto */ +static PyObject *__pyx_int_0; +static PyObject *__pyx_int_1; +static PyObject *__pyx_tuple__3; +static PyObject *__pyx_tuple__5; +static PyObject *__pyx_codeobj__4; +static PyObject *__pyx_codeobj__6; /* "gensim/models/utils_any2vec_fast.pyx":7 * # coding: utf-8 @@ -854,6 +981,7 @@ static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_ft_hash(CYTHON_UN * h ^= ord(c) * h *= 16777619 # <<<<<<<<<<<<<< * return h + * */ __pyx_v_h = (__pyx_v_h * 0x1000193); } @@ -863,6 +991,8 @@ static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_ft_hash(CYTHON_UN * h ^= ord(c) * h *= 16777619 * return h # <<<<<<<<<<<<<< + * + * */ __Pyx_XDECREF(__pyx_r); __pyx_t_8 = __Pyx_PyInt_From_unsigned_int(__pyx_v_h); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 12, __pyx_L1_error) @@ -891,6 +1021,368 @@ static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_ft_hash(CYTHON_UN return __pyx_r; } +/* "gensim/models/utils_any2vec_fast.pyx":15 + * + * + * def compute_ngrams(unicode word, unsigned int min_n, unsigned int max_n): # <<<<<<<<<<<<<< + * cdef unicode extended_word = f'<{word}>' + * ngrams = [] + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_6gensim_6models_18utils_any2vec_fast_3compute_ngrams(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static PyMethodDef __pyx_mdef_6gensim_6models_18utils_any2vec_fast_3compute_ngrams = {"compute_ngrams", (PyCFunction)__pyx_pw_6gensim_6models_18utils_any2vec_fast_3compute_ngrams, METH_VARARGS|METH_KEYWORDS, 0}; +static PyObject *__pyx_pw_6gensim_6models_18utils_any2vec_fast_3compute_ngrams(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { + PyObject *__pyx_v_word = 0; + unsigned int __pyx_v_min_n; + unsigned int __pyx_v_max_n; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("compute_ngrams (wrapper)", 0); + { + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_word,&__pyx_n_s_min_n,&__pyx_n_s_max_n,0}; + PyObject* values[3] = {0,0,0}; + if (unlikely(__pyx_kwds)) { + Py_ssize_t kw_args; + const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); + switch (pos_args) { + case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); + case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = PyDict_Size(__pyx_kwds); + switch (pos_args) { + case 0: + if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_word)) != 0)) kw_args--; + else goto __pyx_L5_argtuple_error; + case 1: + if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_min_n)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("compute_ngrams", 1, 3, 3, 1); __PYX_ERR(0, 15, __pyx_L3_error) + } + case 2: + if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_max_n)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("compute_ngrams", 1, 3, 3, 2); __PYX_ERR(0, 15, __pyx_L3_error) + } + } + if (unlikely(kw_args > 0)) { + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "compute_ngrams") < 0)) __PYX_ERR(0, 15, __pyx_L3_error) + } + } else if (PyTuple_GET_SIZE(__pyx_args) != 3) { + goto __pyx_L5_argtuple_error; + } else { + values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + values[2] = PyTuple_GET_ITEM(__pyx_args, 2); + } + __pyx_v_word = ((PyObject*)values[0]); + __pyx_v_min_n = __Pyx_PyInt_As_unsigned_int(values[1]); if (unlikely((__pyx_v_min_n == (unsigned int)-1) && PyErr_Occurred())) __PYX_ERR(0, 15, __pyx_L3_error) + __pyx_v_max_n = __Pyx_PyInt_As_unsigned_int(values[2]); if (unlikely((__pyx_v_max_n == (unsigned int)-1) && PyErr_Occurred())) __PYX_ERR(0, 15, __pyx_L3_error) + } + goto __pyx_L4_argument_unpacking_done; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("compute_ngrams", 1, 3, 3, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 15, __pyx_L3_error) + __pyx_L3_error:; + __Pyx_AddTraceback("gensim.models.utils_any2vec_fast.compute_ngrams", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_word), (&PyUnicode_Type), 1, "word", 1))) __PYX_ERR(0, 15, __pyx_L1_error) + __pyx_r = __pyx_pf_6gensim_6models_18utils_any2vec_fast_2compute_ngrams(__pyx_self, __pyx_v_word, __pyx_v_min_n, __pyx_v_max_n); + + /* function exit code */ + goto __pyx_L0; + __pyx_L1_error:; + __pyx_r = NULL; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_2compute_ngrams(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_word, unsigned int __pyx_v_min_n, unsigned int __pyx_v_max_n) { + PyObject *__pyx_v_extended_word = 0; + PyObject *__pyx_v_ngrams = NULL; + PyObject *__pyx_v_ngram_length = NULL; + PyObject *__pyx_v_i = NULL; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + Py_ssize_t __pyx_t_2; + Py_UCS4 __pyx_t_3; + PyObject *__pyx_t_4 = NULL; + unsigned int __pyx_t_5; + Py_ssize_t __pyx_t_6; + PyObject *__pyx_t_7 = NULL; + PyObject *(*__pyx_t_8)(PyObject *); + PyObject *(*__pyx_t_9)(PyObject *); + Py_ssize_t __pyx_t_10; + Py_ssize_t __pyx_t_11; + int __pyx_t_12; + __Pyx_RefNannySetupContext("compute_ngrams", 0); + + /* "gensim/models/utils_any2vec_fast.pyx":16 + * + * def compute_ngrams(unicode word, unsigned int min_n, unsigned int max_n): + * cdef unicode extended_word = f'<{word}>' # <<<<<<<<<<<<<< + * ngrams = [] + * for ngram_length in range(min_n, min(len(extended_word), max_n) + 1): + */ + __pyx_t_1 = PyTuple_New(3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 16, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = 0; + __pyx_t_3 = 127; + __Pyx_INCREF(__pyx_kp_u_); + __pyx_t_2 += 1; + __Pyx_GIVEREF(__pyx_kp_u_); + PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_kp_u_); + __pyx_t_4 = __Pyx_PyObject_FormatSimple(__pyx_v_word, __pyx_empty_unicode); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 16, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_3 = (__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_4) > __pyx_t_3) ? __Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_4) : __pyx_t_3; + __pyx_t_2 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_4); + __Pyx_GIVEREF(__pyx_t_4); + PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_t_4); + __pyx_t_4 = 0; + __Pyx_INCREF(__pyx_kp_u__2); + __pyx_t_2 += 1; + __Pyx_GIVEREF(__pyx_kp_u__2); + PyTuple_SET_ITEM(__pyx_t_1, 2, __pyx_kp_u__2); + __pyx_t_4 = __Pyx_PyUnicode_Join(__pyx_t_1, 3, __pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 16, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_v_extended_word = ((PyObject*)__pyx_t_4); + __pyx_t_4 = 0; + + /* "gensim/models/utils_any2vec_fast.pyx":17 + * def compute_ngrams(unicode word, unsigned int min_n, unsigned int max_n): + * cdef unicode extended_word = f'<{word}>' + * ngrams = [] # <<<<<<<<<<<<<< + * for ngram_length in range(min_n, min(len(extended_word), max_n) + 1): + * for i in range(0, len(extended_word) - ngram_length + 1): + */ + __pyx_t_4 = PyList_New(0); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 17, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_v_ngrams = ((PyObject*)__pyx_t_4); + __pyx_t_4 = 0; + + /* "gensim/models/utils_any2vec_fast.pyx":18 + * cdef unicode extended_word = f'<{word}>' + * ngrams = [] + * for ngram_length in range(min_n, min(len(extended_word), max_n) + 1): # <<<<<<<<<<<<<< + * for i in range(0, len(extended_word) - ngram_length + 1): + * ngrams.append(extended_word[i:i + ngram_length]) + */ + __pyx_t_4 = __Pyx_PyInt_From_unsigned_int(__pyx_v_min_n); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 18, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_5 = __pyx_v_max_n; + __pyx_t_2 = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_extended_word); if (unlikely(__pyx_t_2 == -1)) __PYX_ERR(0, 18, __pyx_L1_error) + if (((__pyx_t_5 < __pyx_t_2) != 0)) { + __pyx_t_6 = __pyx_t_5; + } else { + __pyx_t_6 = __pyx_t_2; + } + __pyx_t_1 = PyInt_FromSsize_t((__pyx_t_6 + 1)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 18, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_7 = PyTuple_New(2); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 18, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_7); + __Pyx_GIVEREF(__pyx_t_4); + PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_t_4); + __Pyx_GIVEREF(__pyx_t_1); + PyTuple_SET_ITEM(__pyx_t_7, 1, __pyx_t_1); + __pyx_t_4 = 0; + __pyx_t_1 = 0; + __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_range, __pyx_t_7, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 18, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + if (likely(PyList_CheckExact(__pyx_t_1)) || PyTuple_CheckExact(__pyx_t_1)) { + __pyx_t_7 = __pyx_t_1; __Pyx_INCREF(__pyx_t_7); __pyx_t_6 = 0; + __pyx_t_8 = NULL; + } else { + __pyx_t_6 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 18, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_7); + __pyx_t_8 = Py_TYPE(__pyx_t_7)->tp_iternext; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 18, __pyx_L1_error) + } + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + for (;;) { + if (likely(!__pyx_t_8)) { + if (likely(PyList_CheckExact(__pyx_t_7))) { + if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_7)) break; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_1 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_6); __Pyx_INCREF(__pyx_t_1); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 18, __pyx_L1_error) + #else + __pyx_t_1 = PySequence_ITEM(__pyx_t_7, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 18, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + #endif + } else { + if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_7)) break; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_6); __Pyx_INCREF(__pyx_t_1); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 18, __pyx_L1_error) + #else + __pyx_t_1 = PySequence_ITEM(__pyx_t_7, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 18, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + #endif + } + } else { + __pyx_t_1 = __pyx_t_8(__pyx_t_7); + if (unlikely(!__pyx_t_1)) { + PyObject* exc_type = PyErr_Occurred(); + if (exc_type) { + if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else __PYX_ERR(0, 18, __pyx_L1_error) + } + break; + } + __Pyx_GOTREF(__pyx_t_1); + } + __Pyx_XDECREF_SET(__pyx_v_ngram_length, __pyx_t_1); + __pyx_t_1 = 0; + + /* "gensim/models/utils_any2vec_fast.pyx":19 + * ngrams = [] + * for ngram_length in range(min_n, min(len(extended_word), max_n) + 1): + * for i in range(0, len(extended_word) - ngram_length + 1): # <<<<<<<<<<<<<< + * ngrams.append(extended_word[i:i + ngram_length]) + * return ngrams + */ + __pyx_t_2 = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_extended_word); if (unlikely(__pyx_t_2 == -1)) __PYX_ERR(0, 19, __pyx_L1_error) + __pyx_t_1 = PyInt_FromSsize_t(__pyx_t_2); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 19, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_4 = PyNumber_Subtract(__pyx_t_1, __pyx_v_ngram_length); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 19, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = __Pyx_PyInt_AddObjC(__pyx_t_4, __pyx_int_1, 1, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 19, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_4 = PyTuple_New(2); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 19, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_INCREF(__pyx_int_0); + __Pyx_GIVEREF(__pyx_int_0); + PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_int_0); + __Pyx_GIVEREF(__pyx_t_1); + PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_t_1); + __pyx_t_1 = 0; + __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_range, __pyx_t_4, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 19, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (likely(PyList_CheckExact(__pyx_t_1)) || PyTuple_CheckExact(__pyx_t_1)) { + __pyx_t_4 = __pyx_t_1; __Pyx_INCREF(__pyx_t_4); __pyx_t_2 = 0; + __pyx_t_9 = NULL; + } else { + __pyx_t_2 = -1; __pyx_t_4 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 19, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_9 = Py_TYPE(__pyx_t_4)->tp_iternext; if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 19, __pyx_L1_error) + } + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + for (;;) { + if (likely(!__pyx_t_9)) { + if (likely(PyList_CheckExact(__pyx_t_4))) { + if (__pyx_t_2 >= PyList_GET_SIZE(__pyx_t_4)) break; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_1 = PyList_GET_ITEM(__pyx_t_4, __pyx_t_2); __Pyx_INCREF(__pyx_t_1); __pyx_t_2++; if (unlikely(0 < 0)) __PYX_ERR(0, 19, __pyx_L1_error) + #else + __pyx_t_1 = PySequence_ITEM(__pyx_t_4, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 19, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + #endif + } else { + if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_4)) break; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_4, __pyx_t_2); __Pyx_INCREF(__pyx_t_1); __pyx_t_2++; if (unlikely(0 < 0)) __PYX_ERR(0, 19, __pyx_L1_error) + #else + __pyx_t_1 = PySequence_ITEM(__pyx_t_4, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 19, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + #endif + } + } else { + __pyx_t_1 = __pyx_t_9(__pyx_t_4); + if (unlikely(!__pyx_t_1)) { + PyObject* exc_type = PyErr_Occurred(); + if (exc_type) { + if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else __PYX_ERR(0, 19, __pyx_L1_error) + } + break; + } + __Pyx_GOTREF(__pyx_t_1); + } + __Pyx_XDECREF_SET(__pyx_v_i, __pyx_t_1); + __pyx_t_1 = 0; + + /* "gensim/models/utils_any2vec_fast.pyx":20 + * for ngram_length in range(min_n, min(len(extended_word), max_n) + 1): + * for i in range(0, len(extended_word) - ngram_length + 1): + * ngrams.append(extended_word[i:i + ngram_length]) # <<<<<<<<<<<<<< + * return ngrams + * + */ + __pyx_t_10 = __Pyx_PyIndex_AsSsize_t(__pyx_v_i); if (unlikely((__pyx_t_10 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 20, __pyx_L1_error) + __pyx_t_1 = PyNumber_Add(__pyx_v_i, __pyx_v_ngram_length); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 20, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_11 = __Pyx_PyIndex_AsSsize_t(__pyx_t_1); if (unlikely((__pyx_t_11 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 20, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = __Pyx_PyUnicode_Substring(__pyx_v_extended_word, __pyx_t_10, __pyx_t_11); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 20, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_12 = __Pyx_PyList_Append(__pyx_v_ngrams, __pyx_t_1); if (unlikely(__pyx_t_12 == -1)) __PYX_ERR(0, 20, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "gensim/models/utils_any2vec_fast.pyx":19 + * ngrams = [] + * for ngram_length in range(min_n, min(len(extended_word), max_n) + 1): + * for i in range(0, len(extended_word) - ngram_length + 1): # <<<<<<<<<<<<<< + * ngrams.append(extended_word[i:i + ngram_length]) + * return ngrams + */ + } + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + + /* "gensim/models/utils_any2vec_fast.pyx":18 + * cdef unicode extended_word = f'<{word}>' + * ngrams = [] + * for ngram_length in range(min_n, min(len(extended_word), max_n) + 1): # <<<<<<<<<<<<<< + * for i in range(0, len(extended_word) - ngram_length + 1): + * ngrams.append(extended_word[i:i + ngram_length]) + */ + } + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + + /* "gensim/models/utils_any2vec_fast.pyx":21 + * for i in range(0, len(extended_word) - ngram_length + 1): + * ngrams.append(extended_word[i:i + ngram_length]) + * return ngrams # <<<<<<<<<<<<<< + * + * + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(__pyx_v_ngrams); + __pyx_r = __pyx_v_ngrams; + goto __pyx_L0; + + /* "gensim/models/utils_any2vec_fast.pyx":15 + * + * + * def compute_ngrams(unicode word, unsigned int min_n, unsigned int max_n): # <<<<<<<<<<<<<< + * cdef unicode extended_word = f'<{word}>' + * ngrams = [] + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_4); + __Pyx_XDECREF(__pyx_t_7); + __Pyx_AddTraceback("gensim.models.utils_any2vec_fast.compute_ngrams", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XDECREF(__pyx_v_extended_word); + __Pyx_XDECREF(__pyx_v_ngrams); + __Pyx_XDECREF(__pyx_v_ngram_length); + __Pyx_XDECREF(__pyx_v_i); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + static PyMethodDef __pyx_methods[] = { {0, 0, 0, 0} }; @@ -914,18 +1406,32 @@ static struct PyModuleDef __pyx_moduledef = { #endif static __Pyx_StringTabEntry __pyx_string_tab[] = { + {&__pyx_kp_u_, __pyx_k_, sizeof(__pyx_k_), 0, 1, 0, 0}, + {&__pyx_kp_u__2, __pyx_k__2, sizeof(__pyx_k__2), 0, 1, 0, 0}, {&__pyx_n_s_c, __pyx_k_c, sizeof(__pyx_k_c), 0, 0, 1, 1}, + {&__pyx_n_s_compute_ngrams, __pyx_k_compute_ngrams, sizeof(__pyx_k_compute_ngrams), 0, 0, 1, 1}, + {&__pyx_n_s_extended_word, __pyx_k_extended_word, sizeof(__pyx_k_extended_word), 0, 0, 1, 1}, {&__pyx_n_s_ft_hash, __pyx_k_ft_hash, sizeof(__pyx_k_ft_hash), 0, 0, 1, 1}, {&__pyx_n_s_gensim_models_utils_any2vec_fast, __pyx_k_gensim_models_utils_any2vec_fast, sizeof(__pyx_k_gensim_models_utils_any2vec_fast), 0, 0, 1, 1}, {&__pyx_n_s_h, __pyx_k_h, sizeof(__pyx_k_h), 0, 0, 1, 1}, {&__pyx_kp_s_home_jojo_projekte_gensim_gensi, __pyx_k_home_jojo_projekte_gensim_gensi, sizeof(__pyx_k_home_jojo_projekte_gensim_gensi), 0, 0, 1, 0}, + {&__pyx_n_s_i, __pyx_k_i, sizeof(__pyx_k_i), 0, 0, 1, 1}, {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1}, + {&__pyx_n_s_max_n, __pyx_k_max_n, sizeof(__pyx_k_max_n), 0, 0, 1, 1}, + {&__pyx_n_s_min_n, __pyx_k_min_n, sizeof(__pyx_k_min_n), 0, 0, 1, 1}, + {&__pyx_n_s_ngram_length, __pyx_k_ngram_length, sizeof(__pyx_k_ngram_length), 0, 0, 1, 1}, + {&__pyx_n_s_ngrams, __pyx_k_ngrams, sizeof(__pyx_k_ngrams), 0, 0, 1, 1}, + {&__pyx_n_s_range, __pyx_k_range, sizeof(__pyx_k_range), 0, 0, 1, 1}, {&__pyx_n_s_string, __pyx_k_string, sizeof(__pyx_k_string), 0, 0, 1, 1}, {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, + {&__pyx_n_s_word, __pyx_k_word, sizeof(__pyx_k_word), 0, 0, 1, 1}, {0, 0, 0, 0, 0, 0, 0} }; static int __Pyx_InitCachedBuiltins(void) { + __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(0, 18, __pyx_L1_error) return 0; + __pyx_L1_error:; + return -1; } static int __Pyx_InitCachedConstants(void) { @@ -939,10 +1445,22 @@ static int __Pyx_InitCachedConstants(void) { * cdef unsigned int h = 2166136261 * for c in string: */ - __pyx_tuple_ = PyTuple_Pack(3, __pyx_n_s_string, __pyx_n_s_h, __pyx_n_s_c); if (unlikely(!__pyx_tuple_)) __PYX_ERR(0, 7, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple_); - __Pyx_GIVEREF(__pyx_tuple_); - __pyx_codeobj__2 = (PyObject*)__Pyx_PyCode_New(1, 0, 3, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple_, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_jojo_projekte_gensim_gensi, __pyx_n_s_ft_hash, 7, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__2)) __PYX_ERR(0, 7, __pyx_L1_error) + __pyx_tuple__3 = PyTuple_Pack(3, __pyx_n_s_string, __pyx_n_s_h, __pyx_n_s_c); if (unlikely(!__pyx_tuple__3)) __PYX_ERR(0, 7, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__3); + __Pyx_GIVEREF(__pyx_tuple__3); + __pyx_codeobj__4 = (PyObject*)__Pyx_PyCode_New(1, 0, 3, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__3, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_jojo_projekte_gensim_gensi, __pyx_n_s_ft_hash, 7, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__4)) __PYX_ERR(0, 7, __pyx_L1_error) + + /* "gensim/models/utils_any2vec_fast.pyx":15 + * + * + * def compute_ngrams(unicode word, unsigned int min_n, unsigned int max_n): # <<<<<<<<<<<<<< + * cdef unicode extended_word = f'<{word}>' + * ngrams = [] + */ + __pyx_tuple__5 = PyTuple_Pack(7, __pyx_n_s_word, __pyx_n_s_min_n, __pyx_n_s_max_n, __pyx_n_s_extended_word, __pyx_n_s_ngrams, __pyx_n_s_ngram_length, __pyx_n_s_i); if (unlikely(!__pyx_tuple__5)) __PYX_ERR(0, 15, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__5); + __Pyx_GIVEREF(__pyx_tuple__5); + __pyx_codeobj__6 = (PyObject*)__Pyx_PyCode_New(3, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__5, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_jojo_projekte_gensim_gensi, __pyx_n_s_compute_ngrams, 15, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__6)) __PYX_ERR(0, 15, __pyx_L1_error) __Pyx_RefNannyFinishContext(); return 0; __pyx_L1_error:; @@ -952,6 +1470,8 @@ static int __Pyx_InitCachedConstants(void) { static int __Pyx_InitGlobals(void) { if (__Pyx_InitStrings(__pyx_string_tab) < 0) __PYX_ERR(0, 1, __pyx_L1_error); + __pyx_int_0 = PyInt_FromLong(0); if (unlikely(!__pyx_int_0)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_int_1 = PyInt_FromLong(1); if (unlikely(!__pyx_int_1)) __PYX_ERR(0, 1, __pyx_L1_error) return 0; __pyx_L1_error:; return -1; @@ -1061,6 +1581,18 @@ PyMODINIT_FUNC PyInit_utils_any2vec_fast(void) if (PyDict_SetItem(__pyx_d, __pyx_n_s_ft_hash, __pyx_t_1) < 0) __PYX_ERR(0, 7, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + /* "gensim/models/utils_any2vec_fast.pyx":15 + * + * + * def compute_ngrams(unicode word, unsigned int min_n, unsigned int max_n): # <<<<<<<<<<<<<< + * cdef unicode extended_word = f'<{word}>' + * ngrams = [] + */ + __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_18utils_any2vec_fast_3compute_ngrams, NULL, __pyx_n_s_gensim_models_utils_any2vec_fast); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 15, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_compute_ngrams, __pyx_t_1) < 0) __PYX_ERR(0, 15, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + /* "gensim/models/utils_any2vec_fast.pyx":1 * #!/usr/bin/env cython # <<<<<<<<<<<<<< * # cython: boundscheck=False @@ -1111,6 +1643,20 @@ static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) { } #endif +/* GetBuiltinName */ +static PyObject *__Pyx_GetBuiltinName(PyObject *name) { + PyObject* result = __Pyx_PyObject_GetAttrStr(__pyx_b, name); + if (unlikely(!result)) { + PyErr_Format(PyExc_NameError, +#if PY_MAJOR_VERSION >= 3 + "name '%U' is not defined", name); +#else + "name '%.200s' is not defined", PyString_AS_STRING(name)); +#endif + } + return result; +} + /* ArgTypeTest */ static void __Pyx_RaiseArgumentTypeInvalid(const char* name, PyObject *obj, PyTypeObject *type) { PyErr_Format(PyExc_TypeError, @@ -1154,6 +1700,372 @@ static CYTHON_INLINE int __Pyx_init_unicode_iteration( return 0; } +/* RaiseArgTupleInvalid */ +static void __Pyx_RaiseArgtupleInvalid( + const char* func_name, + int exact, + Py_ssize_t num_min, + Py_ssize_t num_max, + Py_ssize_t num_found) +{ + Py_ssize_t num_expected; + const char *more_or_less; + if (num_found < num_min) { + num_expected = num_min; + more_or_less = "at least"; + } else { + num_expected = num_max; + more_or_less = "at most"; + } + if (exact) { + more_or_less = "exactly"; + } + PyErr_Format(PyExc_TypeError, + "%.200s() takes %.8s %" CYTHON_FORMAT_SSIZE_T "d positional argument%.1s (%" CYTHON_FORMAT_SSIZE_T "d given)", + func_name, more_or_less, num_expected, + (num_expected == 1) ? "" : "s", num_found); +} + +/* RaiseDoubleKeywords */ +static void __Pyx_RaiseDoubleKeywordsError( + const char* func_name, + PyObject* kw_name) +{ + PyErr_Format(PyExc_TypeError, + #if PY_MAJOR_VERSION >= 3 + "%s() got multiple values for keyword argument '%U'", func_name, kw_name); + #else + "%s() got multiple values for keyword argument '%s'", func_name, + PyString_AsString(kw_name)); + #endif +} + +/* ParseKeywords */ +static int __Pyx_ParseOptionalKeywords( + PyObject *kwds, + PyObject **argnames[], + PyObject *kwds2, + PyObject *values[], + Py_ssize_t num_pos_args, + const char* function_name) +{ + PyObject *key = 0, *value = 0; + Py_ssize_t pos = 0; + PyObject*** name; + PyObject*** first_kw_arg = argnames + num_pos_args; + while (PyDict_Next(kwds, &pos, &key, &value)) { + name = first_kw_arg; + while (*name && (**name != key)) name++; + if (*name) { + values[name-argnames] = value; + continue; + } + name = first_kw_arg; + #if PY_MAJOR_VERSION < 3 + if (likely(PyString_CheckExact(key)) || likely(PyString_Check(key))) { + while (*name) { + if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key)) + && _PyString_Eq(**name, key)) { + values[name-argnames] = value; + break; + } + name++; + } + if (*name) continue; + else { + PyObject*** argname = argnames; + while (argname != first_kw_arg) { + if ((**argname == key) || ( + (CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**argname) == PyString_GET_SIZE(key)) + && _PyString_Eq(**argname, key))) { + goto arg_passed_twice; + } + argname++; + } + } + } else + #endif + if (likely(PyUnicode_Check(key))) { + while (*name) { + int cmp = (**name == key) ? 0 : + #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 + (PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 : + #endif + PyUnicode_Compare(**name, key); + if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; + if (cmp == 0) { + values[name-argnames] = value; + break; + } + name++; + } + if (*name) continue; + else { + PyObject*** argname = argnames; + while (argname != first_kw_arg) { + int cmp = (**argname == key) ? 0 : + #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 + (PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 : + #endif + PyUnicode_Compare(**argname, key); + if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; + if (cmp == 0) goto arg_passed_twice; + argname++; + } + } + } else + goto invalid_keyword_type; + if (kwds2) { + if (unlikely(PyDict_SetItem(kwds2, key, value))) goto bad; + } else { + goto invalid_keyword; + } + } + return 0; +arg_passed_twice: + __Pyx_RaiseDoubleKeywordsError(function_name, key); + goto bad; +invalid_keyword_type: + PyErr_Format(PyExc_TypeError, + "%.200s() keywords must be strings", function_name); + goto bad; +invalid_keyword: + PyErr_Format(PyExc_TypeError, + #if PY_MAJOR_VERSION < 3 + "%.200s() got an unexpected keyword argument '%.200s'", + function_name, PyString_AsString(key)); + #else + "%s() got an unexpected keyword argument '%U'", + function_name, key); + #endif +bad: + return -1; +} + +/* JoinPyUnicode */ +static PyObject* __Pyx_PyUnicode_Join(PyObject* value_tuple, Py_ssize_t value_count, Py_ssize_t result_ulength, + CYTHON_UNUSED Py_UCS4 max_char) { +#if CYTHON_USE_UNICODE_INTERNALS && CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + PyObject *result_uval; + int result_ukind; + Py_ssize_t i, char_pos; + void *result_udata; +#if CYTHON_PEP393_ENABLED + result_uval = PyUnicode_New(result_ulength, max_char); + if (unlikely(!result_uval)) return NULL; + result_ukind = (max_char <= 255) ? PyUnicode_1BYTE_KIND : (max_char <= 65535) ? PyUnicode_2BYTE_KIND : PyUnicode_4BYTE_KIND; + result_udata = PyUnicode_DATA(result_uval); +#else + result_uval = PyUnicode_FromUnicode(NULL, result_ulength); + if (unlikely(!result_uval)) return NULL; + result_ukind = sizeof(Py_UNICODE); + result_udata = PyUnicode_AS_UNICODE(result_uval); +#endif + char_pos = 0; + for (i=0; i < value_count; i++) { + int ukind; + Py_ssize_t ulength; + void *udata; + PyObject *uval = PyTuple_GET_ITEM(value_tuple, i); + if (unlikely(__Pyx_PyUnicode_READY(uval))) + goto bad; + ulength = __Pyx_PyUnicode_GET_LENGTH(uval); + if (unlikely(!ulength)) + continue; + if (unlikely(char_pos + ulength < 0)) + goto overflow; + ukind = __Pyx_PyUnicode_KIND(uval); + udata = __Pyx_PyUnicode_DATA(uval); + if (!CYTHON_PEP393_ENABLED || ukind == result_ukind) { + memcpy((char *)result_udata + char_pos * result_ukind, udata, ulength * result_ukind); + } else { + #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030300F0 + _PyUnicode_FastCopyCharacters(result_uval, char_pos, uval, 0, ulength); + #else + Py_ssize_t j; + for (j=0; j < ulength; j++) { + Py_UCS4 uchar = __Pyx_PyUnicode_READ(ukind, udata, j); + __Pyx_PyUnicode_WRITE(result_ukind, result_udata, char_pos+j, uchar); + } + #endif + } + char_pos += ulength; + } + return result_uval; +overflow: + PyErr_SetString(PyExc_OverflowError, "join() result is too long for a Python string"); +bad: + Py_DECREF(result_uval); + return NULL; +#else + result_ulength++; + value_count++; + return PyUnicode_Join(__pyx_empty_unicode, value_tuple); +#endif +} + +/* PyObjectCall */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) { + PyObject *result; + ternaryfunc call = func->ob_type->tp_call; + if (unlikely(!call)) + return PyObject_Call(func, arg, kw); + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) + return NULL; + result = (*call)(func, arg, kw); + Py_LeaveRecursiveCall(); + if (unlikely(!result) && unlikely(!PyErr_Occurred())) { + PyErr_SetString( + PyExc_SystemError, + "NULL result without error in PyObject_Call"); + } + return result; +} +#endif + +/* PyIntBinop */ +#if !CYTHON_COMPILING_IN_PYPY +static PyObject* __Pyx_PyInt_AddObjC(PyObject *op1, PyObject *op2, CYTHON_UNUSED long intval, CYTHON_UNUSED int inplace) { + #if PY_MAJOR_VERSION < 3 + if (likely(PyInt_CheckExact(op1))) { + const long b = intval; + long x; + long a = PyInt_AS_LONG(op1); + x = (long)((unsigned long)a + b); + if (likely((x^a) >= 0 || (x^b) >= 0)) + return PyInt_FromLong(x); + return PyLong_Type.tp_as_number->nb_add(op1, op2); + } + #endif + #if CYTHON_USE_PYLONG_INTERNALS + if (likely(PyLong_CheckExact(op1))) { + const long b = intval; + long a, x; +#ifdef HAVE_LONG_LONG + const PY_LONG_LONG llb = intval; + PY_LONG_LONG lla, llx; +#endif + const digit* digits = ((PyLongObject*)op1)->ob_digit; + const Py_ssize_t size = Py_SIZE(op1); + if (likely(__Pyx_sst_abs(size) <= 1)) { + a = likely(size) ? digits[0] : 0; + if (size == -1) a = -a; + } else { + switch (size) { + case -2: + if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { + a = -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])); + break; +#ifdef HAVE_LONG_LONG + } else if (8 * sizeof(PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) { + lla = -(PY_LONG_LONG) (((((unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0])); + goto long_long; +#endif + } + case 2: + if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { + a = (long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])); + break; +#ifdef HAVE_LONG_LONG + } else if (8 * sizeof(PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) { + lla = (PY_LONG_LONG) (((((unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0])); + goto long_long; +#endif + } + case -3: + if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { + a = -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])); + break; +#ifdef HAVE_LONG_LONG + } else if (8 * sizeof(PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) { + lla = -(PY_LONG_LONG) (((((((unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0])); + goto long_long; +#endif + } + case 3: + if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { + a = (long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])); + break; +#ifdef HAVE_LONG_LONG + } else if (8 * sizeof(PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) { + lla = (PY_LONG_LONG) (((((((unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0])); + goto long_long; +#endif + } + case -4: + if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) { + a = -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])); + break; +#ifdef HAVE_LONG_LONG + } else if (8 * sizeof(PY_LONG_LONG) - 1 > 4 * PyLong_SHIFT) { + lla = -(PY_LONG_LONG) (((((((((unsigned PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0])); + goto long_long; +#endif + } + case 4: + if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) { + a = (long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])); + break; +#ifdef HAVE_LONG_LONG + } else if (8 * sizeof(PY_LONG_LONG) - 1 > 4 * PyLong_SHIFT) { + lla = (PY_LONG_LONG) (((((((((unsigned PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0])); + goto long_long; +#endif + } + default: return PyLong_Type.tp_as_number->nb_add(op1, op2); + } + } + x = a + b; + return PyLong_FromLong(x); +#ifdef HAVE_LONG_LONG + long_long: + llx = lla + llb; + return PyLong_FromLongLong(llx); +#endif + + + } + #endif + if (PyFloat_CheckExact(op1)) { + const long b = intval; + double a = PyFloat_AS_DOUBLE(op1); + double result; + PyFPE_START_PROTECT("add", return NULL) + result = ((double)a) + (double)b; + PyFPE_END_PROTECT(result) + return PyFloat_FromDouble(result); + } + return (inplace ? PyNumber_InPlaceAdd : PyNumber_Add)(op1, op2); +} +#endif + +/* PyUnicode_Substring */ +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Substring( + PyObject* text, Py_ssize_t start, Py_ssize_t stop) { + Py_ssize_t length; + if (unlikely(__Pyx_PyUnicode_READY(text) == -1)) return NULL; + length = __Pyx_PyUnicode_GET_LENGTH(text); + if (start < 0) { + start += length; + if (start < 0) + start = 0; + } + if (stop < 0) + stop += length; + else if (stop > length) + stop = length; + length = stop - start; + if (length <= 0) + return PyUnicode_FromUnicode(NULL, 0); +#if CYTHON_PEP393_ENABLED + return PyUnicode_FromKindAndData(PyUnicode_KIND(text), + PyUnicode_1BYTE_DATA(text) + start*PyUnicode_KIND(text), stop-start); +#else + return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(text)+start, stop-start); +#endif +} + /* CodeObjectCache */ static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) { int start = 0, mid = 0, end = count - 1; @@ -1315,6 +2227,28 @@ static void __Pyx_AddTraceback(const char *funcname, int c_line, Py_XDECREF(py_frame); } +/* CIntFromPyVerify */ +#define __PYX_VERIFY_RETURN_INT(target_type, func_type, func_value)\ + __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 0) +#define __PYX_VERIFY_RETURN_INT_EXC(target_type, func_type, func_value)\ + __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 1) +#define __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, exc)\ + {\ + func_type value = func_value;\ + if (sizeof(target_type) < sizeof(func_type)) {\ + if (unlikely(value != (func_type) (target_type) value)) {\ + func_type zero = 0;\ + if (exc && unlikely(value == (func_type)-1 && PyErr_Occurred()))\ + return (target_type) -1;\ + if (is_unsigned && unlikely(value < zero))\ + goto raise_neg_overflow;\ + else\ + goto raise_overflow;\ + }\ + }\ + return (target_type) value;\ + } + /* CIntToPy */ static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_int(unsigned int value) { const unsigned int neg_one = (unsigned int) -1, const_zero = (unsigned int) 0; @@ -1346,6 +2280,195 @@ static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_int(unsigned int value) } } +/* CIntFromPy */ +static CYTHON_INLINE unsigned int __Pyx_PyInt_As_unsigned_int(PyObject *x) { + const unsigned int neg_one = (unsigned int) -1, const_zero = (unsigned int) 0; + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if (sizeof(unsigned int) < sizeof(long)) { + __PYX_VERIFY_RETURN_INT(unsigned int, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (unsigned int) val; + } + } else +#endif + if (likely(PyLong_Check(x))) { + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (unsigned int) 0; + case 1: __PYX_VERIFY_RETURN_INT(unsigned int, digit, digits[0]) + case 2: + if (8 * sizeof(unsigned int) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(unsigned int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(unsigned int) >= 2 * PyLong_SHIFT) { + return (unsigned int) (((((unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0])); + } + } + break; + case 3: + if (8 * sizeof(unsigned int) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(unsigned int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(unsigned int) >= 3 * PyLong_SHIFT) { + return (unsigned int) (((((((unsigned int)digits[2]) << PyLong_SHIFT) | (unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0])); + } + } + break; + case 4: + if (8 * sizeof(unsigned int) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(unsigned int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(unsigned int) >= 4 * PyLong_SHIFT) { + return (unsigned int) (((((((((unsigned int)digits[3]) << PyLong_SHIFT) | (unsigned int)digits[2]) << PyLong_SHIFT) | (unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0])); + } + } + break; + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (unsigned int) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if (sizeof(unsigned int) <= sizeof(unsigned long)) { + __PYX_VERIFY_RETURN_INT_EXC(unsigned int, unsigned long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if (sizeof(unsigned int) <= sizeof(unsigned PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(unsigned int, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (unsigned int) 0; + case -1: __PYX_VERIFY_RETURN_INT(unsigned int, sdigit, (sdigit) (-(sdigit)digits[0])) + case 1: __PYX_VERIFY_RETURN_INT(unsigned int, digit, +digits[0]) + case -2: + if (8 * sizeof(unsigned int) - 1 > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(unsigned int, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(unsigned int) - 1 > 2 * PyLong_SHIFT) { + return (unsigned int) (((unsigned int)-1)*(((((unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0]))); + } + } + break; + case 2: + if (8 * sizeof(unsigned int) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(unsigned int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(unsigned int) - 1 > 2 * PyLong_SHIFT) { + return (unsigned int) ((((((unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0]))); + } + } + break; + case -3: + if (8 * sizeof(unsigned int) - 1 > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(unsigned int, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(unsigned int) - 1 > 3 * PyLong_SHIFT) { + return (unsigned int) (((unsigned int)-1)*(((((((unsigned int)digits[2]) << PyLong_SHIFT) | (unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0]))); + } + } + break; + case 3: + if (8 * sizeof(unsigned int) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(unsigned int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(unsigned int) - 1 > 3 * PyLong_SHIFT) { + return (unsigned int) ((((((((unsigned int)digits[2]) << PyLong_SHIFT) | (unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0]))); + } + } + break; + case -4: + if (8 * sizeof(unsigned int) - 1 > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(unsigned int, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(unsigned int) - 1 > 4 * PyLong_SHIFT) { + return (unsigned int) (((unsigned int)-1)*(((((((((unsigned int)digits[3]) << PyLong_SHIFT) | (unsigned int)digits[2]) << PyLong_SHIFT) | (unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0]))); + } + } + break; + case 4: + if (8 * sizeof(unsigned int) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(unsigned int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(unsigned int) - 1 > 4 * PyLong_SHIFT) { + return (unsigned int) ((((((((((unsigned int)digits[3]) << PyLong_SHIFT) | (unsigned int)digits[2]) << PyLong_SHIFT) | (unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0]))); + } + } + break; + } +#endif + if (sizeof(unsigned int) <= sizeof(long)) { + __PYX_VERIFY_RETURN_INT_EXC(unsigned int, long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if (sizeof(unsigned int) <= sizeof(PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(unsigned int, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { +#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) + PyErr_SetString(PyExc_RuntimeError, + "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); +#else + unsigned int val; + PyObject *v = __Pyx_PyNumber_IntOrLong(x); + #if PY_MAJOR_VERSION < 3 + if (likely(v) && !PyLong_Check(v)) { + PyObject *tmp = v; + v = PyNumber_Long(tmp); + Py_DECREF(tmp); + } + #endif + if (likely(v)) { + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + int ret = _PyLong_AsByteArray((PyLongObject *)v, + bytes, sizeof(val), + is_little, !is_unsigned); + Py_DECREF(v); + if (likely(!ret)) + return val; + } +#endif + return (unsigned int) -1; + } + } else { + unsigned int val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (unsigned int) -1; + val = __Pyx_PyInt_As_unsigned_int(tmp); + Py_DECREF(tmp); + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to unsigned int"); + return (unsigned int) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to unsigned int"); + return (unsigned int) -1; +} + /* CIntToPy */ static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) { const long neg_one = (long) -1, const_zero = (long) 0; @@ -1377,28 +2500,6 @@ static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) { } } -/* CIntFromPyVerify */ -#define __PYX_VERIFY_RETURN_INT(target_type, func_type, func_value)\ - __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 0) -#define __PYX_VERIFY_RETURN_INT_EXC(target_type, func_type, func_value)\ - __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 1) -#define __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, exc)\ - {\ - func_type value = func_value;\ - if (sizeof(target_type) < sizeof(func_type)) {\ - if (unlikely(value != (func_type) (target_type) value)) {\ - func_type zero = 0;\ - if (exc && unlikely(value == (func_type)-1 && PyErr_Occurred()))\ - return (target_type) -1;\ - if (is_unsigned && unlikely(value < zero))\ - goto raise_neg_overflow;\ - else\ - goto raise_overflow;\ - }\ - }\ - return (target_type) value;\ - } - /* CIntFromPy */ static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) { const long neg_one = (long) -1, const_zero = (long) 0; diff --git a/gensim/models/utils_any2vec_fast.pyx b/gensim/models/utils_any2vec_fast.pyx index 703f661d3e..42d7618233 100644 --- a/gensim/models/utils_any2vec_fast.pyx +++ b/gensim/models/utils_any2vec_fast.pyx @@ -10,3 +10,14 @@ def ft_hash(unicode string): h ^= ord(c) h *= 16777619 return h + + +def compute_ngrams(unicode word, unsigned int min_n, unsigned int max_n): + cdef unicode extended_word = f'<{word}>' + ngrams = [] + for ngram_length in range(min_n, min(len(extended_word), max_n) + 1): + for i in range(0, len(extended_word) - ngram_length + 1): + ngrams.append(extended_word[i:i + ngram_length]) + return ngrams + + From 51a1a6e2994c8f74e5950cfa0d9dddaa79bf4e76 Mon Sep 17 00:00:00 2001 From: Johannes Baiter Date: Wed, 3 Jan 2018 21:21:42 +0100 Subject: [PATCH 03/20] Reduce fasttext memory usage by computing ngrams on the fly --- gensim/models/deprecated/fasttext_wrapper.py | 34 +- gensim/models/fasttext.py | 134 +- gensim/models/fasttext_inner.c | 2362 +++++++++++------- gensim/models/fasttext_inner.pyx | 8 +- gensim/models/keyedvectors.py | 13 +- gensim/models/utils_any2vec.py | 1 - gensim/models/utils_any2vec_fast.c | 533 ++-- gensim/models/utils_any2vec_fast.pyx | 4 +- gensim/test/test_fasttext.py | 78 +- gensim/test/test_fasttext_wrapper.py | 52 +- setup.py | 3 + 11 files changed, 1843 insertions(+), 1379 deletions(-) diff --git a/gensim/models/deprecated/fasttext_wrapper.py b/gensim/models/deprecated/fasttext_wrapper.py index e48a96072f..9e7416c2a8 100644 --- a/gensim/models/deprecated/fasttext_wrapper.py +++ b/gensim/models/deprecated/fasttext_wrapper.py @@ -70,9 +70,7 @@ def __init__(self): self.syn0_vocab_norm = None self.syn0_ngrams = None self.syn0_ngrams_norm = None - self.ngrams = {} self.hash2index = {} - self.ngrams_word = {} self.min_n = 0 self.max_n = 0 @@ -99,17 +97,18 @@ def word_vec(self, word, use_norm=False): return super(FastTextKeyedVectors, self).word_vec(word, use_norm) else: word_vec = np.zeros(self.syn0_ngrams.shape[1], dtype=np.float32) - ngrams = compute_ngrams(word, self.min_n, self.max_n) - ngrams = [ng for ng in ngrams if ng in self.ngrams] + hashes = [ft_hash(ng) % self.bucket + for ng in compute_ngrams(word, self.min_n, self.max_n)] + hashes = [h for h in hashes if h in self.hash2index] if use_norm: ngram_weights = self.syn0_ngrams_norm else: ngram_weights = self.syn0_ngrams - for ngram in ngrams: - word_vec += ngram_weights[self.ngrams[ngram]] + for ngram_hash in hashes: + word_vec += ngram_weights[self.hash2index[ngram_hash]] if word_vec.any(): - return word_vec / len(ngrams) - else: # No ngrams of the word are present in self.ngrams + return word_vec / len(hashes) + else: # No hashes of any ngrams of the word are present in self.hash2index raise KeyError('all ngrams for word %s absent from model' % word) def init_sims(self, replace=False): @@ -143,7 +142,8 @@ def __contains__(self, word): return True else: char_ngrams = compute_ngrams(word, self.min_n, self.max_n) - return any(ng in self.ngrams for ng in char_ngrams) + return any(ft_hash(ng) % self.bucket in self.hash2index + for ng in char_ngrams) @classmethod def load_word2vec_format(cls, *args, **kwargs): @@ -277,6 +277,12 @@ def load(cls, *args, **kwargs): if hasattr(model.wv, 'syn0_all'): setattr(model.wv, 'syn0_ngrams', model.wv.syn0_all) delattr(model.wv, 'syn0_all') + setattr(model.wv, 'bucket', model.wv.syn0_ngrams.shape[0]) + if not hasattr(model.wv, 'hash2index') and hasattr(model.wv, 'ngrams'): + model.wv.hash2index = {} + for i, ngram in enumerate(model.wv.ngrams): + ngram_hash = ft_hash(ngram) % model.wv.bucket + model.wv.hash2index[ngram_hash] = i return model @classmethod @@ -316,6 +322,7 @@ def load_model_params(self, file_handle): self.hs = loss == 1 self.sg = model == 2 self.bucket = bucket + self.wv.bucket = bucket self.wv.min_n = minn self.wv.max_n = maxn self.sample = t @@ -394,7 +401,6 @@ def init_ngrams(self): vectors are discarded here to save space. """ - self.wv.ngrams = {} all_ngrams = [] self.wv.syn0 = np.zeros((len(self.wv.vocab), self.vector_size), dtype=REAL) @@ -406,9 +412,9 @@ def init_ngrams(self): self.num_ngram_vectors = len(all_ngrams) ngram_indices = [] for i, ngram in enumerate(all_ngrams): - ngram_hash = ft_hash(ngram) + ngram_hash = ft_hash(ngram) % self.bucket ngram_indices.append(len(self.wv.vocab) + ngram_hash % self.bucket) - self.wv.ngrams[ngram] = i + self.wv.hash2index[ngram_hash] = i self.wv.syn0_ngrams = self.wv.syn0_ngrams.take(ngram_indices, axis=0) ngram_weights = self.wv.syn0_ngrams @@ -421,7 +427,9 @@ def init_ngrams(self): for w, vocab in self.wv.vocab.items(): word_ngrams = compute_ngrams(w, self.wv.min_n, self.wv.max_n) for word_ngram in word_ngrams: - self.wv.syn0[vocab.index] += np.array(ngram_weights[self.wv.ngrams[word_ngram]]) + ng_hash = ft_hash(word_ngram) % self.bucket + self.wv.syn0[vocab.index] += np.array( + ngram_weights[self.wv.hash2index[ng_hash]]) self.wv.syn0[vocab.index] /= (len(word_ngrams) + 1) logger.info( diff --git a/gensim/models/fasttext.py b/gensim/models/fasttext.py index 6ea76819cf..6a0aab2988 100644 --- a/gensim/models/fasttext.py +++ b/gensim/models/fasttext.py @@ -39,7 +39,6 @@ from gensim.models.base_any2vec import BaseWordEmbeddingsModel from gensim.models.utils_any2vec import _compute_ngrams, _ft_hash -from six import iteritems from gensim.utils import deprecated, call_on_class_only from gensim import utils @@ -93,10 +92,12 @@ def train_batch_cbow(model, sentences, alpha, work=None, neu1=None): for index in word2_indices: vocab_subwords_indices += [index] - word2_subwords += model.wv.ngrams_word[model.wv.index2word[index]] + word2_subwords += _compute_ngrams(model.wv.index2word[index], + model.min_n, model.max_n) for subword in word2_subwords: - ngrams_subwords_indices.append(model.wv.ngrams[subword]) + ngrams_subwords_indices.append( + model.wv.hash2index[_ft_hash(subword) % model.bucket]) l1_vocab = np_sum(model.wv.syn0_vocab[vocab_subwords_indices], axis=0) # 1 x vector_size l1_ngrams = np_sum(model.wv.syn0_ngrams[ngrams_subwords_indices], axis=0) # 1 x vector_size @@ -144,10 +145,11 @@ def train_batch_sg(model, sentences, alpha, work=None, neu1=None): start = max(0, pos - model.window + reduced_window) subwords_indices = [word.index] - word2_subwords = model.wv.ngrams_word[model.wv.index2word[word.index]] + word2_subwords = _compute_ngrams(model.wv.index2word[word.index], + model.min_n, model.max_n) for subword in word2_subwords: - subwords_indices.append(model.wv.ngrams[subword]) + subwords_indices.append(model.wv.hash2index[_ft_hash(subword) % model.bucket]) for pos2, word2 in enumerate(word_vocabs[start:(pos + model.window + 1 - reduced_window)], start): if pos2 != pos: # don't train on the `word` itself @@ -278,6 +280,7 @@ def __init__(self, sentences=None, sg=0, hs=0, size=100, alpha=0.025, window=5, sorted_vocab=bool(sorted_vocab), null_word=null_word) self.trainables = FastTextTrainables( vector_size=size, seed=seed, bucket=bucket, hashfxn=hashfxn) + self.wv.bucket = self.bucket super(FastText, self).__init__( sentences=sentences, workers=workers, vector_size=size, epochs=iter, callbacks=callbacks, @@ -396,6 +399,36 @@ def _clear_post_train(self): self.wv.vectors_vocab_norm = None self.wv.vectors_ngrams_norm = None + def estimate_memory(self, vocab_size=None, report=None): + vocab_size = vocab_size or len(self.wv.vocab) + vec_size = self.vector_size * np.dtype(np.float32).itemsize + l1_size = self.layer1_size * np.dtype(np.float32).itemsize + report = report or {} + report['vocab'] = len(self.wv.vocab) * (700 if self.hs else 500) + report['syn0_vocab'] = len(self.wv.vocab) * vec_size + num_buckets = self.bucket + if self.hs: + report['syn1'] = len(self.wv.vocab) * l1_size + if self.negative: + report['syn1neg'] = len(self.wv.vocab) * l1_size + if self.word_ngrams > 0 and self.wv.vocab: + buckets = set() + for word in self.wv.vocab: + ngrams = _compute_ngrams(word, self.min_n, self.max_n) + buckets.update(_ft_hash(ng) % self.bucket for ng in ngrams) + num_buckets = len(buckets) + report['syn0_ngrams'] = len(buckets) * vec_size + elif self.word_ngrams > 0: + logger.warn('subword information is enabled, but no vocabulary ' + 'could be found, estimated required memory might be ' + 'inaccurate!') + report['total'] = sum(report.values()) + logger.info( + "estimated required memory for %i words, %i buckets and %i dimensions: %i bytes", + len(self.wv.vocab), num_buckets, self.vector_size, report['total'] + ) + return report + def _do_train_job(self, sentences, alpha, inits): """Train a single batch of sentences. Return 2-tuple `(effective word count after ignoring unknown words and sentence length trimming, total word count)`. @@ -580,6 +613,7 @@ def _load_model_params(self, file_handle): self.hs = loss == 1 self.sg = model == 2 self.trainables.bucket = bucket + self.wv.bucket = bucket self.wv.min_n = minn self.wv.max_n = maxn self.vocabulary.sample = t @@ -709,18 +743,8 @@ def prepare_vocab(self, hs, negative, wv, update=False, keep_raw_vocab=False, tr report_values = super(FastTextVocab, self).prepare_vocab( hs, negative, wv, update=update, keep_raw_vocab=keep_raw_vocab, trim_rule=trim_rule, min_count=min_count, sample=sample, dry_run=dry_run) - self.build_ngrams(wv, update=update) return report_values - def build_ngrams(self, wv, update=False): - if not update: - wv.ngrams_word = {} - for w, v in iteritems(wv.vocab): - wv.ngrams_word[w] = _compute_ngrams(w, wv.min_n, wv.max_n) - else: - for w, v in iteritems(wv.vocab): - wv.ngrams_word[w] = _compute_ngrams(w, wv.min_n, wv.max_n) - class FastTextTrainables(Word2VecTrainables): def __init__(self, vector_size=100, seed=1, hashfxn=hash, bucket=2000000): @@ -744,54 +768,43 @@ def init_ngrams_weights(self, wv, update=False, vocabulary=None): """ if not update: - wv.ngrams = {} wv.vectors_vocab = empty((len(wv.vocab), wv.vector_size), dtype=REAL) self.vectors_vocab_lockf = ones((len(wv.vocab), wv.vector_size), dtype=REAL) wv.vectors_ngrams = empty((self.bucket, wv.vector_size), dtype=REAL) self.vectors_ngrams_lockf = ones((self.bucket, wv.vector_size), dtype=REAL) - all_ngrams = [] - for w, ngrams in iteritems(wv.ngrams_word): - all_ngrams += ngrams - - all_ngrams = list(set(all_ngrams)) - wv.num_ngram_vectors = len(all_ngrams) - logger.info("Total number of ngrams is %d", len(all_ngrams)) - wv.hash2index = {} ngram_indices = [] new_hash_count = 0 - for i, ngram in enumerate(all_ngrams): - ngram_hash = _ft_hash(ngram) % self.bucket - if ngram_hash in wv.hash2index: - wv.ngrams[ngram] = wv.hash2index[ngram_hash] - else: - ngram_indices.append(ngram_hash % self.bucket) - wv.hash2index[ngram_hash] = new_hash_count - wv.ngrams[ngram] = wv.hash2index[ngram_hash] - new_hash_count = new_hash_count + 1 + wv.num_ngram_vectors = 0 + for word in wv.vocab.keys(): + for ngram in _compute_ngrams(word, wv.min_n, wv.max_n): + ngram_hash = _ft_hash(ngram) % self.bucket + if ngram_hash not in wv.hash2index: + wv.num_ngram_vectors += 1 + ngram_indices.append(ngram_hash) + wv.hash2index[ngram_hash] = new_hash_count + new_hash_count = new_hash_count + 1 + + logger.info("Total number of ngrams is %d", wv.num_ngram_vectors) wv.vectors_ngrams = wv.vectors_ngrams.take(ngram_indices, axis=0) self.vectors_ngrams_lockf = self.vectors_ngrams_lockf.take(ngram_indices, axis=0) self.reset_ngrams_weights(wv) else: - new_ngrams = [] - for w, ngrams in iteritems(wv.ngrams_word): - new_ngrams += [ng for ng in ngrams if ng not in wv.ngrams] - - new_ngrams = list(set(new_ngrams)) - wv.num_ngram_vectors += len(new_ngrams) - logger.info("Number of new ngrams is %d", len(new_ngrams)) new_hash_count = 0 - for i, ngram in enumerate(new_ngrams): - ngram_hash = _ft_hash(ngram) % self.bucket - if ngram_hash not in wv.hash2index: - wv.hash2index[ngram_hash] = new_hash_count + self.old_hash2index_len - wv.ngrams[ngram] = wv.hash2index[ngram_hash] - new_hash_count = new_hash_count + 1 - else: - wv.ngrams[ngram] = wv.hash2index[ngram_hash] + num_new_ngrams = 0 + for word in wv.vocab.keys(): + for ngram in _compute_ngrams(word, wv.min_n, wv.max_n): + ngram_hash = _ft_hash(ngram) % self.bucket + if ngram_hash not in wv.hash2index: + wv.hash2index[ngram_hash] = new_hash_count + self.old_hash2index_len + new_hash_count = new_hash_count + 1 + num_new_ngrams += 1 + + wv.num_ngram_vectors += num_new_ngrams + logger.info("Number of new ngrams is %d", num_new_ngrams) rand_obj = np.random rand_obj.seed(self.seed) @@ -833,10 +846,11 @@ def get_vocab_word_vecs(self, wv): """Calculate vectors for words in vocabulary and stores them in `vectors`.""" for w, v in wv.vocab.items(): word_vec = np.copy(wv.vectors_vocab[v.index]) - ngrams = wv.ngrams_word[w] + ngrams = _compute_ngrams(w, wv.min_n, wv.max_n) ngram_weights = wv.vectors_ngrams for ngram in ngrams: - word_vec += ngram_weights[wv.ngrams[ngram]] + word_vec += ngram_weights[ + wv.hash2index[_ft_hash(ngram) % self.bucket]] word_vec /= (len(ngrams) + 1) wv.vectors[v.index] = word_vec @@ -847,20 +861,21 @@ def init_ngrams_post_load(self, file_name, wv): vectors are discarded here to save space. """ - all_ngrams = [] wv.vectors = np.zeros((len(wv.vocab), wv.vector_size), dtype=REAL) for w, vocab in wv.vocab.items(): - all_ngrams += _compute_ngrams(w, wv.min_n, wv.max_n) wv.vectors[vocab.index] += np.array(wv.vectors_ngrams[vocab.index]) - all_ngrams = set(all_ngrams) - wv.num_ngram_vectors = len(all_ngrams) ngram_indices = [] - for i, ngram in enumerate(all_ngrams): - ngram_hash = _ft_hash(ngram) - ngram_indices.append(len(wv.vocab) + ngram_hash % self.bucket) - wv.ngrams[ngram] = i + wv.num_ngram_vectors = 0 + for word in wv.vocab.keys(): + for ngram in _compute_ngrams(word, wv.min_n, wv.max_n): + ngram_hash = _ft_hash(ngram) % self.bucket + if ngram_hash in wv.hash2index: + continue + ngram_indices.append(len(wv.vocab) + ngram_hash) + wv.hash2index[ngram_hash] = wv.num_ngram_vectors + wv.num_ngram_vectors += 1 wv.vectors_ngrams = wv.vectors_ngrams.take(ngram_indices, axis=0) ngram_weights = wv.vectors_ngrams @@ -873,7 +888,8 @@ def init_ngrams_post_load(self, file_name, wv): for w, vocab in wv.vocab.items(): word_ngrams = _compute_ngrams(w, wv.min_n, wv.max_n) for word_ngram in word_ngrams: - wv.vectors[vocab.index] += np.array(ngram_weights[wv.ngrams[word_ngram]]) + vec_idx = wv.hash2index[_ft_hash(word_ngram) % self.bucket] + wv.vectors[vocab.index] += np.array(ngram_weights[vec_idx]) wv.vectors[vocab.index] /= (len(word_ngrams) + 1) logger.info( diff --git a/gensim/models/fasttext_inner.c b/gensim/models/fasttext_inner.c index fb837e7b34..2593039f3d 100644 --- a/gensim/models/fasttext_inner.c +++ b/gensim/models/fasttext_inner.c @@ -1,5 +1,16 @@ /* Generated by Cython 0.25.2 */ +/* BEGIN: Cython Metadata +{ + "distutils": { + "depends": [ + "gensim/models/voidptr.h" + ] + }, + "module_name": "gensim.models.fasttext_inner" +} +END: Cython Metadata */ + #define PY_SSIZE_T_CLEAN #include "Python.h" #ifndef Py_PYTHON_H @@ -658,7 +669,7 @@ static const char *__pyx_f[] = { "type.pxd", }; -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":725 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":725 * # in Cython to enable them only on the right systems. * * ctypedef npy_int8 int8_t # <<<<<<<<<<<<<< @@ -667,7 +678,7 @@ static const char *__pyx_f[] = { */ typedef npy_int8 __pyx_t_5numpy_int8_t; -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":726 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":726 * * ctypedef npy_int8 int8_t * ctypedef npy_int16 int16_t # <<<<<<<<<<<<<< @@ -676,7 +687,7 @@ typedef npy_int8 __pyx_t_5numpy_int8_t; */ typedef npy_int16 __pyx_t_5numpy_int16_t; -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":727 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":727 * ctypedef npy_int8 int8_t * ctypedef npy_int16 int16_t * ctypedef npy_int32 int32_t # <<<<<<<<<<<<<< @@ -685,7 +696,7 @@ typedef npy_int16 __pyx_t_5numpy_int16_t; */ typedef npy_int32 __pyx_t_5numpy_int32_t; -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":728 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":728 * ctypedef npy_int16 int16_t * ctypedef npy_int32 int32_t * ctypedef npy_int64 int64_t # <<<<<<<<<<<<<< @@ -694,7 +705,7 @@ typedef npy_int32 __pyx_t_5numpy_int32_t; */ typedef npy_int64 __pyx_t_5numpy_int64_t; -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":732 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":732 * #ctypedef npy_int128 int128_t * * ctypedef npy_uint8 uint8_t # <<<<<<<<<<<<<< @@ -703,7 +714,7 @@ typedef npy_int64 __pyx_t_5numpy_int64_t; */ typedef npy_uint8 __pyx_t_5numpy_uint8_t; -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":733 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":733 * * ctypedef npy_uint8 uint8_t * ctypedef npy_uint16 uint16_t # <<<<<<<<<<<<<< @@ -712,7 +723,7 @@ typedef npy_uint8 __pyx_t_5numpy_uint8_t; */ typedef npy_uint16 __pyx_t_5numpy_uint16_t; -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":734 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":734 * ctypedef npy_uint8 uint8_t * ctypedef npy_uint16 uint16_t * ctypedef npy_uint32 uint32_t # <<<<<<<<<<<<<< @@ -721,7 +732,7 @@ typedef npy_uint16 __pyx_t_5numpy_uint16_t; */ typedef npy_uint32 __pyx_t_5numpy_uint32_t; -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":735 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":735 * ctypedef npy_uint16 uint16_t * ctypedef npy_uint32 uint32_t * ctypedef npy_uint64 uint64_t # <<<<<<<<<<<<<< @@ -730,7 +741,7 @@ typedef npy_uint32 __pyx_t_5numpy_uint32_t; */ typedef npy_uint64 __pyx_t_5numpy_uint64_t; -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":739 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":739 * #ctypedef npy_uint128 uint128_t * * ctypedef npy_float32 float32_t # <<<<<<<<<<<<<< @@ -739,7 +750,7 @@ typedef npy_uint64 __pyx_t_5numpy_uint64_t; */ typedef npy_float32 __pyx_t_5numpy_float32_t; -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":740 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":740 * * ctypedef npy_float32 float32_t * ctypedef npy_float64 float64_t # <<<<<<<<<<<<<< @@ -748,7 +759,7 @@ typedef npy_float32 __pyx_t_5numpy_float32_t; */ typedef npy_float64 __pyx_t_5numpy_float64_t; -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":749 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":749 * # The int types are mapped a bit surprising -- * # numpy.int corresponds to 'l' and numpy.long to 'q' * ctypedef npy_long int_t # <<<<<<<<<<<<<< @@ -757,7 +768,7 @@ typedef npy_float64 __pyx_t_5numpy_float64_t; */ typedef npy_long __pyx_t_5numpy_int_t; -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":750 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":750 * # numpy.int corresponds to 'l' and numpy.long to 'q' * ctypedef npy_long int_t * ctypedef npy_longlong long_t # <<<<<<<<<<<<<< @@ -766,7 +777,7 @@ typedef npy_long __pyx_t_5numpy_int_t; */ typedef npy_longlong __pyx_t_5numpy_long_t; -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":751 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":751 * ctypedef npy_long int_t * ctypedef npy_longlong long_t * ctypedef npy_longlong longlong_t # <<<<<<<<<<<<<< @@ -775,7 +786,7 @@ typedef npy_longlong __pyx_t_5numpy_long_t; */ typedef npy_longlong __pyx_t_5numpy_longlong_t; -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":753 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":753 * ctypedef npy_longlong longlong_t * * ctypedef npy_ulong uint_t # <<<<<<<<<<<<<< @@ -784,7 +795,7 @@ typedef npy_longlong __pyx_t_5numpy_longlong_t; */ typedef npy_ulong __pyx_t_5numpy_uint_t; -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":754 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":754 * * ctypedef npy_ulong uint_t * ctypedef npy_ulonglong ulong_t # <<<<<<<<<<<<<< @@ -793,7 +804,7 @@ typedef npy_ulong __pyx_t_5numpy_uint_t; */ typedef npy_ulonglong __pyx_t_5numpy_ulong_t; -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":755 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":755 * ctypedef npy_ulong uint_t * ctypedef npy_ulonglong ulong_t * ctypedef npy_ulonglong ulonglong_t # <<<<<<<<<<<<<< @@ -802,7 +813,7 @@ typedef npy_ulonglong __pyx_t_5numpy_ulong_t; */ typedef npy_ulonglong __pyx_t_5numpy_ulonglong_t; -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":757 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":757 * ctypedef npy_ulonglong ulonglong_t * * ctypedef npy_intp intp_t # <<<<<<<<<<<<<< @@ -811,7 +822,7 @@ typedef npy_ulonglong __pyx_t_5numpy_ulonglong_t; */ typedef npy_intp __pyx_t_5numpy_intp_t; -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":758 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":758 * * ctypedef npy_intp intp_t * ctypedef npy_uintp uintp_t # <<<<<<<<<<<<<< @@ -820,7 +831,7 @@ typedef npy_intp __pyx_t_5numpy_intp_t; */ typedef npy_uintp __pyx_t_5numpy_uintp_t; -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":760 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":760 * ctypedef npy_uintp uintp_t * * ctypedef npy_double float_t # <<<<<<<<<<<<<< @@ -829,7 +840,7 @@ typedef npy_uintp __pyx_t_5numpy_uintp_t; */ typedef npy_double __pyx_t_5numpy_float_t; -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":761 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":761 * * ctypedef npy_double float_t * ctypedef npy_double double_t # <<<<<<<<<<<<<< @@ -838,7 +849,7 @@ typedef npy_double __pyx_t_5numpy_float_t; */ typedef npy_double __pyx_t_5numpy_double_t; -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":762 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":762 * ctypedef npy_double float_t * ctypedef npy_double double_t * ctypedef npy_longdouble longdouble_t # <<<<<<<<<<<<<< @@ -882,7 +893,7 @@ static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(do /*--- Type declarations ---*/ -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":764 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":764 * ctypedef npy_longdouble longdouble_t * * ctypedef npy_cfloat cfloat_t # <<<<<<<<<<<<<< @@ -891,7 +902,7 @@ static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(do */ typedef npy_cfloat __pyx_t_5numpy_cfloat_t; -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":765 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":765 * * ctypedef npy_cfloat cfloat_t * ctypedef npy_cdouble cdouble_t # <<<<<<<<<<<<<< @@ -900,7 +911,7 @@ typedef npy_cfloat __pyx_t_5numpy_cfloat_t; */ typedef npy_cdouble __pyx_t_5numpy_cdouble_t; -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":766 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":766 * ctypedef npy_cfloat cfloat_t * ctypedef npy_cdouble cdouble_t * ctypedef npy_clongdouble clongdouble_t # <<<<<<<<<<<<<< @@ -909,7 +920,7 @@ typedef npy_cdouble __pyx_t_5numpy_cdouble_t; */ typedef npy_clongdouble __pyx_t_5numpy_clongdouble_t; -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":768 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":768 * ctypedef npy_clongdouble clongdouble_t * * ctypedef npy_cdouble complex_t # <<<<<<<<<<<<<< @@ -1101,23 +1112,6 @@ static CYTHON_INLINE int __Pyx_PySequence_ContainsTF(PyObject* item, PyObject* s return unlikely(result < 0) ? result : (result == (eq == Py_EQ)); } -/* ListCompAppend.proto */ -#if CYTHON_USE_PYLIST_INTERNALS && CYTHON_ASSUME_SAFE_MACROS -static CYTHON_INLINE int __Pyx_ListComp_Append(PyObject* list, PyObject* x) { - PyListObject* L = (PyListObject*) list; - Py_ssize_t len = Py_SIZE(list); - if (likely(L->allocated > len)) { - Py_INCREF(x); - PyList_SET_ITEM(list, len, x); - Py_SIZE(list) = len+1; - return 0; - } - return PyList_Append(list, x); -} -#else -#define __Pyx_ListComp_Append(L,x) PyList_Append(L,x) -#endif - /* GetModuleGlobalName.proto */ static CYTHON_INLINE PyObject *__Pyx_GetModuleGlobalName(PyObject *name); @@ -1139,6 +1133,31 @@ static CYTHON_INLINE PyObject *__Pyx_PyCFunction_FastCall(PyObject *func, PyObje #define __Pyx_PyCFunction_FastCall(func, args, nargs) (assert(0), NULL) #endif +/* ListCompAppend.proto */ +#if CYTHON_USE_PYLIST_INTERNALS && CYTHON_ASSUME_SAFE_MACROS +static CYTHON_INLINE int __Pyx_ListComp_Append(PyObject* list, PyObject* x) { + PyListObject* L = (PyListObject*) list; + Py_ssize_t len = Py_SIZE(list); + if (likely(L->allocated > len)) { + Py_INCREF(x); + PyList_SET_ITEM(list, len, x); + Py_SIZE(list) = len+1; + return 0; + } + return PyList_Append(list, x); +} +#else +#define __Pyx_ListComp_Append(L,x) PyList_Append(L,x) +#endif + +/* PyObjectCallMethO.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg); +#endif + +/* PyObjectCallOneArg.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg); + /* PyThreadStateGet.proto */ #if CYTHON_FAST_THREAD_STATE #define __Pyx_PyThreadState_declare PyThreadState *__pyx_tstate; @@ -1229,14 +1248,6 @@ static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level); /* ImportFrom.proto */ static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name); -/* PyObjectCallMethO.proto */ -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg); -#endif - -/* PyObjectCallOneArg.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg); - /* PyObjectCallNoArg.proto */ #if CYTHON_COMPILING_IN_CPYTHON static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func); @@ -1280,6 +1291,13 @@ static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_PY_LONG_LONG(unsigned P #define __PYX_FORCE_INIT_THREADS 0 #endif +/* Print.proto */ +static int __Pyx_Print(PyObject*, PyObject *, int); +#if CYTHON_COMPILING_IN_PYPY || PY_MAJOR_VERSION >= 3 +static PyObject* __pyx_print = 0; +static PyObject* __pyx_print_kwargs = 0; +#endif + /* RealImag.proto */ #if CYTHON_CCOMPLEX #ifdef __cplusplus @@ -1396,6 +1414,9 @@ static CYTHON_INLINE unsigned PY_LONG_LONG __Pyx_PyInt_As_unsigned_PY_LONG_LONG( /* CIntFromPy.proto */ static CYTHON_INLINE npy_uint32 __Pyx_PyInt_As_npy_uint32(PyObject *); +/* PrintOne.proto */ +static int __Pyx_PrintOne(PyObject* stream, PyObject *o); + /* CheckBinaryVersion.proto */ static int __Pyx_check_binary_version(void); @@ -1510,8 +1531,10 @@ static const char __pyx_k_l1[] = "_l1"; static const char __pyx_k_np[] = "np"; static const char __pyx_k_wv[] = "wv"; static const char __pyx_k__14[] = "*"; +static const char __pyx_k_end[] = "end"; static const char __pyx_k_REAL[] = "REAL"; static const char __pyx_k_code[] = "code"; +static const char __pyx_k_file[] = "file"; static const char __pyx_k_init[] = "init"; static const char __pyx_k_item[] = "item"; static const char __pyx_k_l1_2[] = "l1"; @@ -1530,16 +1553,19 @@ static const char __pyx_k_d_res[] = "d_res"; static const char __pyx_k_dtype[] = "dtype"; static const char __pyx_k_fblas[] = "fblas"; static const char __pyx_k_index[] = "index"; +static const char __pyx_k_max_n[] = "max_n"; +static const char __pyx_k_min_n[] = "min_n"; static const char __pyx_k_model[] = "model"; static const char __pyx_k_numpy[] = "numpy"; static const char __pyx_k_p_res[] = "p_res"; static const char __pyx_k_point[] = "point"; +static const char __pyx_k_print[] = "print"; static const char __pyx_k_range[] = "range"; static const char __pyx_k_token[] = "token"; static const char __pyx_k_vocab[] = "vocab"; +static const char __pyx_k_bucket[] = "bucket"; static const char __pyx_k_import[] = "__import__"; static const char __pyx_k_neu1_2[] = "neu1"; -static const char __pyx_k_ngrams[] = "ngrams"; static const char __pyx_k_points[] = "points"; static const char __pyx_k_random[] = "random"; static const char __pyx_k_sample[] = "sample"; @@ -1548,9 +1574,11 @@ static const char __pyx_k_window[] = "window"; static const char __pyx_k_work_2[] = "work"; static const char __pyx_k_alpha_2[] = "_alpha"; static const char __pyx_k_float32[] = "float32"; +static const char __pyx_k_ft_hash[] = "ft_hash"; static const char __pyx_k_idx_end[] = "idx_end"; static const char __pyx_k_indexes[] = "indexes"; static const char __pyx_k_randint[] = "randint"; +static const char __pyx_k_subword[] = "subword"; static const char __pyx_k_syn1neg[] = "syn1neg"; static const char __pyx_k_vlookup[] = "vlookup"; static const char __pyx_k_codelens[] = "codelens"; @@ -1563,16 +1591,14 @@ static const char __pyx_k_cum_table[] = "cum_table"; static const char __pyx_k_enumerate[] = "enumerate"; static const char __pyx_k_idx_start[] = "idx_start"; static const char __pyx_k_sentences[] = "sentences"; -static const char __pyx_k_subword_i[] = "subword_i"; static const char __pyx_k_ValueError[] = "ValueError"; -static const char __pyx_k_index2word[] = "index2word"; +static const char __pyx_k_hash2index[] = "hash2index"; static const char __pyx_k_sample_int[] = "sample_int"; static const char __pyx_k_syn0_vocab[] = "syn0_vocab"; static const char __pyx_k_trainables[] = "trainables"; static const char __pyx_k_vocabulary[] = "vocabulary"; static const char __pyx_k_ImportError[] = "ImportError"; static const char __pyx_k_next_random[] = "next_random"; -static const char __pyx_k_ngrams_word[] = "ngrams_word"; static const char __pyx_k_syn0_ngrams[] = "syn0_ngrams"; static const char __pyx_k_vector_size[] = "vector_size"; static const char __pyx_k_FAST_VERSION[] = "FAST_VERSION"; @@ -1582,6 +1608,7 @@ static const char __pyx_k_subwords_idx[] = "subwords_idx"; static const char __pyx_k_cum_table_len[] = "cum_table_len"; static const char __pyx_k_vectors_vocab[] = "vectors_vocab"; static const char __pyx_k_word_subwords[] = "word_subwords"; +static const char __pyx_k_compute_ngrams[] = "compute_ngrams"; static const char __pyx_k_subword_arrays[] = "subword_arrays"; static const char __pyx_k_train_batch_sg[] = "train_batch_sg"; static const char __pyx_k_vectors_ngrams[] = "vectors_ngrams"; @@ -1590,22 +1617,25 @@ static const char __pyx_k_reduced_windows[] = "reduced_windows"; static const char __pyx_k_subwords_idx_len[] = "subwords_idx_len"; static const char __pyx_k_train_batch_cbow[] = "train_batch_cbow"; static const char __pyx_k_word_locks_vocab[] = "word_locks_vocab"; +static const char __pyx_k_CYTHON_BATCH_CBOW[] = "CYTHON BATCH_CBOW!"; static const char __pyx_k_scipy_linalg_blas[] = "scipy.linalg.blas"; static const char __pyx_k_word_locks_ngrams[] = "word_locks_ngrams"; static const char __pyx_k_MAX_WORDS_IN_BATCH[] = "MAX_WORDS_IN_BATCH"; +static const char __pyx_k_utils_any2vec_fast[] = "utils_any2vec_fast"; static const char __pyx_k_effective_sentences[] = "effective_sentences"; static const char __pyx_k_vectors_vocab_lockf[] = "vectors_vocab_lockf"; static const char __pyx_k_vectors_ngrams_lockf[] = "vectors_ngrams_lockf"; static const char __pyx_k_ndarray_is_not_C_contiguous[] = "ndarray is not C contiguous"; static const char __pyx_k_gensim_models_fasttext_inner[] = "gensim.models.fasttext_inner"; +static const char __pyx_k_home_jojo_projekte_gensim_gensi[] = "/home/jojo/projekte/gensim/gensim/models/fasttext_inner.pyx"; static const char __pyx_k_numpy_core_multiarray_failed_to[] = "numpy.core.multiarray failed to import"; static const char __pyx_k_unknown_dtype_code_in_numpy_pxd[] = "unknown dtype code in numpy.pxd (%d)"; static const char __pyx_k_Format_string_allocated_too_shor[] = "Format string allocated too short, see comment in numpy.pxd"; static const char __pyx_k_Non_native_byte_order_not_suppor[] = "Non-native byte order not supported"; -static const char __pyx_k_gensim_models_fasttext_inner_pyx[] = "gensim/models/fasttext_inner.pyx"; static const char __pyx_k_ndarray_is_not_Fortran_contiguou[] = "ndarray is not Fortran contiguous"; static const char __pyx_k_numpy_core_umath_failed_to_impor[] = "numpy.core.umath failed to import"; static const char __pyx_k_Format_string_allocated_too_shor_2[] = "Format string allocated too short."; +static PyObject *__pyx_kp_s_CYTHON_BATCH_CBOW; static PyObject *__pyx_n_s_FAST_VERSION; static PyObject *__pyx_kp_u_Format_string_allocated_too_shor; static PyObject *__pyx_kp_u_Format_string_allocated_too_shor_2; @@ -1619,29 +1649,34 @@ static PyObject *__pyx_n_s__14; static PyObject *__pyx_n_s_alpha; static PyObject *__pyx_n_s_alpha_2; static PyObject *__pyx_n_s_array; +static PyObject *__pyx_n_s_bucket; static PyObject *__pyx_n_s_cbow_mean; static PyObject *__pyx_n_s_code; static PyObject *__pyx_n_s_codelens; static PyObject *__pyx_n_s_codes; +static PyObject *__pyx_n_s_compute_ngrams; static PyObject *__pyx_n_s_cum_table; static PyObject *__pyx_n_s_cum_table_len; static PyObject *__pyx_n_s_d_res; static PyObject *__pyx_n_s_dtype; static PyObject *__pyx_n_s_effective_sentences; static PyObject *__pyx_n_s_effective_words; +static PyObject *__pyx_n_s_end; static PyObject *__pyx_n_s_enumerate; static PyObject *__pyx_n_s_expected; static PyObject *__pyx_n_s_fblas; +static PyObject *__pyx_n_s_file; static PyObject *__pyx_n_s_float32; +static PyObject *__pyx_n_s_ft_hash; static PyObject *__pyx_n_s_gensim_models_fasttext_inner; -static PyObject *__pyx_kp_s_gensim_models_fasttext_inner_pyx; +static PyObject *__pyx_n_s_hash2index; +static PyObject *__pyx_kp_s_home_jojo_projekte_gensim_gensi; static PyObject *__pyx_n_s_hs; static PyObject *__pyx_n_s_i; static PyObject *__pyx_n_s_idx_end; static PyObject *__pyx_n_s_idx_start; static PyObject *__pyx_n_s_import; static PyObject *__pyx_n_s_index; -static PyObject *__pyx_n_s_index2word; static PyObject *__pyx_n_s_indexes; static PyObject *__pyx_n_s_init; static PyObject *__pyx_n_s_item; @@ -1650,6 +1685,8 @@ static PyObject *__pyx_n_s_k; static PyObject *__pyx_n_s_l1; static PyObject *__pyx_n_s_l1_2; static PyObject *__pyx_n_s_main; +static PyObject *__pyx_n_s_max_n; +static PyObject *__pyx_n_s_min_n; static PyObject *__pyx_n_s_model; static PyObject *__pyx_kp_u_ndarray_is_not_C_contiguous; static PyObject *__pyx_kp_u_ndarray_is_not_Fortran_contiguou; @@ -1657,8 +1694,6 @@ static PyObject *__pyx_n_s_negative; static PyObject *__pyx_n_s_neu1; static PyObject *__pyx_n_s_neu1_2; static PyObject *__pyx_n_s_next_random; -static PyObject *__pyx_n_s_ngrams; -static PyObject *__pyx_n_s_ngrams_word; static PyObject *__pyx_n_s_np; static PyObject *__pyx_n_s_numpy; static PyObject *__pyx_kp_s_numpy_core_multiarray_failed_to; @@ -1666,6 +1701,7 @@ static PyObject *__pyx_kp_s_numpy_core_umath_failed_to_impor; static PyObject *__pyx_n_s_p_res; static PyObject *__pyx_n_s_point; static PyObject *__pyx_n_s_points; +static PyObject *__pyx_n_s_print; static PyObject *__pyx_n_s_randint; static PyObject *__pyx_n_s_random; static PyObject *__pyx_n_s_range; @@ -1678,8 +1714,8 @@ static PyObject *__pyx_n_s_sent_idx; static PyObject *__pyx_n_s_sentence_idx; static PyObject *__pyx_n_s_sentences; static PyObject *__pyx_n_s_size; +static PyObject *__pyx_n_s_subword; static PyObject *__pyx_n_s_subword_arrays; -static PyObject *__pyx_n_s_subword_i; static PyObject *__pyx_n_s_subwords; static PyObject *__pyx_n_s_subwords_idx; static PyObject *__pyx_n_s_subwords_idx_len; @@ -1694,6 +1730,7 @@ static PyObject *__pyx_n_s_train_batch_sg; static PyObject *__pyx_n_s_trainables; static PyObject *__pyx_n_s_uint32; static PyObject *__pyx_kp_u_unknown_dtype_code_in_numpy_pxd; +static PyObject *__pyx_n_s_utils_any2vec_fast; static PyObject *__pyx_n_s_vector_size; static PyObject *__pyx_n_s_vectors_ngrams; static PyObject *__pyx_n_s_vectors_ngrams_lockf; @@ -1743,7 +1780,7 @@ static PyObject *__pyx_codeobj__18; static PyObject *__pyx_codeobj__20; /* "gensim/models/fasttext_inner.pyx":43 - * + * cdef REAL_t ONEF = 1.0 * * cdef unsigned long long fast_sentence_sg_neg( # <<<<<<<<<<<<<< * const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, @@ -2102,7 +2139,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente goto __pyx_L0; /* "gensim/models/fasttext_inner.pyx":43 - * + * cdef REAL_t ONEF = 1.0 * * cdef unsigned long long fast_sentence_sg_neg( # <<<<<<<<<<<<<< * const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, @@ -3473,7 +3510,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON PyObject *__pyx_v_subwords = NULL; PyObject *__pyx_v_word_subwords = NULL; PyObject *__pyx_v_item = NULL; - PyObject *__pyx_v_subword_i = NULL; + PyObject *__pyx_v_subword = NULL; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; @@ -3493,13 +3530,15 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5numpy_uint32_t __pyx_t_15; PyObject *__pyx_t_16 = NULL; PyObject *__pyx_t_17 = NULL; - Py_ssize_t __pyx_t_18; - PyObject *(*__pyx_t_19)(PyObject *); - int __pyx_t_20; - int __pyx_t_21; + PyObject *__pyx_t_18 = NULL; + PyObject *__pyx_t_19 = NULL; + Py_ssize_t __pyx_t_20; + PyObject *(*__pyx_t_21)(PyObject *); int __pyx_t_22; int __pyx_t_23; int __pyx_t_24; + int __pyx_t_25; + int __pyx_t_26; __Pyx_RefNannySetupContext("train_batch_sg", 0); /* "gensim/models/fasttext_inner.pyx":248 @@ -4103,7 +4142,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON * continue * indexes[effective_words] = word.index # <<<<<<<<<<<<<< * - * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] + * subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] */ __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 318, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); @@ -4114,166 +4153,286 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON /* "gensim/models/fasttext_inner.pyx":320 * indexes[effective_words] = word.index * - * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] # <<<<<<<<<<<<<< + * subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] # <<<<<<<<<<<<<< + * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) - * subwords_idx_len[effective_words] = (len(subwords) + 1) */ __pyx_t_14 = PyList_New(0); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 320, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_ngrams_word); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 320, __pyx_L1_error) + + /* "gensim/models/fasttext_inner.pyx":321 + * + * subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] + * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] # <<<<<<<<<<<<<< + * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) + * subwords_idx_len[effective_words] = (len(subwords) + 1) + */ + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_compute_ngrams); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 320, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_index2word); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 320, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 320, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __pyx_t_17 = PyObject_GetItem(__pyx_t_16, __pyx_t_13); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 320, __pyx_L1_error) + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_min_n); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = PyObject_GetItem(__pyx_t_1, __pyx_t_17); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 320, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 321, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); + __pyx_t_18 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_max_n); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 321, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_18); + __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + __pyx_t_16 = NULL; + __pyx_t_2 = 0; + if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_1))) { + __pyx_t_16 = PyMethod_GET_SELF(__pyx_t_1); + if (likely(__pyx_t_16)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1); + __Pyx_INCREF(__pyx_t_16); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_1, function); + __pyx_t_2 = 1; + } + } + #if CYTHON_FAST_PYCALL + if (PyFunction_Check(__pyx_t_1)) { + PyObject *__pyx_temp[4] = {__pyx_t_16, __pyx_v_token, __pyx_t_17, __pyx_t_18}; + __pyx_t_13 = __Pyx_PyFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_2, 3+__pyx_t_2); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 321, __pyx_L1_error) + __Pyx_XDECREF(__pyx_t_16); __pyx_t_16 = 0; + __Pyx_GOTREF(__pyx_t_13); + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; + __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; + } else + #endif + #if CYTHON_FAST_PYCCALL + if (__Pyx_PyFastCFunction_Check(__pyx_t_1)) { + PyObject *__pyx_temp[4] = {__pyx_t_16, __pyx_v_token, __pyx_t_17, __pyx_t_18}; + __pyx_t_13 = __Pyx_PyCFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_2, 3+__pyx_t_2); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 321, __pyx_L1_error) + __Pyx_XDECREF(__pyx_t_16); __pyx_t_16 = 0; + __Pyx_GOTREF(__pyx_t_13); + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; + __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; + } else + #endif + { + __pyx_t_19 = PyTuple_New(3+__pyx_t_2); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 321, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_19); + if (__pyx_t_16) { + __Pyx_GIVEREF(__pyx_t_16); PyTuple_SET_ITEM(__pyx_t_19, 0, __pyx_t_16); __pyx_t_16 = NULL; + } + __Pyx_INCREF(__pyx_v_token); + __Pyx_GIVEREF(__pyx_v_token); + PyTuple_SET_ITEM(__pyx_t_19, 0+__pyx_t_2, __pyx_v_token); + __Pyx_GIVEREF(__pyx_t_17); + PyTuple_SET_ITEM(__pyx_t_19, 1+__pyx_t_2, __pyx_t_17); + __Pyx_GIVEREF(__pyx_t_18); + PyTuple_SET_ITEM(__pyx_t_19, 2+__pyx_t_2, __pyx_t_18); + __pyx_t_17 = 0; + __pyx_t_18 = 0; + __pyx_t_13 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_19, NULL); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 321, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; + } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; if (likely(PyList_CheckExact(__pyx_t_13)) || PyTuple_CheckExact(__pyx_t_13)) { - __pyx_t_17 = __pyx_t_13; __Pyx_INCREF(__pyx_t_17); __pyx_t_18 = 0; - __pyx_t_19 = NULL; + __pyx_t_1 = __pyx_t_13; __Pyx_INCREF(__pyx_t_1); __pyx_t_20 = 0; + __pyx_t_21 = NULL; } else { - __pyx_t_18 = -1; __pyx_t_17 = PyObject_GetIter(__pyx_t_13); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 320, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_17); - __pyx_t_19 = Py_TYPE(__pyx_t_17)->tp_iternext; if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 320, __pyx_L1_error) + __pyx_t_20 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_t_13); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 321, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_21 = Py_TYPE(__pyx_t_1)->tp_iternext; if (unlikely(!__pyx_t_21)) __PYX_ERR(0, 321, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; for (;;) { - if (likely(!__pyx_t_19)) { - if (likely(PyList_CheckExact(__pyx_t_17))) { - if (__pyx_t_18 >= PyList_GET_SIZE(__pyx_t_17)) break; + if (likely(!__pyx_t_21)) { + if (likely(PyList_CheckExact(__pyx_t_1))) { + if (__pyx_t_20 >= PyList_GET_SIZE(__pyx_t_1)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_13 = PyList_GET_ITEM(__pyx_t_17, __pyx_t_18); __Pyx_INCREF(__pyx_t_13); __pyx_t_18++; if (unlikely(0 < 0)) __PYX_ERR(0, 320, __pyx_L1_error) + __pyx_t_13 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_20); __Pyx_INCREF(__pyx_t_13); __pyx_t_20++; if (unlikely(0 < 0)) __PYX_ERR(0, 321, __pyx_L1_error) #else - __pyx_t_13 = PySequence_ITEM(__pyx_t_17, __pyx_t_18); __pyx_t_18++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 320, __pyx_L1_error) + __pyx_t_13 = PySequence_ITEM(__pyx_t_1, __pyx_t_20); __pyx_t_20++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); #endif } else { - if (__pyx_t_18 >= PyTuple_GET_SIZE(__pyx_t_17)) break; + if (__pyx_t_20 >= PyTuple_GET_SIZE(__pyx_t_1)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_13 = PyTuple_GET_ITEM(__pyx_t_17, __pyx_t_18); __Pyx_INCREF(__pyx_t_13); __pyx_t_18++; if (unlikely(0 < 0)) __PYX_ERR(0, 320, __pyx_L1_error) + __pyx_t_13 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_20); __Pyx_INCREF(__pyx_t_13); __pyx_t_20++; if (unlikely(0 < 0)) __PYX_ERR(0, 321, __pyx_L1_error) #else - __pyx_t_13 = PySequence_ITEM(__pyx_t_17, __pyx_t_18); __pyx_t_18++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 320, __pyx_L1_error) + __pyx_t_13 = PySequence_ITEM(__pyx_t_1, __pyx_t_20); __pyx_t_20++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); #endif } } else { - __pyx_t_13 = __pyx_t_19(__pyx_t_17); + __pyx_t_13 = __pyx_t_21(__pyx_t_1); if (unlikely(!__pyx_t_13)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 320, __pyx_L1_error) + else __PYX_ERR(0, 321, __pyx_L1_error) } break; } __Pyx_GOTREF(__pyx_t_13); } - __Pyx_XDECREF_SET(__pyx_v_subword_i, __pyx_t_13); + __Pyx_XDECREF_SET(__pyx_v_subword, __pyx_t_13); __pyx_t_13 = 0; + + /* "gensim/models/fasttext_inner.pyx":320 + * indexes[effective_words] = word.index + * + * subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] # <<<<<<<<<<<<<< + * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] + * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) + */ __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_ngrams); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 320, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); + __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_hash2index); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 320, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_19); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = PyObject_GetItem(__pyx_t_1, __pyx_v_subword_i); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 320, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (unlikely(__Pyx_ListComp_Append(__pyx_t_14, (PyObject*)__pyx_t_13))) __PYX_ERR(0, 320, __pyx_L1_error) + __pyx_t_18 = __Pyx_GetModuleGlobalName(__pyx_n_s_ft_hash); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 320, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_18); + __pyx_t_17 = NULL; + if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_18))) { + __pyx_t_17 = PyMethod_GET_SELF(__pyx_t_18); + if (likely(__pyx_t_17)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_18); + __Pyx_INCREF(__pyx_t_17); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_18, function); + } + } + if (!__pyx_t_17) { + __pyx_t_13 = __Pyx_PyObject_CallOneArg(__pyx_t_18, __pyx_v_subword); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 320, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + } else { + #if CYTHON_FAST_PYCALL + if (PyFunction_Check(__pyx_t_18)) { + PyObject *__pyx_temp[2] = {__pyx_t_17, __pyx_v_subword}; + __pyx_t_13 = __Pyx_PyFunction_FastCall(__pyx_t_18, __pyx_temp+1-1, 1+1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 320, __pyx_L1_error) + __Pyx_XDECREF(__pyx_t_17); __pyx_t_17 = 0; + __Pyx_GOTREF(__pyx_t_13); + } else + #endif + #if CYTHON_FAST_PYCCALL + if (__Pyx_PyFastCFunction_Check(__pyx_t_18)) { + PyObject *__pyx_temp[2] = {__pyx_t_17, __pyx_v_subword}; + __pyx_t_13 = __Pyx_PyCFunction_FastCall(__pyx_t_18, __pyx_temp+1-1, 1+1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 320, __pyx_L1_error) + __Pyx_XDECREF(__pyx_t_17); __pyx_t_17 = 0; + __Pyx_GOTREF(__pyx_t_13); + } else + #endif + { + __pyx_t_16 = PyTuple_New(1+1); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 320, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); + __Pyx_GIVEREF(__pyx_t_17); PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_17); __pyx_t_17 = NULL; + __Pyx_INCREF(__pyx_v_subword); + __Pyx_GIVEREF(__pyx_v_subword); + PyTuple_SET_ITEM(__pyx_t_16, 0+1, __pyx_v_subword); + __pyx_t_13 = __Pyx_PyObject_Call(__pyx_t_18, __pyx_t_16, NULL); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 320, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + } + } + __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; + __pyx_t_18 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_bucket); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 320, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_18); + __pyx_t_16 = PyNumber_Remainder(__pyx_t_13, __pyx_t_18); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 320, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; + __pyx_t_18 = PyObject_GetItem(__pyx_t_19, __pyx_t_16); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 320, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_18); + __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; + __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + if (unlikely(__Pyx_ListComp_Append(__pyx_t_14, (PyObject*)__pyx_t_18))) __PYX_ERR(0, 320, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; + + /* "gensim/models/fasttext_inner.pyx":321 + * + * subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] + * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] # <<<<<<<<<<<<<< + * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) + * subwords_idx_len[effective_words] = (len(subwords) + 1) + */ } - __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_XDECREF_SET(__pyx_v_subwords, ((PyObject*)__pyx_t_14)); __pyx_t_14 = 0; - /* "gensim/models/fasttext_inner.pyx":321 - * - * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] + /* "gensim/models/fasttext_inner.pyx":322 + * subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] + * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) # <<<<<<<<<<<<<< * subwords_idx_len[effective_words] = (len(subwords) + 1) * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) */ - __pyx_t_14 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 321, __pyx_L1_error) + __pyx_t_14 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 322, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_array); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 321, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_17); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_array); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 322, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 321, __pyx_L1_error) + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 322, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_13 = PyList_New(1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 321, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); + __pyx_t_18 = PyList_New(1); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 322, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_18); __Pyx_GIVEREF(__pyx_t_14); - PyList_SET_ITEM(__pyx_t_13, 0, __pyx_t_14); + PyList_SET_ITEM(__pyx_t_18, 0, __pyx_t_14); __pyx_t_14 = 0; - __pyx_t_14 = PyNumber_Add(__pyx_t_13, __pyx_v_subwords); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 321, __pyx_L1_error) + __pyx_t_14 = PyNumber_Add(__pyx_t_18, __pyx_v_subwords); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 322, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = PyTuple_New(1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 321, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); + __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; + __pyx_t_18 = PyTuple_New(1); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 322, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_18); __Pyx_GIVEREF(__pyx_t_14); - PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_14); + PyTuple_SET_ITEM(__pyx_t_18, 0, __pyx_t_14); __pyx_t_14 = 0; - __pyx_t_14 = PyDict_New(); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 321, __pyx_L1_error) + __pyx_t_14 = PyDict_New(); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 322, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 321, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_uint32); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 321, __pyx_L1_error) + __pyx_t_16 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 322, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (PyDict_SetItem(__pyx_t_14, __pyx_n_s_dtype, __pyx_t_16) < 0) __PYX_ERR(0, 321, __pyx_L1_error) + __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_uint32); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 322, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_19); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - __pyx_t_16 = __Pyx_PyObject_Call(__pyx_t_17, __pyx_t_13, __pyx_t_14); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 321, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_16); - __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + if (PyDict_SetItem(__pyx_t_14, __pyx_n_s_dtype, __pyx_t_19) < 0) __PYX_ERR(0, 322, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; + __pyx_t_19 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_18, __pyx_t_14); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 322, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_19); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - __Pyx_XDECREF_SET(__pyx_v_word_subwords, __pyx_t_16); - __pyx_t_16 = 0; + __Pyx_XDECREF_SET(__pyx_v_word_subwords, __pyx_t_19); + __pyx_t_19 = 0; - /* "gensim/models/fasttext_inner.pyx":322 - * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] + /* "gensim/models/fasttext_inner.pyx":323 + * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) * subwords_idx_len[effective_words] = (len(subwords) + 1) # <<<<<<<<<<<<<< * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) * # ensures reference count of word_subwords doesn't reach 0 */ - __pyx_t_18 = PyList_GET_SIZE(__pyx_v_subwords); if (unlikely(__pyx_t_18 == -1)) __PYX_ERR(0, 322, __pyx_L1_error) - (__pyx_v_subwords_idx_len[__pyx_v_effective_words]) = ((int)(__pyx_t_18 + 1)); + __pyx_t_20 = PyList_GET_SIZE(__pyx_v_subwords); if (unlikely(__pyx_t_20 == -1)) __PYX_ERR(0, 323, __pyx_L1_error) + (__pyx_v_subwords_idx_len[__pyx_v_effective_words]) = ((int)(__pyx_t_20 + 1)); - /* "gensim/models/fasttext_inner.pyx":323 + /* "gensim/models/fasttext_inner.pyx":324 * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) * subwords_idx_len[effective_words] = (len(subwords) + 1) * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) # <<<<<<<<<<<<<< * # ensures reference count of word_subwords doesn't reach 0 * subword_arrays[effective_words] = word_subwords */ - if (!(likely(((__pyx_v_word_subwords) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_subwords, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 323, __pyx_L1_error) + if (!(likely(((__pyx_v_word_subwords) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_subwords, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 324, __pyx_L1_error) (__pyx_v_subwords_idx[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_subwords))); - /* "gensim/models/fasttext_inner.pyx":325 + /* "gensim/models/fasttext_inner.pyx":326 * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) * # ensures reference count of word_subwords doesn't reach 0 * subword_arrays[effective_words] = word_subwords # <<<<<<<<<<<<<< * * if hs: */ - __pyx_t_16 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 325, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_16); - if (unlikely(PyDict_SetItem(__pyx_v_subword_arrays, __pyx_t_16, __pyx_v_word_subwords) < 0)) __PYX_ERR(0, 325, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + __pyx_t_19 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 326, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_19); + if (unlikely(PyDict_SetItem(__pyx_v_subword_arrays, __pyx_t_19, __pyx_v_word_subwords) < 0)) __PYX_ERR(0, 326, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - /* "gensim/models/fasttext_inner.pyx":327 + /* "gensim/models/fasttext_inner.pyx":328 * subword_arrays[effective_words] = word_subwords * * if hs: # <<<<<<<<<<<<<< @@ -4283,46 +4442,46 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":328 + /* "gensim/models/fasttext_inner.pyx":329 * * if hs: * codelens[effective_words] = len(word.code) # <<<<<<<<<<<<<< * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) */ - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 328, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_16); - __pyx_t_18 = PyObject_Length(__pyx_t_16); if (unlikely(__pyx_t_18 == -1)) __PYX_ERR(0, 328, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - (__pyx_v_codelens[__pyx_v_effective_words]) = ((int)__pyx_t_18); + __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 329, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_19); + __pyx_t_20 = PyObject_Length(__pyx_t_19); if (unlikely(__pyx_t_20 == -1)) __PYX_ERR(0, 329, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; + (__pyx_v_codelens[__pyx_v_effective_words]) = ((int)__pyx_t_20); - /* "gensim/models/fasttext_inner.pyx":329 + /* "gensim/models/fasttext_inner.pyx":330 * if hs: * codelens[effective_words] = len(word.code) * codes[effective_words] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< * points[effective_words] = np.PyArray_DATA(word.point) * */ - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 329, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_16); - if (!(likely(((__pyx_t_16) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_16, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 329, __pyx_L1_error) - (__pyx_v_codes[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_16))); - __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 330, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_19); + if (!(likely(((__pyx_t_19) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_19, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 330, __pyx_L1_error) + (__pyx_v_codes[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_19))); + __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - /* "gensim/models/fasttext_inner.pyx":330 + /* "gensim/models/fasttext_inner.pyx":331 * codelens[effective_words] = len(word.code) * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< * * effective_words += 1 */ - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 330, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_16); - if (!(likely(((__pyx_t_16) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_16, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 330, __pyx_L1_error) - (__pyx_v_points[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_16))); - __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 331, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_19); + if (!(likely(((__pyx_t_19) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_19, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 331, __pyx_L1_error) + (__pyx_v_points[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_19))); + __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - /* "gensim/models/fasttext_inner.pyx":327 + /* "gensim/models/fasttext_inner.pyx":328 * subword_arrays[effective_words] = word_subwords * * if hs: # <<<<<<<<<<<<<< @@ -4331,7 +4490,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":332 + /* "gensim/models/fasttext_inner.pyx":333 * points[effective_words] = np.PyArray_DATA(word.point) * * effective_words += 1 # <<<<<<<<<<<<<< @@ -4340,7 +4499,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_effective_words = (__pyx_v_effective_words + 1); - /* "gensim/models/fasttext_inner.pyx":333 + /* "gensim/models/fasttext_inner.pyx":334 * * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< @@ -4350,7 +4509,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = ((__pyx_v_effective_words == 0x2710) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":334 + /* "gensim/models/fasttext_inner.pyx":335 * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: * break # <<<<<<<<<<<<<< @@ -4359,7 +4518,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ goto __pyx_L12_break; - /* "gensim/models/fasttext_inner.pyx":333 + /* "gensim/models/fasttext_inner.pyx":334 * * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< @@ -4380,7 +4539,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_L12_break:; __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/fasttext_inner.pyx":339 + /* "gensim/models/fasttext_inner.pyx":340 * # across sentence boundaries. * # indices of sentence number X are between tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 346, __pyx_L1_error) + __pyx_t_6 = -1; __pyx_t_19 = PyObject_GetIter(__pyx_t_3); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 347, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_19); + __pyx_t_10 = Py_TYPE(__pyx_t_19)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 347, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; for (;;) { if (likely(!__pyx_t_10)) { - if (likely(PyList_CheckExact(__pyx_t_16))) { - if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_16)) break; + if (likely(PyList_CheckExact(__pyx_t_19))) { + if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_19)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyList_GET_ITEM(__pyx_t_16, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 346, __pyx_L1_error) + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_19, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 347, __pyx_L1_error) #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_16, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 346, __pyx_L1_error) + __pyx_t_3 = PySequence_ITEM(__pyx_t_19, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 347, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); #endif } else { - if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_16)) break; + if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_19)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_16, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 346, __pyx_L1_error) + __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_19, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 347, __pyx_L1_error) #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_16, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 346, __pyx_L1_error) + __pyx_t_3 = PySequence_ITEM(__pyx_t_19, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 347, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); #endif } } else { - __pyx_t_3 = __pyx_t_10(__pyx_t_16); + __pyx_t_3 = __pyx_t_10(__pyx_t_19); if (unlikely(!__pyx_t_3)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 346, __pyx_L1_error) + else __PYX_ERR(0, 347, __pyx_L1_error) } break; } @@ -4552,17 +4711,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_v_i = __pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "gensim/models/fasttext_inner.pyx":347 + /* "gensim/models/fasttext_inner.pyx":348 * # precompute "reduced window" offsets in a single randint() call * for i, item in enumerate(model.random.randint(0, window, effective_words)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * * with nogil: */ - __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 347, __pyx_L1_error) + __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 348, __pyx_L1_error) (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_15; - /* "gensim/models/fasttext_inner.pyx":346 + /* "gensim/models/fasttext_inner.pyx":347 * * # precompute "reduced window" offsets in a single randint() call * for i, item in enumerate(model.random.randint(0, window, effective_words)): # <<<<<<<<<<<<<< @@ -4570,9 +4729,9 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON * */ } - __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - /* "gensim/models/fasttext_inner.pyx":349 + /* "gensim/models/fasttext_inner.pyx":350 * reduced_windows[i] = item * * with nogil: # <<<<<<<<<<<<<< @@ -4586,7 +4745,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON #endif /*try:*/ { - /* "gensim/models/fasttext_inner.pyx":350 + /* "gensim/models/fasttext_inner.pyx":351 * * with nogil: * for sent_idx in range(effective_sentences): # <<<<<<<<<<<<<< @@ -4594,10 +4753,10 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON * idx_end = sentence_idx[sent_idx + 1] */ __pyx_t_2 = __pyx_v_effective_sentences; - for (__pyx_t_20 = 0; __pyx_t_20 < __pyx_t_2; __pyx_t_20+=1) { - __pyx_v_sent_idx = __pyx_t_20; + for (__pyx_t_22 = 0; __pyx_t_22 < __pyx_t_2; __pyx_t_22+=1) { + __pyx_v_sent_idx = __pyx_t_22; - /* "gensim/models/fasttext_inner.pyx":351 + /* "gensim/models/fasttext_inner.pyx":352 * with nogil: * for sent_idx in range(effective_sentences): * idx_start = sentence_idx[sent_idx] # <<<<<<<<<<<<<< @@ -4606,7 +4765,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_idx_start = (__pyx_v_sentence_idx[__pyx_v_sent_idx]); - /* "gensim/models/fasttext_inner.pyx":352 + /* "gensim/models/fasttext_inner.pyx":353 * for sent_idx in range(effective_sentences): * idx_start = sentence_idx[sent_idx] * idx_end = sentence_idx[sent_idx + 1] # <<<<<<<<<<<<<< @@ -4615,18 +4774,18 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_idx_end = (__pyx_v_sentence_idx[(__pyx_v_sent_idx + 1)]); - /* "gensim/models/fasttext_inner.pyx":353 + /* "gensim/models/fasttext_inner.pyx":354 * idx_start = sentence_idx[sent_idx] * idx_end = sentence_idx[sent_idx + 1] * for i in range(idx_start, idx_end): # <<<<<<<<<<<<<< * j = i - window + reduced_windows[i] * if j < idx_start: */ - __pyx_t_21 = __pyx_v_idx_end; - for (__pyx_t_22 = __pyx_v_idx_start; __pyx_t_22 < __pyx_t_21; __pyx_t_22+=1) { - __pyx_v_i = __pyx_t_22; + __pyx_t_23 = __pyx_v_idx_end; + for (__pyx_t_24 = __pyx_v_idx_start; __pyx_t_24 < __pyx_t_23; __pyx_t_24+=1) { + __pyx_v_i = __pyx_t_24; - /* "gensim/models/fasttext_inner.pyx":354 + /* "gensim/models/fasttext_inner.pyx":355 * idx_end = sentence_idx[sent_idx + 1] * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< @@ -4635,7 +4794,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/fasttext_inner.pyx":355 + /* "gensim/models/fasttext_inner.pyx":356 * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] * if j < idx_start: # <<<<<<<<<<<<<< @@ -4645,7 +4804,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = ((__pyx_v_j < __pyx_v_idx_start) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":356 + /* "gensim/models/fasttext_inner.pyx":357 * j = i - window + reduced_windows[i] * if j < idx_start: * j = idx_start # <<<<<<<<<<<<<< @@ -4654,7 +4813,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_j = __pyx_v_idx_start; - /* "gensim/models/fasttext_inner.pyx":355 + /* "gensim/models/fasttext_inner.pyx":356 * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] * if j < idx_start: # <<<<<<<<<<<<<< @@ -4663,7 +4822,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":357 + /* "gensim/models/fasttext_inner.pyx":358 * if j < idx_start: * j = idx_start * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -4672,7 +4831,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/fasttext_inner.pyx":358 + /* "gensim/models/fasttext_inner.pyx":359 * j = idx_start * k = i + window + 1 - reduced_windows[i] * if k > idx_end: # <<<<<<<<<<<<<< @@ -4682,7 +4841,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = ((__pyx_v_k > __pyx_v_idx_end) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":359 + /* "gensim/models/fasttext_inner.pyx":360 * k = i + window + 1 - reduced_windows[i] * if k > idx_end: * k = idx_end # <<<<<<<<<<<<<< @@ -4691,7 +4850,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_k = __pyx_v_idx_end; - /* "gensim/models/fasttext_inner.pyx":358 + /* "gensim/models/fasttext_inner.pyx":359 * j = idx_start * k = i + window + 1 - reduced_windows[i] * if k > idx_end: # <<<<<<<<<<<<<< @@ -4700,18 +4859,18 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":360 + /* "gensim/models/fasttext_inner.pyx":361 * if k > idx_end: * k = idx_end * for j in range(j, k): # <<<<<<<<<<<<<< * if j == i: * continue */ - __pyx_t_23 = __pyx_v_k; - for (__pyx_t_24 = __pyx_v_j; __pyx_t_24 < __pyx_t_23; __pyx_t_24+=1) { - __pyx_v_j = __pyx_t_24; + __pyx_t_25 = __pyx_v_k; + for (__pyx_t_26 = __pyx_v_j; __pyx_t_26 < __pyx_t_25; __pyx_t_26+=1) { + __pyx_v_j = __pyx_t_26; - /* "gensim/models/fasttext_inner.pyx":361 + /* "gensim/models/fasttext_inner.pyx":362 * k = idx_end * for j in range(j, k): * if j == i: # <<<<<<<<<<<<<< @@ -4721,7 +4880,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = ((__pyx_v_j == __pyx_v_i) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":362 + /* "gensim/models/fasttext_inner.pyx":363 * for j in range(j, k): * if j == i: * continue # <<<<<<<<<<<<<< @@ -4730,7 +4889,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ goto __pyx_L33_continue; - /* "gensim/models/fasttext_inner.pyx":361 + /* "gensim/models/fasttext_inner.pyx":362 * k = idx_end * for j in range(j, k): * if j == i: # <<<<<<<<<<<<<< @@ -4739,7 +4898,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":363 + /* "gensim/models/fasttext_inner.pyx":364 * if j == i: * continue * if hs: # <<<<<<<<<<<<<< @@ -4749,7 +4908,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":364 + /* "gensim/models/fasttext_inner.pyx":365 * continue * if hs: * fast_sentence_sg_hs( # <<<<<<<<<<<<<< @@ -4758,7 +4917,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs((__pyx_v_points[__pyx_v_j]), (__pyx_v_codes[__pyx_v_j]), (__pyx_v_codelens[__pyx_v_j]), __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1, __pyx_v_size, (__pyx_v_subwords_idx[__pyx_v_i]), (__pyx_v_subwords_idx_len[__pyx_v_i]), __pyx_v__alpha, __pyx_v_work, __pyx_v_l1, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); - /* "gensim/models/fasttext_inner.pyx":363 + /* "gensim/models/fasttext_inner.pyx":364 * if j == i: * continue * if hs: # <<<<<<<<<<<<<< @@ -4767,7 +4926,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":368 + /* "gensim/models/fasttext_inner.pyx":369 * subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, * word_locks_ngrams) * if negative: # <<<<<<<<<<<<<< @@ -4777,7 +4936,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_v_negative != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":369 + /* "gensim/models/fasttext_inner.pyx":370 * word_locks_ngrams) * if negative: * next_random = fast_sentence_sg_neg( # <<<<<<<<<<<<<< @@ -4786,7 +4945,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_next_random = __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_neg(__pyx_v_negative, __pyx_v_cum_table, __pyx_v_cum_table_len, __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_j]), (__pyx_v_subwords_idx[__pyx_v_i]), (__pyx_v_subwords_idx_len[__pyx_v_i]), __pyx_v__alpha, __pyx_v_work, __pyx_v_l1, __pyx_v_next_random, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); - /* "gensim/models/fasttext_inner.pyx":368 + /* "gensim/models/fasttext_inner.pyx":369 * subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, * word_locks_ngrams) * if negative: # <<<<<<<<<<<<<< @@ -4800,7 +4959,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON } } - /* "gensim/models/fasttext_inner.pyx":349 + /* "gensim/models/fasttext_inner.pyx":350 * reduced_windows[i] = item * * with nogil: # <<<<<<<<<<<<<< @@ -4818,7 +4977,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON } } - /* "gensim/models/fasttext_inner.pyx":374 + /* "gensim/models/fasttext_inner.pyx":375 * next_random, word_locks_vocab, word_locks_ngrams) * * return effective_words # <<<<<<<<<<<<<< @@ -4826,10 +4985,10 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_16 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 374, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_16); - __pyx_r = __pyx_t_16; - __pyx_t_16 = 0; + __pyx_t_19 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 375, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_19); + __pyx_r = __pyx_t_19; + __pyx_t_19 = 0; goto __pyx_L0; /* "gensim/models/fasttext_inner.pyx":247 @@ -4849,6 +5008,8 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __Pyx_XDECREF(__pyx_t_14); __Pyx_XDECREF(__pyx_t_16); __Pyx_XDECREF(__pyx_t_17); + __Pyx_XDECREF(__pyx_t_18); + __Pyx_XDECREF(__pyx_t_19); __Pyx_AddTraceback("gensim.models.fasttext_inner.train_batch_sg", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; @@ -4860,13 +5021,13 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __Pyx_XDECREF(__pyx_v_subwords); __Pyx_XDECREF(__pyx_v_word_subwords); __Pyx_XDECREF(__pyx_v_item); - __Pyx_XDECREF(__pyx_v_subword_i); + __Pyx_XDECREF(__pyx_v_subword); __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } -/* "gensim/models/fasttext_inner.pyx":377 +/* "gensim/models/fasttext_inner.pyx":378 * * * def train_batch_cbow(model, sentences, alpha, _work, _neu1): # <<<<<<<<<<<<<< @@ -4909,26 +5070,26 @@ static PyObject *__pyx_pw_6gensim_6models_14fasttext_inner_3train_batch_cbow(PyO case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentences)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 1); __PYX_ERR(0, 377, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 1); __PYX_ERR(0, 378, __pyx_L3_error) } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 2); __PYX_ERR(0, 377, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 2); __PYX_ERR(0, 378, __pyx_L3_error) } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 3); __PYX_ERR(0, 377, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 3); __PYX_ERR(0, 378, __pyx_L3_error) } case 4: if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_neu1)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 4); __PYX_ERR(0, 377, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 4); __PYX_ERR(0, 378, __pyx_L3_error) } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_batch_cbow") < 0)) __PYX_ERR(0, 377, __pyx_L3_error) + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_batch_cbow") < 0)) __PYX_ERR(0, 378, __pyx_L3_error) } } else if (PyTuple_GET_SIZE(__pyx_args) != 5) { goto __pyx_L5_argtuple_error; @@ -4947,7 +5108,7 @@ static PyObject *__pyx_pw_6gensim_6models_14fasttext_inner_3train_batch_cbow(PyO } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 377, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 378, __pyx_L3_error) __pyx_L3_error:; __Pyx_AddTraceback("gensim.models.fasttext_inner.train_batch_cbow", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -5003,7 +5164,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT PyObject *__pyx_v_subwords = NULL; PyObject *__pyx_v_word_subwords = NULL; PyObject *__pyx_v_item = NULL; - PyObject *__pyx_v_subword_i = NULL; + PyObject *__pyx_v_subword = NULL; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; @@ -5023,174 +5184,176 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5numpy_uint32_t __pyx_t_15; PyObject *__pyx_t_16 = NULL; PyObject *__pyx_t_17 = NULL; - Py_ssize_t __pyx_t_18; - PyObject *(*__pyx_t_19)(PyObject *); - int __pyx_t_20; - int __pyx_t_21; + PyObject *__pyx_t_18 = NULL; + PyObject *__pyx_t_19 = NULL; + Py_ssize_t __pyx_t_20; + PyObject *(*__pyx_t_21)(PyObject *); int __pyx_t_22; + int __pyx_t_23; + int __pyx_t_24; __Pyx_RefNannySetupContext("train_batch_cbow", 0); - /* "gensim/models/fasttext_inner.pyx":378 + /* "gensim/models/fasttext_inner.pyx":379 * * def train_batch_cbow(model, sentences, alpha, _work, _neu1): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative * cdef int sample = (model.vocabulary.sample != 0) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 378, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 379, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 378, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 379, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":379 + /* "gensim/models/fasttext_inner.pyx":380 * def train_batch_cbow(model, sentences, alpha, _work, _neu1): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< * cdef int sample = (model.vocabulary.sample != 0) * cdef int cbow_mean = model.cbow_mean */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 379, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 380, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 379, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 380, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":380 + /* "gensim/models/fasttext_inner.pyx":381 * cdef int hs = model.hs * cdef int negative = model.negative * cdef int sample = (model.vocabulary.sample != 0) # <<<<<<<<<<<<<< * cdef int cbow_mean = model.cbow_mean * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 380, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 381, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_sample); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 380, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_sample); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 381, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyObject_RichCompare(__pyx_t_3, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 380, __pyx_L1_error) + __pyx_t_1 = PyObject_RichCompare(__pyx_t_3, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 381, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 380, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 381, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_sample = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":381 + /* "gensim/models/fasttext_inner.pyx":382 * cdef int negative = model.negative * cdef int sample = (model.vocabulary.sample != 0) * cdef int cbow_mean = model.cbow_mean # <<<<<<<<<<<<<< * * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.vectors_vocab)) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 381, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 382, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 381, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 382, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_cbow_mean = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":383 + /* "gensim/models/fasttext_inner.pyx":384 * cdef int cbow_mean = model.cbow_mean * * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.vectors_vocab)) # <<<<<<<<<<<<<< * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.trainables.vectors_vocab_lockf)) * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.vectors_ngrams)) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 383, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 384, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vectors_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 383, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vectors_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 384, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 383, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 384, __pyx_L1_error) __pyx_v_syn0_vocab = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":384 + /* "gensim/models/fasttext_inner.pyx":385 * * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.vectors_vocab)) * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.trainables.vectors_vocab_lockf)) # <<<<<<<<<<<<<< * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.vectors_ngrams)) * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.trainables.vectors_ngrams_lockf)) */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 384, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 385, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_vectors_vocab_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 384, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_vectors_vocab_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 385, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 384, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 385, __pyx_L1_error) __pyx_v_word_locks_vocab = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":385 + /* "gensim/models/fasttext_inner.pyx":386 * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.vectors_vocab)) * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.trainables.vectors_vocab_lockf)) * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.vectors_ngrams)) # <<<<<<<<<<<<<< * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.trainables.vectors_ngrams_lockf)) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 385, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 386, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vectors_ngrams); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 385, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vectors_ngrams); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 386, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 385, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 386, __pyx_L1_error) __pyx_v_syn0_ngrams = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":386 + /* "gensim/models/fasttext_inner.pyx":387 * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.trainables.vectors_vocab_lockf)) * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.vectors_ngrams)) * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.trainables.vectors_ngrams_lockf)) # <<<<<<<<<<<<<< * * cdef REAL_t *work */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 386, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 387, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_vectors_ngrams_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 386, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_vectors_ngrams_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 387, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 386, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 387, __pyx_L1_error) __pyx_v_word_locks_ngrams = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":389 + /* "gensim/models/fasttext_inner.pyx":390 * * cdef REAL_t *work * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.wv.vector_size * */ - __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == ((npy_float32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 389, __pyx_L1_error) + __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == ((npy_float32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 390, __pyx_L1_error) __pyx_v__alpha = __pyx_t_4; - /* "gensim/models/fasttext_inner.pyx":390 + /* "gensim/models/fasttext_inner.pyx":391 * cdef REAL_t *work * cdef REAL_t _alpha = alpha * cdef int size = model.wv.vector_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_SENTENCE_LEN] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 390, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 391, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vector_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 390, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vector_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 391, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 390, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 391, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_size = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":396 + /* "gensim/models/fasttext_inner.pyx":397 * cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] * cdef int sentence_idx[MAX_SENTENCE_LEN + 1] * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef int i, j, k */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 396, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 397, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 396, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 397, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_window = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":399 + /* "gensim/models/fasttext_inner.pyx":400 * * cdef int i, j, k * cdef int effective_words = 0, effective_sentences = 0 # <<<<<<<<<<<<<< @@ -5200,21 +5363,30 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_v_effective_words = 0; __pyx_v_effective_sentences = 0; - /* "gensim/models/fasttext_inner.pyx":419 + /* "gensim/models/fasttext_inner.pyx":420 * # dummy dictionary to ensure that the memory locations that subwords_idx point to * # are referenced throughout so that it isn't put back to free memory pool by Python's memory manager * subword_arrays = {} # <<<<<<<<<<<<<< * - * if hs: + * print "CYTHON BATCH_CBOW!" */ - __pyx_t_3 = PyDict_New(); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 419, __pyx_L1_error) + __pyx_t_3 = PyDict_New(); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 420, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_v_subword_arrays = ((PyObject*)__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":421 + /* "gensim/models/fasttext_inner.pyx":422 * subword_arrays = {} * + * print "CYTHON BATCH_CBOW!" # <<<<<<<<<<<<<< + * if hs: + * syn1 = (np.PyArray_DATA(model.trainables.syn1)) + */ + if (__Pyx_PrintOne(0, __pyx_kp_s_CYTHON_BATCH_CBOW) < 0) __PYX_ERR(0, 422, __pyx_L1_error) + + /* "gensim/models/fasttext_inner.pyx":423 + * + * print "CYTHON BATCH_CBOW!" * if hs: # <<<<<<<<<<<<<< * syn1 = (np.PyArray_DATA(model.trainables.syn1)) * @@ -5222,32 +5394,32 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":422 - * + /* "gensim/models/fasttext_inner.pyx":424 + * print "CYTHON BATCH_CBOW!" * if hs: * syn1 = (np.PyArray_DATA(model.trainables.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 422, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 424, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 422, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 424, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 422, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 424, __pyx_L1_error) __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":421 - * subword_arrays = {} + /* "gensim/models/fasttext_inner.pyx":423 * + * print "CYTHON BATCH_CBOW!" * if hs: # <<<<<<<<<<<<<< * syn1 = (np.PyArray_DATA(model.trainables.syn1)) * */ } - /* "gensim/models/fasttext_inner.pyx":424 + /* "gensim/models/fasttext_inner.pyx":426 * syn1 = (np.PyArray_DATA(model.trainables.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -5257,55 +5429,55 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_v_negative != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":425 + /* "gensim/models/fasttext_inner.pyx":427 * * if negative: * syn1neg = (np.PyArray_DATA(model.trainables.syn1neg)) # <<<<<<<<<<<<<< * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 425, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 427, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 425, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 427, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 425, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 427, __pyx_L1_error) __pyx_v_syn1neg = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":426 + /* "gensim/models/fasttext_inner.pyx":428 * if negative: * syn1neg = (np.PyArray_DATA(model.trainables.syn1neg)) * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) # <<<<<<<<<<<<<< * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 426, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 428, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 426, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 428, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 426, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 428, __pyx_L1_error) __pyx_v_cum_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":427 + /* "gensim/models/fasttext_inner.pyx":429 * syn1neg = (np.PyArray_DATA(model.trainables.syn1neg)) * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) # <<<<<<<<<<<<<< * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 427, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 429, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 427, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 429, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_6 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_6 == -1)) __PYX_ERR(0, 427, __pyx_L1_error) + __pyx_t_6 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_6 == -1)) __PYX_ERR(0, 429, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_cum_table_len = __pyx_t_6; - /* "gensim/models/fasttext_inner.pyx":424 + /* "gensim/models/fasttext_inner.pyx":426 * syn1 = (np.PyArray_DATA(model.trainables.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -5314,7 +5486,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":428 + /* "gensim/models/fasttext_inner.pyx":430 * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: # <<<<<<<<<<<<<< @@ -5332,41 +5504,41 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_L6_bool_binop_done:; if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":429 + /* "gensim/models/fasttext_inner.pyx":431 * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 429, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 431, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 429, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 431, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 429, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 431, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 429, __pyx_L1_error) + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 431, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 429, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 431, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 429, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 431, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 429, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 431, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 429, __pyx_L1_error) + __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 431, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 429, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 431, __pyx_L1_error) __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_next_random = __pyx_t_9; - /* "gensim/models/fasttext_inner.pyx":428 + /* "gensim/models/fasttext_inner.pyx":430 * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: # <<<<<<<<<<<<<< @@ -5375,42 +5547,42 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":432 + /* "gensim/models/fasttext_inner.pyx":434 * * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< * neu1 = np.PyArray_DATA(_neu1) * */ - if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 432, __pyx_L1_error) + if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 434, __pyx_L1_error) __pyx_v_work = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); - /* "gensim/models/fasttext_inner.pyx":433 + /* "gensim/models/fasttext_inner.pyx":435 * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) * neu1 = np.PyArray_DATA(_neu1) # <<<<<<<<<<<<<< * * # prepare C structures so we can go "full C" and release the Python GIL */ - if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 433, __pyx_L1_error) + if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 435, __pyx_L1_error) __pyx_v_neu1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__neu1))); - /* "gensim/models/fasttext_inner.pyx":436 + /* "gensim/models/fasttext_inner.pyx":438 * * # prepare C structures so we can go "full C" and release the Python GIL * vlookup = model.wv.vocab # <<<<<<<<<<<<<< * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: */ - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 436, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 438, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 436, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 438, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_vlookup = __pyx_t_3; __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":437 + /* "gensim/models/fasttext_inner.pyx":439 * # prepare C structures so we can go "full C" and release the Python GIL * vlookup = model.wv.vocab * sentence_idx[0] = 0 # indices of the first sentence always start at 0 # <<<<<<<<<<<<<< @@ -5419,7 +5591,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ (__pyx_v_sentence_idx[0]) = 0; - /* "gensim/models/fasttext_inner.pyx":438 + /* "gensim/models/fasttext_inner.pyx":440 * vlookup = model.wv.vocab * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: # <<<<<<<<<<<<<< @@ -5430,26 +5602,26 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_3 = __pyx_v_sentences; __Pyx_INCREF(__pyx_t_3); __pyx_t_6 = 0; __pyx_t_10 = NULL; } else { - __pyx_t_6 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_sentences); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 438, __pyx_L1_error) + __pyx_t_6 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_sentences); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 440, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_10 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 438, __pyx_L1_error) + __pyx_t_10 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 440, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_10)) { if (likely(PyList_CheckExact(__pyx_t_3))) { if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_8 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 438, __pyx_L1_error) + __pyx_t_8 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 440, __pyx_L1_error) #else - __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 438, __pyx_L1_error) + __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 440, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); #endif } else { if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 438, __pyx_L1_error) + __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 440, __pyx_L1_error) #else - __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 438, __pyx_L1_error) + __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 440, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); #endif } @@ -5459,7 +5631,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 438, __pyx_L1_error) + else __PYX_ERR(0, 440, __pyx_L1_error) } break; } @@ -5468,27 +5640,27 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __Pyx_XDECREF_SET(__pyx_v_sent, __pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/fasttext_inner.pyx":439 + /* "gensim/models/fasttext_inner.pyx":441 * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: * if not sent: # <<<<<<<<<<<<<< * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: */ - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_v_sent); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 439, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_v_sent); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 441, __pyx_L1_error) __pyx_t_7 = ((!__pyx_t_5) != 0); if (__pyx_t_7) { - /* "gensim/models/fasttext_inner.pyx":440 + /* "gensim/models/fasttext_inner.pyx":442 * for sent in sentences: * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged # <<<<<<<<<<<<<< * for token in sent: - * word = vlookup[token] if token in vlookup else None + * print(token) */ goto __pyx_L8_continue; - /* "gensim/models/fasttext_inner.pyx":439 + /* "gensim/models/fasttext_inner.pyx":441 * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: * if not sent: # <<<<<<<<<<<<<< @@ -5497,37 +5669,37 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":441 + /* "gensim/models/fasttext_inner.pyx":443 * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: # <<<<<<<<<<<<<< + * print(token) * word = vlookup[token] if token in vlookup else None - * if word is None: */ if (likely(PyList_CheckExact(__pyx_v_sent)) || PyTuple_CheckExact(__pyx_v_sent)) { __pyx_t_8 = __pyx_v_sent; __Pyx_INCREF(__pyx_t_8); __pyx_t_11 = 0; __pyx_t_12 = NULL; } else { - __pyx_t_11 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_v_sent); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 441, __pyx_L1_error) + __pyx_t_11 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_v_sent); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 443, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - __pyx_t_12 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 441, __pyx_L1_error) + __pyx_t_12 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 443, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_12)) { if (likely(PyList_CheckExact(__pyx_t_8))) { if (__pyx_t_11 >= PyList_GET_SIZE(__pyx_t_8)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 441, __pyx_L1_error) + __pyx_t_1 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 443, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 441, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 443, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } else { if (__pyx_t_11 >= PyTuple_GET_SIZE(__pyx_t_8)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 441, __pyx_L1_error) + __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 443, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 441, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 443, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } @@ -5537,7 +5709,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 441, __pyx_L1_error) + else __PYX_ERR(0, 443, __pyx_L1_error) } break; } @@ -5546,16 +5718,25 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __Pyx_XDECREF_SET(__pyx_v_token, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":442 + /* "gensim/models/fasttext_inner.pyx":444 * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: + * print(token) # <<<<<<<<<<<<<< + * word = vlookup[token] if token in vlookup else None + * if word is None: + */ + if (__Pyx_PrintOne(0, __pyx_v_token) < 0) __PYX_ERR(0, 444, __pyx_L1_error) + + /* "gensim/models/fasttext_inner.pyx":445 + * for token in sent: + * print(token) * word = vlookup[token] if token in vlookup else None # <<<<<<<<<<<<<< * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window */ - __pyx_t_7 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 442, __pyx_L1_error) + __pyx_t_7 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 445, __pyx_L1_error) if ((__pyx_t_7 != 0)) { - __pyx_t_13 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 442, __pyx_L1_error) + __pyx_t_13 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 445, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __pyx_t_1 = __pyx_t_13; __pyx_t_13 = 0; @@ -5566,8 +5747,8 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":443 - * for token in sent: + /* "gensim/models/fasttext_inner.pyx":446 + * print(token) * word = vlookup[token] if token in vlookup else None * if word is None: # <<<<<<<<<<<<<< * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window @@ -5577,7 +5758,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_t_7 != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":444 + /* "gensim/models/fasttext_inner.pyx":447 * word = vlookup[token] if token in vlookup else None * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window # <<<<<<<<<<<<<< @@ -5586,8 +5767,8 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ goto __pyx_L11_continue; - /* "gensim/models/fasttext_inner.pyx":443 - * for token in sent: + /* "gensim/models/fasttext_inner.pyx":446 + * print(token) * word = vlookup[token] if token in vlookup else None * if word is None: # <<<<<<<<<<<<<< * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window @@ -5595,7 +5776,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":445 + /* "gensim/models/fasttext_inner.pyx":448 * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< @@ -5608,20 +5789,20 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = __pyx_t_7; goto __pyx_L15_bool_binop_done; } - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 445, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 448, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_13 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 445, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 448, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_14 = PyObject_RichCompare(__pyx_t_1, __pyx_t_13, Py_LT); __Pyx_XGOTREF(__pyx_t_14); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 445, __pyx_L1_error) + __pyx_t_14 = PyObject_RichCompare(__pyx_t_1, __pyx_t_13, Py_LT); __Pyx_XGOTREF(__pyx_t_14); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 448, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_14); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 445, __pyx_L1_error) + __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_14); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 448, __pyx_L1_error) __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; __pyx_t_5 = __pyx_t_7; __pyx_L15_bool_binop_done:; if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":446 + /* "gensim/models/fasttext_inner.pyx":449 * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): * continue # <<<<<<<<<<<<<< @@ -5630,7 +5811,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ goto __pyx_L11_continue; - /* "gensim/models/fasttext_inner.pyx":445 + /* "gensim/models/fasttext_inner.pyx":448 * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< @@ -5639,172 +5820,292 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":447 + /* "gensim/models/fasttext_inner.pyx":450 * if sample and word.sample_int < random_int32(&next_random): * continue * indexes[effective_words] = word.index # <<<<<<<<<<<<<< * - * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] + * subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] */ - __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 447, __pyx_L1_error) + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_t_14); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 447, __pyx_L1_error) + __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_t_14); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; (__pyx_v_indexes[__pyx_v_effective_words]) = __pyx_t_15; - /* "gensim/models/fasttext_inner.pyx":449 + /* "gensim/models/fasttext_inner.pyx":452 * indexes[effective_words] = word.index * - * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] # <<<<<<<<<<<<<< + * subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] # <<<<<<<<<<<<<< + * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] * word_subwords = np.array(subwords, dtype=np.uint32) - * subwords_idx_len[effective_words] = len(subwords) */ - __pyx_t_14 = PyList_New(0); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 449, __pyx_L1_error) + __pyx_t_14 = PyList_New(0); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 452, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 449, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_ngrams_word); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 449, __pyx_L1_error) + + /* "gensim/models/fasttext_inner.pyx":453 + * + * subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] + * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] # <<<<<<<<<<<<<< + * word_subwords = np.array(subwords, dtype=np.uint32) + * subwords_idx_len[effective_words] = len(subwords) + */ + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_compute_ngrams); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 453, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 449, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_index2word); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 449, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 453, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 449, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __pyx_t_17 = PyObject_GetItem(__pyx_t_16, __pyx_t_13); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 449, __pyx_L1_error) + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_min_n); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 453, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = PyObject_GetItem(__pyx_t_1, __pyx_t_17); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 449, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 453, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); + __pyx_t_18 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_max_n); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 453, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_18); + __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + __pyx_t_16 = NULL; + __pyx_t_2 = 0; + if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_1))) { + __pyx_t_16 = PyMethod_GET_SELF(__pyx_t_1); + if (likely(__pyx_t_16)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1); + __Pyx_INCREF(__pyx_t_16); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_1, function); + __pyx_t_2 = 1; + } + } + #if CYTHON_FAST_PYCALL + if (PyFunction_Check(__pyx_t_1)) { + PyObject *__pyx_temp[4] = {__pyx_t_16, __pyx_v_token, __pyx_t_17, __pyx_t_18}; + __pyx_t_13 = __Pyx_PyFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_2, 3+__pyx_t_2); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 453, __pyx_L1_error) + __Pyx_XDECREF(__pyx_t_16); __pyx_t_16 = 0; + __Pyx_GOTREF(__pyx_t_13); + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; + __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; + } else + #endif + #if CYTHON_FAST_PYCCALL + if (__Pyx_PyFastCFunction_Check(__pyx_t_1)) { + PyObject *__pyx_temp[4] = {__pyx_t_16, __pyx_v_token, __pyx_t_17, __pyx_t_18}; + __pyx_t_13 = __Pyx_PyCFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_2, 3+__pyx_t_2); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 453, __pyx_L1_error) + __Pyx_XDECREF(__pyx_t_16); __pyx_t_16 = 0; + __Pyx_GOTREF(__pyx_t_13); + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; + __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; + } else + #endif + { + __pyx_t_19 = PyTuple_New(3+__pyx_t_2); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 453, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_19); + if (__pyx_t_16) { + __Pyx_GIVEREF(__pyx_t_16); PyTuple_SET_ITEM(__pyx_t_19, 0, __pyx_t_16); __pyx_t_16 = NULL; + } + __Pyx_INCREF(__pyx_v_token); + __Pyx_GIVEREF(__pyx_v_token); + PyTuple_SET_ITEM(__pyx_t_19, 0+__pyx_t_2, __pyx_v_token); + __Pyx_GIVEREF(__pyx_t_17); + PyTuple_SET_ITEM(__pyx_t_19, 1+__pyx_t_2, __pyx_t_17); + __Pyx_GIVEREF(__pyx_t_18); + PyTuple_SET_ITEM(__pyx_t_19, 2+__pyx_t_2, __pyx_t_18); + __pyx_t_17 = 0; + __pyx_t_18 = 0; + __pyx_t_13 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_19, NULL); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 453, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; + } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; if (likely(PyList_CheckExact(__pyx_t_13)) || PyTuple_CheckExact(__pyx_t_13)) { - __pyx_t_17 = __pyx_t_13; __Pyx_INCREF(__pyx_t_17); __pyx_t_18 = 0; - __pyx_t_19 = NULL; + __pyx_t_1 = __pyx_t_13; __Pyx_INCREF(__pyx_t_1); __pyx_t_20 = 0; + __pyx_t_21 = NULL; } else { - __pyx_t_18 = -1; __pyx_t_17 = PyObject_GetIter(__pyx_t_13); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 449, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_17); - __pyx_t_19 = Py_TYPE(__pyx_t_17)->tp_iternext; if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 449, __pyx_L1_error) + __pyx_t_20 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_t_13); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 453, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_21 = Py_TYPE(__pyx_t_1)->tp_iternext; if (unlikely(!__pyx_t_21)) __PYX_ERR(0, 453, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; for (;;) { - if (likely(!__pyx_t_19)) { - if (likely(PyList_CheckExact(__pyx_t_17))) { - if (__pyx_t_18 >= PyList_GET_SIZE(__pyx_t_17)) break; + if (likely(!__pyx_t_21)) { + if (likely(PyList_CheckExact(__pyx_t_1))) { + if (__pyx_t_20 >= PyList_GET_SIZE(__pyx_t_1)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_13 = PyList_GET_ITEM(__pyx_t_17, __pyx_t_18); __Pyx_INCREF(__pyx_t_13); __pyx_t_18++; if (unlikely(0 < 0)) __PYX_ERR(0, 449, __pyx_L1_error) + __pyx_t_13 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_20); __Pyx_INCREF(__pyx_t_13); __pyx_t_20++; if (unlikely(0 < 0)) __PYX_ERR(0, 453, __pyx_L1_error) #else - __pyx_t_13 = PySequence_ITEM(__pyx_t_17, __pyx_t_18); __pyx_t_18++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 449, __pyx_L1_error) + __pyx_t_13 = PySequence_ITEM(__pyx_t_1, __pyx_t_20); __pyx_t_20++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 453, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); #endif } else { - if (__pyx_t_18 >= PyTuple_GET_SIZE(__pyx_t_17)) break; + if (__pyx_t_20 >= PyTuple_GET_SIZE(__pyx_t_1)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_13 = PyTuple_GET_ITEM(__pyx_t_17, __pyx_t_18); __Pyx_INCREF(__pyx_t_13); __pyx_t_18++; if (unlikely(0 < 0)) __PYX_ERR(0, 449, __pyx_L1_error) + __pyx_t_13 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_20); __Pyx_INCREF(__pyx_t_13); __pyx_t_20++; if (unlikely(0 < 0)) __PYX_ERR(0, 453, __pyx_L1_error) #else - __pyx_t_13 = PySequence_ITEM(__pyx_t_17, __pyx_t_18); __pyx_t_18++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 449, __pyx_L1_error) + __pyx_t_13 = PySequence_ITEM(__pyx_t_1, __pyx_t_20); __pyx_t_20++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 453, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); #endif } } else { - __pyx_t_13 = __pyx_t_19(__pyx_t_17); + __pyx_t_13 = __pyx_t_21(__pyx_t_1); if (unlikely(!__pyx_t_13)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 449, __pyx_L1_error) + else __PYX_ERR(0, 453, __pyx_L1_error) } break; } __Pyx_GOTREF(__pyx_t_13); } - __Pyx_XDECREF_SET(__pyx_v_subword_i, __pyx_t_13); + __Pyx_XDECREF_SET(__pyx_v_subword, __pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 449, __pyx_L1_error) + + /* "gensim/models/fasttext_inner.pyx":452 + * indexes[effective_words] = word.index + * + * subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] # <<<<<<<<<<<<<< + * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] + * word_subwords = np.array(subwords, dtype=np.uint32) + */ + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 452, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_ngrams); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 449, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); + __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_hash2index); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 452, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_19); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = PyObject_GetItem(__pyx_t_1, __pyx_v_subword_i); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 449, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (unlikely(__Pyx_ListComp_Append(__pyx_t_14, (PyObject*)__pyx_t_13))) __PYX_ERR(0, 449, __pyx_L1_error) + __pyx_t_18 = __Pyx_GetModuleGlobalName(__pyx_n_s_ft_hash); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 452, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_18); + __pyx_t_17 = NULL; + if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_18))) { + __pyx_t_17 = PyMethod_GET_SELF(__pyx_t_18); + if (likely(__pyx_t_17)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_18); + __Pyx_INCREF(__pyx_t_17); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_18, function); + } + } + if (!__pyx_t_17) { + __pyx_t_13 = __Pyx_PyObject_CallOneArg(__pyx_t_18, __pyx_v_subword); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 452, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + } else { + #if CYTHON_FAST_PYCALL + if (PyFunction_Check(__pyx_t_18)) { + PyObject *__pyx_temp[2] = {__pyx_t_17, __pyx_v_subword}; + __pyx_t_13 = __Pyx_PyFunction_FastCall(__pyx_t_18, __pyx_temp+1-1, 1+1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 452, __pyx_L1_error) + __Pyx_XDECREF(__pyx_t_17); __pyx_t_17 = 0; + __Pyx_GOTREF(__pyx_t_13); + } else + #endif + #if CYTHON_FAST_PYCCALL + if (__Pyx_PyFastCFunction_Check(__pyx_t_18)) { + PyObject *__pyx_temp[2] = {__pyx_t_17, __pyx_v_subword}; + __pyx_t_13 = __Pyx_PyCFunction_FastCall(__pyx_t_18, __pyx_temp+1-1, 1+1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 452, __pyx_L1_error) + __Pyx_XDECREF(__pyx_t_17); __pyx_t_17 = 0; + __Pyx_GOTREF(__pyx_t_13); + } else + #endif + { + __pyx_t_16 = PyTuple_New(1+1); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 452, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); + __Pyx_GIVEREF(__pyx_t_17); PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_17); __pyx_t_17 = NULL; + __Pyx_INCREF(__pyx_v_subword); + __Pyx_GIVEREF(__pyx_v_subword); + PyTuple_SET_ITEM(__pyx_t_16, 0+1, __pyx_v_subword); + __pyx_t_13 = __Pyx_PyObject_Call(__pyx_t_18, __pyx_t_16, NULL); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 452, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + } + } + __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; + __pyx_t_18 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_bucket); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 452, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_18); + __pyx_t_16 = PyNumber_Remainder(__pyx_t_13, __pyx_t_18); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 452, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; + __pyx_t_18 = PyObject_GetItem(__pyx_t_19, __pyx_t_16); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 452, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_18); + __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; + __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + if (unlikely(__Pyx_ListComp_Append(__pyx_t_14, (PyObject*)__pyx_t_18))) __PYX_ERR(0, 452, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; + + /* "gensim/models/fasttext_inner.pyx":453 + * + * subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] + * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] # <<<<<<<<<<<<<< + * word_subwords = np.array(subwords, dtype=np.uint32) + * subwords_idx_len[effective_words] = len(subwords) + */ } - __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_XDECREF_SET(__pyx_v_subwords, ((PyObject*)__pyx_t_14)); __pyx_t_14 = 0; - /* "gensim/models/fasttext_inner.pyx":450 - * - * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] + /* "gensim/models/fasttext_inner.pyx":454 + * subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] + * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] * word_subwords = np.array(subwords, dtype=np.uint32) # <<<<<<<<<<<<<< * subwords_idx_len[effective_words] = len(subwords) * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) */ - __pyx_t_14 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 450, __pyx_L1_error) + __pyx_t_14 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_array); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 450, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_17); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_array); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 454, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - __pyx_t_14 = PyTuple_New(1); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 450, __pyx_L1_error) + __pyx_t_14 = PyTuple_New(1); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); __Pyx_INCREF(__pyx_v_subwords); __Pyx_GIVEREF(__pyx_v_subwords); PyTuple_SET_ITEM(__pyx_t_14, 0, __pyx_v_subwords); - __pyx_t_13 = PyDict_New(); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 450, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 450, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_uint32); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 450, __pyx_L1_error) + __pyx_t_18 = PyDict_New(); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 454, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_18); + __pyx_t_16 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (PyDict_SetItem(__pyx_t_13, __pyx_n_s_dtype, __pyx_t_16) < 0) __PYX_ERR(0, 450, __pyx_L1_error) + __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_uint32); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 454, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_19); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - __pyx_t_16 = __Pyx_PyObject_Call(__pyx_t_17, __pyx_t_14, __pyx_t_13); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 450, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_16); - __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; + if (PyDict_SetItem(__pyx_t_18, __pyx_n_s_dtype, __pyx_t_19) < 0) __PYX_ERR(0, 454, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; + __pyx_t_19 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_14, __pyx_t_18); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 454, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_19); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __Pyx_XDECREF_SET(__pyx_v_word_subwords, __pyx_t_16); - __pyx_t_16 = 0; + __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; + __Pyx_XDECREF_SET(__pyx_v_word_subwords, __pyx_t_19); + __pyx_t_19 = 0; - /* "gensim/models/fasttext_inner.pyx":451 - * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] + /* "gensim/models/fasttext_inner.pyx":455 + * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] * word_subwords = np.array(subwords, dtype=np.uint32) * subwords_idx_len[effective_words] = len(subwords) # <<<<<<<<<<<<<< * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) * # ensures reference count of word_subwords doesn't reach 0 */ - __pyx_t_18 = PyList_GET_SIZE(__pyx_v_subwords); if (unlikely(__pyx_t_18 == -1)) __PYX_ERR(0, 451, __pyx_L1_error) - (__pyx_v_subwords_idx_len[__pyx_v_effective_words]) = ((int)__pyx_t_18); + __pyx_t_20 = PyList_GET_SIZE(__pyx_v_subwords); if (unlikely(__pyx_t_20 == -1)) __PYX_ERR(0, 455, __pyx_L1_error) + (__pyx_v_subwords_idx_len[__pyx_v_effective_words]) = ((int)__pyx_t_20); - /* "gensim/models/fasttext_inner.pyx":452 + /* "gensim/models/fasttext_inner.pyx":456 * word_subwords = np.array(subwords, dtype=np.uint32) * subwords_idx_len[effective_words] = len(subwords) * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) # <<<<<<<<<<<<<< * # ensures reference count of word_subwords doesn't reach 0 * subword_arrays[effective_words] = word_subwords */ - if (!(likely(((__pyx_v_word_subwords) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_subwords, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 452, __pyx_L1_error) + if (!(likely(((__pyx_v_word_subwords) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_subwords, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 456, __pyx_L1_error) (__pyx_v_subwords_idx[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_subwords))); - /* "gensim/models/fasttext_inner.pyx":454 + /* "gensim/models/fasttext_inner.pyx":458 * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) * # ensures reference count of word_subwords doesn't reach 0 * subword_arrays[effective_words] = word_subwords # <<<<<<<<<<<<<< * * if hs: */ - __pyx_t_16 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 454, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_16); - if (unlikely(PyDict_SetItem(__pyx_v_subword_arrays, __pyx_t_16, __pyx_v_word_subwords) < 0)) __PYX_ERR(0, 454, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + __pyx_t_19 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 458, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_19); + if (unlikely(PyDict_SetItem(__pyx_v_subword_arrays, __pyx_t_19, __pyx_v_word_subwords) < 0)) __PYX_ERR(0, 458, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - /* "gensim/models/fasttext_inner.pyx":456 + /* "gensim/models/fasttext_inner.pyx":460 * subword_arrays[effective_words] = word_subwords * * if hs: # <<<<<<<<<<<<<< @@ -5814,46 +6115,46 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":457 + /* "gensim/models/fasttext_inner.pyx":461 * * if hs: * codelens[effective_words] = len(word.code) # <<<<<<<<<<<<<< * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) */ - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 457, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_16); - __pyx_t_18 = PyObject_Length(__pyx_t_16); if (unlikely(__pyx_t_18 == -1)) __PYX_ERR(0, 457, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - (__pyx_v_codelens[__pyx_v_effective_words]) = ((int)__pyx_t_18); + __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 461, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_19); + __pyx_t_20 = PyObject_Length(__pyx_t_19); if (unlikely(__pyx_t_20 == -1)) __PYX_ERR(0, 461, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; + (__pyx_v_codelens[__pyx_v_effective_words]) = ((int)__pyx_t_20); - /* "gensim/models/fasttext_inner.pyx":458 + /* "gensim/models/fasttext_inner.pyx":462 * if hs: * codelens[effective_words] = len(word.code) * codes[effective_words] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< * points[effective_words] = np.PyArray_DATA(word.point) * effective_words += 1 */ - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 458, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_16); - if (!(likely(((__pyx_t_16) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_16, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 458, __pyx_L1_error) - (__pyx_v_codes[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_16))); - __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 462, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_19); + if (!(likely(((__pyx_t_19) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_19, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 462, __pyx_L1_error) + (__pyx_v_codes[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_19))); + __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - /* "gensim/models/fasttext_inner.pyx":459 + /* "gensim/models/fasttext_inner.pyx":463 * codelens[effective_words] = len(word.code) * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: */ - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 459, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_16); - if (!(likely(((__pyx_t_16) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_16, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 459, __pyx_L1_error) - (__pyx_v_points[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_16))); - __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 463, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_19); + if (!(likely(((__pyx_t_19) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_19, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 463, __pyx_L1_error) + (__pyx_v_points[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_19))); + __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - /* "gensim/models/fasttext_inner.pyx":456 + /* "gensim/models/fasttext_inner.pyx":460 * subword_arrays[effective_words] = word_subwords * * if hs: # <<<<<<<<<<<<<< @@ -5862,7 +6163,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":460 + /* "gensim/models/fasttext_inner.pyx":464 * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) * effective_words += 1 # <<<<<<<<<<<<<< @@ -5871,7 +6172,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_effective_words = (__pyx_v_effective_words + 1); - /* "gensim/models/fasttext_inner.pyx":461 + /* "gensim/models/fasttext_inner.pyx":465 * points[effective_words] = np.PyArray_DATA(word.point) * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< @@ -5881,7 +6182,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = ((__pyx_v_effective_words == 0x2710) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":462 + /* "gensim/models/fasttext_inner.pyx":466 * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: * break # <<<<<<<<<<<<<< @@ -5890,7 +6191,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ goto __pyx_L12_break; - /* "gensim/models/fasttext_inner.pyx":461 + /* "gensim/models/fasttext_inner.pyx":465 * points[effective_words] = np.PyArray_DATA(word.point) * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< @@ -5899,19 +6200,19 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":441 + /* "gensim/models/fasttext_inner.pyx":443 * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: # <<<<<<<<<<<<<< + * print(token) * word = vlookup[token] if token in vlookup else None - * if word is None: */ __pyx_L11_continue:; } __pyx_L12_break:; __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/fasttext_inner.pyx":467 + /* "gensim/models/fasttext_inner.pyx":471 * # across sentence boundaries. * # indices of sentence number X are between tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 474, __pyx_L1_error) + __pyx_t_6 = -1; __pyx_t_19 = PyObject_GetIter(__pyx_t_3); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 478, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_19); + __pyx_t_10 = Py_TYPE(__pyx_t_19)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 478, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; for (;;) { if (likely(!__pyx_t_10)) { - if (likely(PyList_CheckExact(__pyx_t_16))) { - if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_16)) break; + if (likely(PyList_CheckExact(__pyx_t_19))) { + if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_19)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyList_GET_ITEM(__pyx_t_16, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 474, __pyx_L1_error) + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_19, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 478, __pyx_L1_error) #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_16, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 474, __pyx_L1_error) + __pyx_t_3 = PySequence_ITEM(__pyx_t_19, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 478, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); #endif } else { - if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_16)) break; + if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_19)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_16, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 474, __pyx_L1_error) + __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_19, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 478, __pyx_L1_error) #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_16, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 474, __pyx_L1_error) + __pyx_t_3 = PySequence_ITEM(__pyx_t_19, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 478, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); #endif } } else { - __pyx_t_3 = __pyx_t_10(__pyx_t_16); + __pyx_t_3 = __pyx_t_10(__pyx_t_19); if (unlikely(!__pyx_t_3)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 474, __pyx_L1_error) + else __PYX_ERR(0, 478, __pyx_L1_error) } break; } @@ -6083,17 +6384,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_v_i = __pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "gensim/models/fasttext_inner.pyx":475 + /* "gensim/models/fasttext_inner.pyx":479 * # precompute "reduced window" offsets in a single randint() call * for i, item in enumerate(model.random.randint(0, window, effective_words)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * * # release GIL & train on all sentences */ - __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 475, __pyx_L1_error) + __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 479, __pyx_L1_error) (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_15; - /* "gensim/models/fasttext_inner.pyx":474 + /* "gensim/models/fasttext_inner.pyx":478 * * # precompute "reduced window" offsets in a single randint() call * for i, item in enumerate(model.random.randint(0, window, effective_words)): # <<<<<<<<<<<<<< @@ -6101,9 +6402,9 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT * */ } - __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - /* "gensim/models/fasttext_inner.pyx":478 + /* "gensim/models/fasttext_inner.pyx":482 * * # release GIL & train on all sentences * with nogil: # <<<<<<<<<<<<<< @@ -6117,7 +6418,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT #endif /*try:*/ { - /* "gensim/models/fasttext_inner.pyx":479 + /* "gensim/models/fasttext_inner.pyx":483 * # release GIL & train on all sentences * with nogil: * for sent_idx in range(effective_sentences): # <<<<<<<<<<<<<< @@ -6125,10 +6426,10 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT * idx_end = sentence_idx[sent_idx + 1] */ __pyx_t_2 = __pyx_v_effective_sentences; - for (__pyx_t_20 = 0; __pyx_t_20 < __pyx_t_2; __pyx_t_20+=1) { - __pyx_v_sent_idx = __pyx_t_20; + for (__pyx_t_22 = 0; __pyx_t_22 < __pyx_t_2; __pyx_t_22+=1) { + __pyx_v_sent_idx = __pyx_t_22; - /* "gensim/models/fasttext_inner.pyx":480 + /* "gensim/models/fasttext_inner.pyx":484 * with nogil: * for sent_idx in range(effective_sentences): * idx_start = sentence_idx[sent_idx] # <<<<<<<<<<<<<< @@ -6137,7 +6438,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_idx_start = (__pyx_v_sentence_idx[__pyx_v_sent_idx]); - /* "gensim/models/fasttext_inner.pyx":481 + /* "gensim/models/fasttext_inner.pyx":485 * for sent_idx in range(effective_sentences): * idx_start = sentence_idx[sent_idx] * idx_end = sentence_idx[sent_idx + 1] # <<<<<<<<<<<<<< @@ -6146,18 +6447,18 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_idx_end = (__pyx_v_sentence_idx[(__pyx_v_sent_idx + 1)]); - /* "gensim/models/fasttext_inner.pyx":482 + /* "gensim/models/fasttext_inner.pyx":486 * idx_start = sentence_idx[sent_idx] * idx_end = sentence_idx[sent_idx + 1] * for i in range(idx_start, idx_end): # <<<<<<<<<<<<<< * j = i - window + reduced_windows[i] * if j < idx_start: */ - __pyx_t_21 = __pyx_v_idx_end; - for (__pyx_t_22 = __pyx_v_idx_start; __pyx_t_22 < __pyx_t_21; __pyx_t_22+=1) { - __pyx_v_i = __pyx_t_22; + __pyx_t_23 = __pyx_v_idx_end; + for (__pyx_t_24 = __pyx_v_idx_start; __pyx_t_24 < __pyx_t_23; __pyx_t_24+=1) { + __pyx_v_i = __pyx_t_24; - /* "gensim/models/fasttext_inner.pyx":483 + /* "gensim/models/fasttext_inner.pyx":487 * idx_end = sentence_idx[sent_idx + 1] * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< @@ -6166,7 +6467,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/fasttext_inner.pyx":484 + /* "gensim/models/fasttext_inner.pyx":488 * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] * if j < idx_start: # <<<<<<<<<<<<<< @@ -6176,7 +6477,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = ((__pyx_v_j < __pyx_v_idx_start) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":485 + /* "gensim/models/fasttext_inner.pyx":489 * j = i - window + reduced_windows[i] * if j < idx_start: * j = idx_start # <<<<<<<<<<<<<< @@ -6185,7 +6486,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_j = __pyx_v_idx_start; - /* "gensim/models/fasttext_inner.pyx":484 + /* "gensim/models/fasttext_inner.pyx":488 * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] * if j < idx_start: # <<<<<<<<<<<<<< @@ -6194,7 +6495,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":486 + /* "gensim/models/fasttext_inner.pyx":490 * if j < idx_start: * j = idx_start * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -6203,7 +6504,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/fasttext_inner.pyx":487 + /* "gensim/models/fasttext_inner.pyx":491 * j = idx_start * k = i + window + 1 - reduced_windows[i] * if k > idx_end: # <<<<<<<<<<<<<< @@ -6213,7 +6514,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = ((__pyx_v_k > __pyx_v_idx_end) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":488 + /* "gensim/models/fasttext_inner.pyx":492 * k = i + window + 1 - reduced_windows[i] * if k > idx_end: * k = idx_end # <<<<<<<<<<<<<< @@ -6222,7 +6523,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_k = __pyx_v_idx_end; - /* "gensim/models/fasttext_inner.pyx":487 + /* "gensim/models/fasttext_inner.pyx":491 * j = idx_start * k = i + window + 1 - reduced_windows[i] * if k > idx_end: # <<<<<<<<<<<<<< @@ -6231,7 +6532,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":490 + /* "gensim/models/fasttext_inner.pyx":494 * k = idx_end * * if hs: # <<<<<<<<<<<<<< @@ -6241,7 +6542,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":491 + /* "gensim/models/fasttext_inner.pyx":495 * * if hs: * fast_sentence_cbow_hs( # <<<<<<<<<<<<<< @@ -6250,7 +6551,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), __pyx_v_codelens, __pyx_v_neu1, __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1, __pyx_v_size, __pyx_v_indexes, __pyx_v_subwords_idx, __pyx_v_subwords_idx_len, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); - /* "gensim/models/fasttext_inner.pyx":490 + /* "gensim/models/fasttext_inner.pyx":494 * k = idx_end * * if hs: # <<<<<<<<<<<<<< @@ -6259,7 +6560,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":495 + /* "gensim/models/fasttext_inner.pyx":499 * subwords_idx,subwords_idx_len,_alpha, work, i, j, k, cbow_mean, word_locks_vocab, * word_locks_ngrams) * if negative: # <<<<<<<<<<<<<< @@ -6269,7 +6570,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_v_negative != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":496 + /* "gensim/models/fasttext_inner.pyx":500 * word_locks_ngrams) * if negative: * next_random = fast_sentence_cbow_neg( # <<<<<<<<<<<<<< @@ -6278,7 +6579,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_next_random = __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_neg(__pyx_v_negative, __pyx_v_cum_table, __pyx_v_cum_table_len, __pyx_v_codelens, __pyx_v_neu1, __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1neg, __pyx_v_size, __pyx_v_indexes, __pyx_v_subwords_idx, __pyx_v_subwords_idx_len, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean, __pyx_v_next_random, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); - /* "gensim/models/fasttext_inner.pyx":495 + /* "gensim/models/fasttext_inner.pyx":499 * subwords_idx,subwords_idx_len,_alpha, work, i, j, k, cbow_mean, word_locks_vocab, * word_locks_ngrams) * if negative: # <<<<<<<<<<<<<< @@ -6290,7 +6591,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT } } - /* "gensim/models/fasttext_inner.pyx":478 + /* "gensim/models/fasttext_inner.pyx":482 * * # release GIL & train on all sentences * with nogil: # <<<<<<<<<<<<<< @@ -6308,7 +6609,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT } } - /* "gensim/models/fasttext_inner.pyx":501 + /* "gensim/models/fasttext_inner.pyx":505 * cbow_mean, next_random, word_locks_vocab, word_locks_ngrams) * * return effective_words # <<<<<<<<<<<<<< @@ -6316,13 +6617,13 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_16 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 501, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_16); - __pyx_r = __pyx_t_16; - __pyx_t_16 = 0; + __pyx_t_19 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 505, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_19); + __pyx_r = __pyx_t_19; + __pyx_t_19 = 0; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":377 + /* "gensim/models/fasttext_inner.pyx":378 * * * def train_batch_cbow(model, sentences, alpha, _work, _neu1): # <<<<<<<<<<<<<< @@ -6339,6 +6640,8 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __Pyx_XDECREF(__pyx_t_14); __Pyx_XDECREF(__pyx_t_16); __Pyx_XDECREF(__pyx_t_17); + __Pyx_XDECREF(__pyx_t_18); + __Pyx_XDECREF(__pyx_t_19); __Pyx_AddTraceback("gensim.models.fasttext_inner.train_batch_cbow", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; @@ -6350,13 +6653,13 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __Pyx_XDECREF(__pyx_v_subwords); __Pyx_XDECREF(__pyx_v_word_subwords); __Pyx_XDECREF(__pyx_v_item); - __Pyx_XDECREF(__pyx_v_subword_i); + __Pyx_XDECREF(__pyx_v_subword); __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } -/* "gensim/models/fasttext_inner.pyx":504 +/* "gensim/models/fasttext_inner.pyx":508 * * * def init(): # <<<<<<<<<<<<<< @@ -6395,7 +6698,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P int __pyx_t_4; __Pyx_RefNannySetupContext("init", 0); - /* "gensim/models/fasttext_inner.pyx":513 + /* "gensim/models/fasttext_inner.pyx":517 * * cdef int i * cdef float *x = [10.0] # <<<<<<<<<<<<<< @@ -6405,7 +6708,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_t_1[0] = ((float)10.0); __pyx_v_x = __pyx_t_1; - /* "gensim/models/fasttext_inner.pyx":514 + /* "gensim/models/fasttext_inner.pyx":518 * cdef int i * cdef float *x = [10.0] * cdef float *y = [0.01] # <<<<<<<<<<<<<< @@ -6415,7 +6718,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_t_2[0] = ((float)0.01); __pyx_v_y = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":515 + /* "gensim/models/fasttext_inner.pyx":519 * cdef float *x = [10.0] * cdef float *y = [0.01] * cdef float expected = 0.1 # <<<<<<<<<<<<<< @@ -6424,7 +6727,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_expected = ((float)0.1); - /* "gensim/models/fasttext_inner.pyx":516 + /* "gensim/models/fasttext_inner.pyx":520 * cdef float *y = [0.01] * cdef float expected = 0.1 * cdef int size = 1 # <<<<<<<<<<<<<< @@ -6433,7 +6736,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_size = 1; - /* "gensim/models/fasttext_inner.pyx":521 + /* "gensim/models/fasttext_inner.pyx":525 * * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): # <<<<<<<<<<<<<< @@ -6443,7 +6746,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P for (__pyx_t_3 = 0; __pyx_t_3 < 0x3E8; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3; - /* "gensim/models/fasttext_inner.pyx":522 + /* "gensim/models/fasttext_inner.pyx":526 * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) # <<<<<<<<<<<<<< @@ -6452,7 +6755,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)exp(((((__pyx_v_i / ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0x3E8)) * 2.0) - 1.0) * 6.0))); - /* "gensim/models/fasttext_inner.pyx":523 + /* "gensim/models/fasttext_inner.pyx":527 * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) # <<<<<<<<<<<<<< @@ -6461,7 +6764,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) / ((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) + 1.0))); - /* "gensim/models/fasttext_inner.pyx":524 + /* "gensim/models/fasttext_inner.pyx":528 * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) * LOG_TABLE[i] = log( EXP_TABLE[i] ) # <<<<<<<<<<<<<< @@ -6471,7 +6774,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P (__pyx_v_6gensim_6models_14fasttext_inner_LOG_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)log((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]))); } - /* "gensim/models/fasttext_inner.pyx":527 + /* "gensim/models/fasttext_inner.pyx":531 * * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) # <<<<<<<<<<<<<< @@ -6480,7 +6783,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_d_res = __pyx_v_6gensim_6models_14word2vec_inner_dsdot((&__pyx_v_size), __pyx_v_x, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_y, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":528 + /* "gensim/models/fasttext_inner.pyx":532 * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res # <<<<<<<<<<<<<< @@ -6489,7 +6792,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_p_res = ((float *)(&__pyx_v_d_res)); - /* "gensim/models/fasttext_inner.pyx":529 + /* "gensim/models/fasttext_inner.pyx":533 * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res * if (abs(d_res - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -6499,7 +6802,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_t_4 = ((fabs((__pyx_v_d_res - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_4) { - /* "gensim/models/fasttext_inner.pyx":530 + /* "gensim/models/fasttext_inner.pyx":534 * p_res = &d_res * if (abs(d_res - expected) < 0.0001): * our_dot = our_dot_double # <<<<<<<<<<<<<< @@ -6508,7 +6811,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_double; - /* "gensim/models/fasttext_inner.pyx":531 + /* "gensim/models/fasttext_inner.pyx":535 * if (abs(d_res - expected) < 0.0001): * our_dot = our_dot_double * our_saxpy = saxpy # <<<<<<<<<<<<<< @@ -6517,7 +6820,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_v_6gensim_6models_14word2vec_inner_saxpy; - /* "gensim/models/fasttext_inner.pyx":532 + /* "gensim/models/fasttext_inner.pyx":536 * our_dot = our_dot_double * our_saxpy = saxpy * return 0 # double # <<<<<<<<<<<<<< @@ -6529,7 +6832,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_r = __pyx_int_0; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":529 + /* "gensim/models/fasttext_inner.pyx":533 * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res * if (abs(d_res - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -6538,7 +6841,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ } - /* "gensim/models/fasttext_inner.pyx":533 + /* "gensim/models/fasttext_inner.pyx":537 * our_saxpy = saxpy * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -6548,7 +6851,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_t_4 = ((fabsf(((__pyx_v_p_res[0]) - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_4) { - /* "gensim/models/fasttext_inner.pyx":534 + /* "gensim/models/fasttext_inner.pyx":538 * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): * our_dot = our_dot_float # <<<<<<<<<<<<<< @@ -6557,7 +6860,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_float; - /* "gensim/models/fasttext_inner.pyx":535 + /* "gensim/models/fasttext_inner.pyx":539 * elif (abs(p_res[0] - expected) < 0.0001): * our_dot = our_dot_float * our_saxpy = saxpy # <<<<<<<<<<<<<< @@ -6566,7 +6869,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_v_6gensim_6models_14word2vec_inner_saxpy; - /* "gensim/models/fasttext_inner.pyx":536 + /* "gensim/models/fasttext_inner.pyx":540 * our_dot = our_dot_float * our_saxpy = saxpy * return 1 # float # <<<<<<<<<<<<<< @@ -6578,7 +6881,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_r = __pyx_int_1; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":533 + /* "gensim/models/fasttext_inner.pyx":537 * our_saxpy = saxpy * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -6587,7 +6890,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ } - /* "gensim/models/fasttext_inner.pyx":540 + /* "gensim/models/fasttext_inner.pyx":544 * # neither => use cython loops, no BLAS * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here * our_dot = our_dot_noblas # <<<<<<<<<<<<<< @@ -6597,7 +6900,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P /*else*/ { __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_noblas; - /* "gensim/models/fasttext_inner.pyx":541 + /* "gensim/models/fasttext_inner.pyx":545 * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas # <<<<<<<<<<<<<< @@ -6606,7 +6909,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_f_6gensim_6models_14word2vec_inner_our_saxpy_noblas; - /* "gensim/models/fasttext_inner.pyx":542 + /* "gensim/models/fasttext_inner.pyx":546 * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas * return 2 # <<<<<<<<<<<<<< @@ -6619,7 +6922,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P goto __pyx_L0; } - /* "gensim/models/fasttext_inner.pyx":504 + /* "gensim/models/fasttext_inner.pyx":508 * * * def init(): # <<<<<<<<<<<<<< @@ -6634,7 +6937,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P return __pyx_r; } -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":197 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":197 * # experimental exception made for __getbuffer__ and __releasebuffer__ * # -- the details of this may change. * def __getbuffer__(ndarray self, Py_buffer* info, int flags): # <<<<<<<<<<<<<< @@ -6681,7 +6984,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __Pyx_GIVEREF(__pyx_v_info->obj); } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":203 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":203 * # of flags * * if info == NULL: return # <<<<<<<<<<<<<< @@ -6694,7 +6997,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P goto __pyx_L0; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":206 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":206 * * cdef int copy_shape, i, ndim * cdef int endian_detector = 1 # <<<<<<<<<<<<<< @@ -6703,7 +7006,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_endian_detector = 1; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":207 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":207 * cdef int copy_shape, i, ndim * cdef int endian_detector = 1 * cdef bint little_endian = ((&endian_detector)[0] != 0) # <<<<<<<<<<<<<< @@ -6712,7 +7015,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_little_endian = ((((char *)(&__pyx_v_endian_detector))[0]) != 0); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":209 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":209 * cdef bint little_endian = ((&endian_detector)[0] != 0) * * ndim = PyArray_NDIM(self) # <<<<<<<<<<<<<< @@ -6721,7 +7024,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_ndim = PyArray_NDIM(__pyx_v_self); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":211 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":211 * ndim = PyArray_NDIM(self) * * if sizeof(npy_intp) != sizeof(Py_ssize_t): # <<<<<<<<<<<<<< @@ -6731,7 +7034,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_1 = (((sizeof(npy_intp)) != (sizeof(Py_ssize_t))) != 0); if (__pyx_t_1) { - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":212 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":212 * * if sizeof(npy_intp) != sizeof(Py_ssize_t): * copy_shape = 1 # <<<<<<<<<<<<<< @@ -6740,7 +7043,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_copy_shape = 1; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":211 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":211 * ndim = PyArray_NDIM(self) * * if sizeof(npy_intp) != sizeof(Py_ssize_t): # <<<<<<<<<<<<<< @@ -6750,7 +7053,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P goto __pyx_L4; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":214 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":214 * copy_shape = 1 * else: * copy_shape = 0 # <<<<<<<<<<<<<< @@ -6762,7 +7065,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P } __pyx_L4:; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":216 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":216 * copy_shape = 0 * * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) # <<<<<<<<<<<<<< @@ -6776,7 +7079,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P goto __pyx_L6_bool_binop_done; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":217 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":217 * * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): # <<<<<<<<<<<<<< @@ -6787,7 +7090,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_1 = __pyx_t_2; __pyx_L6_bool_binop_done:; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":216 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":216 * copy_shape = 0 * * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) # <<<<<<<<<<<<<< @@ -6796,7 +7099,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ if (__pyx_t_1) { - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":218 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":218 * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): * raise ValueError(u"ndarray is not C contiguous") # <<<<<<<<<<<<<< @@ -6809,7 +7112,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __PYX_ERR(1, 218, __pyx_L1_error) - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":216 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":216 * copy_shape = 0 * * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) # <<<<<<<<<<<<<< @@ -6818,7 +7121,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":220 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":220 * raise ValueError(u"ndarray is not C contiguous") * * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) # <<<<<<<<<<<<<< @@ -6832,7 +7135,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P goto __pyx_L9_bool_binop_done; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":221 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":221 * * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): # <<<<<<<<<<<<<< @@ -6843,7 +7146,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_1 = __pyx_t_2; __pyx_L9_bool_binop_done:; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":220 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":220 * raise ValueError(u"ndarray is not C contiguous") * * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) # <<<<<<<<<<<<<< @@ -6852,7 +7155,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ if (__pyx_t_1) { - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":222 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":222 * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): * raise ValueError(u"ndarray is not Fortran contiguous") # <<<<<<<<<<<<<< @@ -6865,7 +7168,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __PYX_ERR(1, 222, __pyx_L1_error) - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":220 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":220 * raise ValueError(u"ndarray is not C contiguous") * * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) # <<<<<<<<<<<<<< @@ -6874,7 +7177,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":224 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":224 * raise ValueError(u"ndarray is not Fortran contiguous") * * info.buf = PyArray_DATA(self) # <<<<<<<<<<<<<< @@ -6883,7 +7186,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->buf = PyArray_DATA(__pyx_v_self); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":225 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":225 * * info.buf = PyArray_DATA(self) * info.ndim = ndim # <<<<<<<<<<<<<< @@ -6892,7 +7195,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->ndim = __pyx_v_ndim; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":226 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":226 * info.buf = PyArray_DATA(self) * info.ndim = ndim * if copy_shape: # <<<<<<<<<<<<<< @@ -6902,7 +7205,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_1 = (__pyx_v_copy_shape != 0); if (__pyx_t_1) { - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":229 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":229 * # Allocate new buffer for strides and shape info. * # This is allocated as one block, strides first. * info.strides = stdlib.malloc(sizeof(Py_ssize_t) * ndim * 2) # <<<<<<<<<<<<<< @@ -6911,7 +7214,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->strides = ((Py_ssize_t *)malloc((((sizeof(Py_ssize_t)) * ((size_t)__pyx_v_ndim)) * 2))); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":230 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":230 * # This is allocated as one block, strides first. * info.strides = stdlib.malloc(sizeof(Py_ssize_t) * ndim * 2) * info.shape = info.strides + ndim # <<<<<<<<<<<<<< @@ -6920,7 +7223,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->shape = (__pyx_v_info->strides + __pyx_v_ndim); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":231 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":231 * info.strides = stdlib.malloc(sizeof(Py_ssize_t) * ndim * 2) * info.shape = info.strides + ndim * for i in range(ndim): # <<<<<<<<<<<<<< @@ -6931,7 +7234,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { __pyx_v_i = __pyx_t_5; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":232 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":232 * info.shape = info.strides + ndim * for i in range(ndim): * info.strides[i] = PyArray_STRIDES(self)[i] # <<<<<<<<<<<<<< @@ -6940,7 +7243,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ (__pyx_v_info->strides[__pyx_v_i]) = (PyArray_STRIDES(__pyx_v_self)[__pyx_v_i]); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":233 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":233 * for i in range(ndim): * info.strides[i] = PyArray_STRIDES(self)[i] * info.shape[i] = PyArray_DIMS(self)[i] # <<<<<<<<<<<<<< @@ -6950,7 +7253,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P (__pyx_v_info->shape[__pyx_v_i]) = (PyArray_DIMS(__pyx_v_self)[__pyx_v_i]); } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":226 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":226 * info.buf = PyArray_DATA(self) * info.ndim = ndim * if copy_shape: # <<<<<<<<<<<<<< @@ -6960,7 +7263,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P goto __pyx_L11; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":235 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":235 * info.shape[i] = PyArray_DIMS(self)[i] * else: * info.strides = PyArray_STRIDES(self) # <<<<<<<<<<<<<< @@ -6970,7 +7273,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P /*else*/ { __pyx_v_info->strides = ((Py_ssize_t *)PyArray_STRIDES(__pyx_v_self)); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":236 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":236 * else: * info.strides = PyArray_STRIDES(self) * info.shape = PyArray_DIMS(self) # <<<<<<<<<<<<<< @@ -6981,7 +7284,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P } __pyx_L11:; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":237 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":237 * info.strides = PyArray_STRIDES(self) * info.shape = PyArray_DIMS(self) * info.suboffsets = NULL # <<<<<<<<<<<<<< @@ -6990,7 +7293,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->suboffsets = NULL; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":238 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":238 * info.shape = PyArray_DIMS(self) * info.suboffsets = NULL * info.itemsize = PyArray_ITEMSIZE(self) # <<<<<<<<<<<<<< @@ -6999,7 +7302,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->itemsize = PyArray_ITEMSIZE(__pyx_v_self); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":239 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":239 * info.suboffsets = NULL * info.itemsize = PyArray_ITEMSIZE(self) * info.readonly = not PyArray_ISWRITEABLE(self) # <<<<<<<<<<<<<< @@ -7008,7 +7311,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->readonly = (!(PyArray_ISWRITEABLE(__pyx_v_self) != 0)); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":242 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":242 * * cdef int t * cdef char* f = NULL # <<<<<<<<<<<<<< @@ -7017,7 +7320,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_f = NULL; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":243 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":243 * cdef int t * cdef char* f = NULL * cdef dtype descr = self.descr # <<<<<<<<<<<<<< @@ -7029,7 +7332,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_descr = ((PyArray_Descr *)__pyx_t_3); __pyx_t_3 = 0; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":246 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":246 * cdef int offset * * cdef bint hasfields = PyDataType_HASFIELDS(descr) # <<<<<<<<<<<<<< @@ -7038,7 +7341,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_hasfields = PyDataType_HASFIELDS(__pyx_v_descr); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":248 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":248 * cdef bint hasfields = PyDataType_HASFIELDS(descr) * * if not hasfields and not copy_shape: # <<<<<<<<<<<<<< @@ -7056,7 +7359,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_L15_bool_binop_done:; if (__pyx_t_1) { - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":250 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":250 * if not hasfields and not copy_shape: * # do not call releasebuffer * info.obj = None # <<<<<<<<<<<<<< @@ -7069,7 +7372,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __Pyx_DECREF(__pyx_v_info->obj); __pyx_v_info->obj = Py_None; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":248 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":248 * cdef bint hasfields = PyDataType_HASFIELDS(descr) * * if not hasfields and not copy_shape: # <<<<<<<<<<<<<< @@ -7079,7 +7382,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P goto __pyx_L14; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":253 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":253 * else: * # need to call releasebuffer * info.obj = self # <<<<<<<<<<<<<< @@ -7095,7 +7398,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P } __pyx_L14:; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":255 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":255 * info.obj = self * * if not hasfields: # <<<<<<<<<<<<<< @@ -7105,7 +7408,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_1 = ((!(__pyx_v_hasfields != 0)) != 0); if (__pyx_t_1) { - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":256 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":256 * * if not hasfields: * t = descr.type_num # <<<<<<<<<<<<<< @@ -7115,7 +7418,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_4 = __pyx_v_descr->type_num; __pyx_v_t = __pyx_t_4; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":257 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":257 * if not hasfields: * t = descr.type_num * if ((descr.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< @@ -7135,7 +7438,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P } __pyx_L20_next_or:; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":258 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":258 * t = descr.type_num * if ((descr.byteorder == c'>' and little_endian) or * (descr.byteorder == c'<' and not little_endian)): # <<<<<<<<<<<<<< @@ -7152,7 +7455,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_1 = __pyx_t_2; __pyx_L19_bool_binop_done:; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":257 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":257 * if not hasfields: * t = descr.type_num * if ((descr.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< @@ -7161,7 +7464,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ if (__pyx_t_1) { - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":259 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":259 * if ((descr.byteorder == c'>' and little_endian) or * (descr.byteorder == c'<' and not little_endian)): * raise ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<< @@ -7174,7 +7477,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __PYX_ERR(1, 259, __pyx_L1_error) - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":257 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":257 * if not hasfields: * t = descr.type_num * if ((descr.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< @@ -7183,7 +7486,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":260 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":260 * (descr.byteorder == c'<' and not little_endian)): * raise ValueError(u"Non-native byte order not supported") * if t == NPY_BYTE: f = "b" # <<<<<<<<<<<<<< @@ -7195,7 +7498,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"b"); break; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":261 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":261 * raise ValueError(u"Non-native byte order not supported") * if t == NPY_BYTE: f = "b" * elif t == NPY_UBYTE: f = "B" # <<<<<<<<<<<<<< @@ -7206,7 +7509,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"B"); break; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":262 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":262 * if t == NPY_BYTE: f = "b" * elif t == NPY_UBYTE: f = "B" * elif t == NPY_SHORT: f = "h" # <<<<<<<<<<<<<< @@ -7217,7 +7520,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"h"); break; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":263 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":263 * elif t == NPY_UBYTE: f = "B" * elif t == NPY_SHORT: f = "h" * elif t == NPY_USHORT: f = "H" # <<<<<<<<<<<<<< @@ -7228,7 +7531,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"H"); break; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":264 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":264 * elif t == NPY_SHORT: f = "h" * elif t == NPY_USHORT: f = "H" * elif t == NPY_INT: f = "i" # <<<<<<<<<<<<<< @@ -7239,7 +7542,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"i"); break; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":265 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":265 * elif t == NPY_USHORT: f = "H" * elif t == NPY_INT: f = "i" * elif t == NPY_UINT: f = "I" # <<<<<<<<<<<<<< @@ -7250,7 +7553,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"I"); break; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":266 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":266 * elif t == NPY_INT: f = "i" * elif t == NPY_UINT: f = "I" * elif t == NPY_LONG: f = "l" # <<<<<<<<<<<<<< @@ -7261,7 +7564,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"l"); break; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":267 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":267 * elif t == NPY_UINT: f = "I" * elif t == NPY_LONG: f = "l" * elif t == NPY_ULONG: f = "L" # <<<<<<<<<<<<<< @@ -7272,7 +7575,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"L"); break; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":268 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":268 * elif t == NPY_LONG: f = "l" * elif t == NPY_ULONG: f = "L" * elif t == NPY_LONGLONG: f = "q" # <<<<<<<<<<<<<< @@ -7283,7 +7586,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"q"); break; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":269 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":269 * elif t == NPY_ULONG: f = "L" * elif t == NPY_LONGLONG: f = "q" * elif t == NPY_ULONGLONG: f = "Q" # <<<<<<<<<<<<<< @@ -7294,7 +7597,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"Q"); break; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":270 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":270 * elif t == NPY_LONGLONG: f = "q" * elif t == NPY_ULONGLONG: f = "Q" * elif t == NPY_FLOAT: f = "f" # <<<<<<<<<<<<<< @@ -7305,7 +7608,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"f"); break; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":271 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":271 * elif t == NPY_ULONGLONG: f = "Q" * elif t == NPY_FLOAT: f = "f" * elif t == NPY_DOUBLE: f = "d" # <<<<<<<<<<<<<< @@ -7316,7 +7619,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"d"); break; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":272 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":272 * elif t == NPY_FLOAT: f = "f" * elif t == NPY_DOUBLE: f = "d" * elif t == NPY_LONGDOUBLE: f = "g" # <<<<<<<<<<<<<< @@ -7327,7 +7630,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"g"); break; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":273 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":273 * elif t == NPY_DOUBLE: f = "d" * elif t == NPY_LONGDOUBLE: f = "g" * elif t == NPY_CFLOAT: f = "Zf" # <<<<<<<<<<<<<< @@ -7338,7 +7641,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"Zf"); break; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":274 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":274 * elif t == NPY_LONGDOUBLE: f = "g" * elif t == NPY_CFLOAT: f = "Zf" * elif t == NPY_CDOUBLE: f = "Zd" # <<<<<<<<<<<<<< @@ -7349,7 +7652,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"Zd"); break; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":275 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":275 * elif t == NPY_CFLOAT: f = "Zf" * elif t == NPY_CDOUBLE: f = "Zd" * elif t == NPY_CLONGDOUBLE: f = "Zg" # <<<<<<<<<<<<<< @@ -7360,7 +7663,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"Zg"); break; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":276 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":276 * elif t == NPY_CDOUBLE: f = "Zd" * elif t == NPY_CLONGDOUBLE: f = "Zg" * elif t == NPY_OBJECT: f = "O" # <<<<<<<<<<<<<< @@ -7372,7 +7675,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P break; default: - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":278 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":278 * elif t == NPY_OBJECT: f = "O" * else: * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) # <<<<<<<<<<<<<< @@ -7398,7 +7701,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P break; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":279 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":279 * else: * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) * info.format = f # <<<<<<<<<<<<<< @@ -7407,7 +7710,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->format = __pyx_v_f; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":280 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":280 * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) * info.format = f * return # <<<<<<<<<<<<<< @@ -7417,7 +7720,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_r = 0; goto __pyx_L0; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":255 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":255 * info.obj = self * * if not hasfields: # <<<<<<<<<<<<<< @@ -7426,7 +7729,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":282 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":282 * return * else: * info.format = stdlib.malloc(_buffer_format_string_len) # <<<<<<<<<<<<<< @@ -7436,7 +7739,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P /*else*/ { __pyx_v_info->format = ((char *)malloc(0xFF)); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":283 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":283 * else: * info.format = stdlib.malloc(_buffer_format_string_len) * info.format[0] = c'^' # Native data types, manual alignment # <<<<<<<<<<<<<< @@ -7445,7 +7748,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ (__pyx_v_info->format[0]) = '^'; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":284 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":284 * info.format = stdlib.malloc(_buffer_format_string_len) * info.format[0] = c'^' # Native data types, manual alignment * offset = 0 # <<<<<<<<<<<<<< @@ -7454,7 +7757,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_offset = 0; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":285 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":285 * info.format[0] = c'^' # Native data types, manual alignment * offset = 0 * f = _util_dtypestring(descr, info.format + 1, # <<<<<<<<<<<<<< @@ -7464,7 +7767,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_7 = __pyx_f_5numpy__util_dtypestring(__pyx_v_descr, (__pyx_v_info->format + 1), (__pyx_v_info->format + 0xFF), (&__pyx_v_offset)); if (unlikely(__pyx_t_7 == NULL)) __PYX_ERR(1, 285, __pyx_L1_error) __pyx_v_f = __pyx_t_7; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":288 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":288 * info.format + _buffer_format_string_len, * &offset) * f[0] = c'\0' # Terminate format string # <<<<<<<<<<<<<< @@ -7474,7 +7777,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P (__pyx_v_f[0]) = '\x00'; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":197 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":197 * # experimental exception made for __getbuffer__ and __releasebuffer__ * # -- the details of this may change. * def __getbuffer__(ndarray self, Py_buffer* info, int flags): # <<<<<<<<<<<<<< @@ -7506,7 +7809,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P return __pyx_r; } -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":290 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":290 * f[0] = c'\0' # Terminate format string * * def __releasebuffer__(ndarray self, Py_buffer* info): # <<<<<<<<<<<<<< @@ -7530,7 +7833,7 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s int __pyx_t_1; __Pyx_RefNannySetupContext("__releasebuffer__", 0); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":291 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":291 * * def __releasebuffer__(ndarray self, Py_buffer* info): * if PyArray_HASFIELDS(self): # <<<<<<<<<<<<<< @@ -7540,7 +7843,7 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s __pyx_t_1 = (PyArray_HASFIELDS(__pyx_v_self) != 0); if (__pyx_t_1) { - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":292 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":292 * def __releasebuffer__(ndarray self, Py_buffer* info): * if PyArray_HASFIELDS(self): * stdlib.free(info.format) # <<<<<<<<<<<<<< @@ -7549,7 +7852,7 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s */ free(__pyx_v_info->format); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":291 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":291 * * def __releasebuffer__(ndarray self, Py_buffer* info): * if PyArray_HASFIELDS(self): # <<<<<<<<<<<<<< @@ -7558,7 +7861,7 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s */ } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":293 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":293 * if PyArray_HASFIELDS(self): * stdlib.free(info.format) * if sizeof(npy_intp) != sizeof(Py_ssize_t): # <<<<<<<<<<<<<< @@ -7568,7 +7871,7 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s __pyx_t_1 = (((sizeof(npy_intp)) != (sizeof(Py_ssize_t))) != 0); if (__pyx_t_1) { - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":294 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":294 * stdlib.free(info.format) * if sizeof(npy_intp) != sizeof(Py_ssize_t): * stdlib.free(info.strides) # <<<<<<<<<<<<<< @@ -7577,7 +7880,7 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s */ free(__pyx_v_info->strides); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":293 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":293 * if PyArray_HASFIELDS(self): * stdlib.free(info.format) * if sizeof(npy_intp) != sizeof(Py_ssize_t): # <<<<<<<<<<<<<< @@ -7586,7 +7889,7 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s */ } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":290 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":290 * f[0] = c'\0' # Terminate format string * * def __releasebuffer__(ndarray self, Py_buffer* info): # <<<<<<<<<<<<<< @@ -7598,7 +7901,7 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s __Pyx_RefNannyFinishContext(); } -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":770 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":770 * ctypedef npy_cdouble complex_t * * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< @@ -7612,7 +7915,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__ PyObject *__pyx_t_1 = NULL; __Pyx_RefNannySetupContext("PyArray_MultiIterNew1", 0); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":771 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":771 * * cdef inline object PyArray_MultiIterNew1(a): * return PyArray_MultiIterNew(1, a) # <<<<<<<<<<<<<< @@ -7626,7 +7929,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__ __pyx_t_1 = 0; goto __pyx_L0; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":770 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":770 * ctypedef npy_cdouble complex_t * * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< @@ -7645,7 +7948,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__ return __pyx_r; } -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":773 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":773 * return PyArray_MultiIterNew(1, a) * * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< @@ -7659,7 +7962,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__ PyObject *__pyx_t_1 = NULL; __Pyx_RefNannySetupContext("PyArray_MultiIterNew2", 0); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":774 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":774 * * cdef inline object PyArray_MultiIterNew2(a, b): * return PyArray_MultiIterNew(2, a, b) # <<<<<<<<<<<<<< @@ -7673,7 +7976,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__ __pyx_t_1 = 0; goto __pyx_L0; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":773 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":773 * return PyArray_MultiIterNew(1, a) * * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< @@ -7692,7 +7995,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__ return __pyx_r; } -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":776 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":776 * return PyArray_MultiIterNew(2, a, b) * * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< @@ -7706,7 +8009,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__ PyObject *__pyx_t_1 = NULL; __Pyx_RefNannySetupContext("PyArray_MultiIterNew3", 0); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":777 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":777 * * cdef inline object PyArray_MultiIterNew3(a, b, c): * return PyArray_MultiIterNew(3, a, b, c) # <<<<<<<<<<<<<< @@ -7720,7 +8023,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__ __pyx_t_1 = 0; goto __pyx_L0; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":776 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":776 * return PyArray_MultiIterNew(2, a, b) * * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< @@ -7739,7 +8042,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__ return __pyx_r; } -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":779 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":779 * return PyArray_MultiIterNew(3, a, b, c) * * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< @@ -7753,7 +8056,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__ PyObject *__pyx_t_1 = NULL; __Pyx_RefNannySetupContext("PyArray_MultiIterNew4", 0); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":780 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":780 * * cdef inline object PyArray_MultiIterNew4(a, b, c, d): * return PyArray_MultiIterNew(4, a, b, c, d) # <<<<<<<<<<<<<< @@ -7767,7 +8070,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__ __pyx_t_1 = 0; goto __pyx_L0; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":779 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":779 * return PyArray_MultiIterNew(3, a, b, c) * * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< @@ -7786,7 +8089,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__ return __pyx_r; } -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":782 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":782 * return PyArray_MultiIterNew(4, a, b, c, d) * * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<< @@ -7800,7 +8103,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__ PyObject *__pyx_t_1 = NULL; __Pyx_RefNannySetupContext("PyArray_MultiIterNew5", 0); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":783 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":783 * * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): * return PyArray_MultiIterNew(5, a, b, c, d, e) # <<<<<<<<<<<<<< @@ -7814,7 +8117,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__ __pyx_t_1 = 0; goto __pyx_L0; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":782 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":782 * return PyArray_MultiIterNew(4, a, b, c, d) * * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<< @@ -7833,7 +8136,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__ return __pyx_r; } -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":785 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":785 * return PyArray_MultiIterNew(5, a, b, c, d, e) * * cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL: # <<<<<<<<<<<<<< @@ -7862,7 +8165,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx char *__pyx_t_9; __Pyx_RefNannySetupContext("_util_dtypestring", 0); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":790 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":790 * * cdef dtype child * cdef int endian_detector = 1 # <<<<<<<<<<<<<< @@ -7871,7 +8174,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx */ __pyx_v_endian_detector = 1; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":791 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":791 * cdef dtype child * cdef int endian_detector = 1 * cdef bint little_endian = ((&endian_detector)[0] != 0) # <<<<<<<<<<<<<< @@ -7880,7 +8183,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx */ __pyx_v_little_endian = ((((char *)(&__pyx_v_endian_detector))[0]) != 0); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":794 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":794 * cdef tuple fields * * for childname in descr.names: # <<<<<<<<<<<<<< @@ -7903,7 +8206,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __Pyx_XDECREF_SET(__pyx_v_childname, __pyx_t_3); __pyx_t_3 = 0; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":795 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":795 * * for childname in descr.names: * fields = descr.fields[childname] # <<<<<<<<<<<<<< @@ -7920,7 +8223,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __Pyx_XDECREF_SET(__pyx_v_fields, ((PyObject*)__pyx_t_3)); __pyx_t_3 = 0; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":796 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":796 * for childname in descr.names: * fields = descr.fields[childname] * child, new_offset = fields # <<<<<<<<<<<<<< @@ -7959,7 +8262,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __Pyx_XDECREF_SET(__pyx_v_new_offset, __pyx_t_4); __pyx_t_4 = 0; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":798 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":798 * child, new_offset = fields * * if (end - f) - (new_offset - offset[0]) < 15: # <<<<<<<<<<<<<< @@ -7976,7 +8279,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __pyx_t_6 = ((((__pyx_v_end - __pyx_v_f) - ((int)__pyx_t_5)) < 15) != 0); if (__pyx_t_6) { - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":799 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":799 * * if (end - f) - (new_offset - offset[0]) < 15: * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") # <<<<<<<<<<<<<< @@ -7989,7 +8292,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __PYX_ERR(1, 799, __pyx_L1_error) - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":798 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":798 * child, new_offset = fields * * if (end - f) - (new_offset - offset[0]) < 15: # <<<<<<<<<<<<<< @@ -7998,7 +8301,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx */ } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":801 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":801 * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") * * if ((child.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< @@ -8018,7 +8321,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx } __pyx_L8_next_or:; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":802 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":802 * * if ((child.byteorder == c'>' and little_endian) or * (child.byteorder == c'<' and not little_endian)): # <<<<<<<<<<<<<< @@ -8035,7 +8338,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __pyx_t_6 = __pyx_t_7; __pyx_L7_bool_binop_done:; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":801 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":801 * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") * * if ((child.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< @@ -8044,7 +8347,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx */ if (__pyx_t_6) { - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":803 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":803 * if ((child.byteorder == c'>' and little_endian) or * (child.byteorder == c'<' and not little_endian)): * raise ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<< @@ -8057,7 +8360,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __PYX_ERR(1, 803, __pyx_L1_error) - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":801 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":801 * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") * * if ((child.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< @@ -8066,7 +8369,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx */ } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":813 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":813 * * # Output padding bytes * while offset[0] < new_offset: # <<<<<<<<<<<<<< @@ -8082,7 +8385,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (!__pyx_t_6) break; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":814 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":814 * # Output padding bytes * while offset[0] < new_offset: * f[0] = 120 # "x"; pad byte # <<<<<<<<<<<<<< @@ -8091,7 +8394,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx */ (__pyx_v_f[0]) = 0x78; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":815 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":815 * while offset[0] < new_offset: * f[0] = 120 # "x"; pad byte * f += 1 # <<<<<<<<<<<<<< @@ -8100,7 +8403,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx */ __pyx_v_f = (__pyx_v_f + 1); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":816 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":816 * f[0] = 120 # "x"; pad byte * f += 1 * offset[0] += 1 # <<<<<<<<<<<<<< @@ -8111,7 +8414,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx (__pyx_v_offset[__pyx_t_8]) = ((__pyx_v_offset[__pyx_t_8]) + 1); } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":818 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":818 * offset[0] += 1 * * offset[0] += child.itemsize # <<<<<<<<<<<<<< @@ -8121,7 +8424,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __pyx_t_8 = 0; (__pyx_v_offset[__pyx_t_8]) = ((__pyx_v_offset[__pyx_t_8]) + __pyx_v_child->elsize); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":820 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":820 * offset[0] += child.itemsize * * if not PyDataType_HASFIELDS(child): # <<<<<<<<<<<<<< @@ -8131,7 +8434,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __pyx_t_6 = ((!(PyDataType_HASFIELDS(__pyx_v_child) != 0)) != 0); if (__pyx_t_6) { - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":821 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":821 * * if not PyDataType_HASFIELDS(child): * t = child.type_num # <<<<<<<<<<<<<< @@ -8143,7 +8446,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __Pyx_XDECREF_SET(__pyx_v_t, __pyx_t_4); __pyx_t_4 = 0; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":822 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":822 * if not PyDataType_HASFIELDS(child): * t = child.type_num * if end - f < 5: # <<<<<<<<<<<<<< @@ -8153,7 +8456,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __pyx_t_6 = (((__pyx_v_end - __pyx_v_f) < 5) != 0); if (__pyx_t_6) { - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":823 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":823 * t = child.type_num * if end - f < 5: * raise RuntimeError(u"Format string allocated too short.") # <<<<<<<<<<<<<< @@ -8166,7 +8469,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __PYX_ERR(1, 823, __pyx_L1_error) - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":822 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":822 * if not PyDataType_HASFIELDS(child): * t = child.type_num * if end - f < 5: # <<<<<<<<<<<<<< @@ -8175,7 +8478,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx */ } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":826 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":826 * * # Until ticket #99 is fixed, use integers to avoid warnings * if t == NPY_BYTE: f[0] = 98 #"b" # <<<<<<<<<<<<<< @@ -8193,7 +8496,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":827 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":827 * # Until ticket #99 is fixed, use integers to avoid warnings * if t == NPY_BYTE: f[0] = 98 #"b" * elif t == NPY_UBYTE: f[0] = 66 #"B" # <<<<<<<<<<<<<< @@ -8211,7 +8514,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":828 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":828 * if t == NPY_BYTE: f[0] = 98 #"b" * elif t == NPY_UBYTE: f[0] = 66 #"B" * elif t == NPY_SHORT: f[0] = 104 #"h" # <<<<<<<<<<<<<< @@ -8229,7 +8532,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":829 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":829 * elif t == NPY_UBYTE: f[0] = 66 #"B" * elif t == NPY_SHORT: f[0] = 104 #"h" * elif t == NPY_USHORT: f[0] = 72 #"H" # <<<<<<<<<<<<<< @@ -8247,7 +8550,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":830 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":830 * elif t == NPY_SHORT: f[0] = 104 #"h" * elif t == NPY_USHORT: f[0] = 72 #"H" * elif t == NPY_INT: f[0] = 105 #"i" # <<<<<<<<<<<<<< @@ -8265,7 +8568,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":831 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":831 * elif t == NPY_USHORT: f[0] = 72 #"H" * elif t == NPY_INT: f[0] = 105 #"i" * elif t == NPY_UINT: f[0] = 73 #"I" # <<<<<<<<<<<<<< @@ -8283,7 +8586,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":832 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":832 * elif t == NPY_INT: f[0] = 105 #"i" * elif t == NPY_UINT: f[0] = 73 #"I" * elif t == NPY_LONG: f[0] = 108 #"l" # <<<<<<<<<<<<<< @@ -8301,7 +8604,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":833 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":833 * elif t == NPY_UINT: f[0] = 73 #"I" * elif t == NPY_LONG: f[0] = 108 #"l" * elif t == NPY_ULONG: f[0] = 76 #"L" # <<<<<<<<<<<<<< @@ -8319,7 +8622,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":834 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":834 * elif t == NPY_LONG: f[0] = 108 #"l" * elif t == NPY_ULONG: f[0] = 76 #"L" * elif t == NPY_LONGLONG: f[0] = 113 #"q" # <<<<<<<<<<<<<< @@ -8337,7 +8640,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":835 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":835 * elif t == NPY_ULONG: f[0] = 76 #"L" * elif t == NPY_LONGLONG: f[0] = 113 #"q" * elif t == NPY_ULONGLONG: f[0] = 81 #"Q" # <<<<<<<<<<<<<< @@ -8355,7 +8658,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":836 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":836 * elif t == NPY_LONGLONG: f[0] = 113 #"q" * elif t == NPY_ULONGLONG: f[0] = 81 #"Q" * elif t == NPY_FLOAT: f[0] = 102 #"f" # <<<<<<<<<<<<<< @@ -8373,7 +8676,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":837 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":837 * elif t == NPY_ULONGLONG: f[0] = 81 #"Q" * elif t == NPY_FLOAT: f[0] = 102 #"f" * elif t == NPY_DOUBLE: f[0] = 100 #"d" # <<<<<<<<<<<<<< @@ -8391,7 +8694,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":838 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":838 * elif t == NPY_FLOAT: f[0] = 102 #"f" * elif t == NPY_DOUBLE: f[0] = 100 #"d" * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" # <<<<<<<<<<<<<< @@ -8409,7 +8712,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":839 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":839 * elif t == NPY_DOUBLE: f[0] = 100 #"d" * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf # <<<<<<<<<<<<<< @@ -8429,7 +8732,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":840 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":840 * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd # <<<<<<<<<<<<<< @@ -8449,7 +8752,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":841 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":841 * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd * elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg # <<<<<<<<<<<<<< @@ -8469,7 +8772,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":842 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":842 * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd * elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg * elif t == NPY_OBJECT: f[0] = 79 #"O" # <<<<<<<<<<<<<< @@ -8487,7 +8790,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":844 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":844 * elif t == NPY_OBJECT: f[0] = 79 #"O" * else: * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) # <<<<<<<<<<<<<< @@ -8511,7 +8814,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx } __pyx_L15:; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":845 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":845 * else: * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) * f += 1 # <<<<<<<<<<<<<< @@ -8520,7 +8823,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx */ __pyx_v_f = (__pyx_v_f + 1); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":820 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":820 * offset[0] += child.itemsize * * if not PyDataType_HASFIELDS(child): # <<<<<<<<<<<<<< @@ -8530,7 +8833,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L13; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":849 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":849 * # Cython ignores struct boundary information ("T{...}"), * # so don't output it * f = _util_dtypestring(child, f, end, offset) # <<<<<<<<<<<<<< @@ -8543,7 +8846,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx } __pyx_L13:; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":794 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":794 * cdef tuple fields * * for childname in descr.names: # <<<<<<<<<<<<<< @@ -8553,7 +8856,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":850 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":850 * # so don't output it * f = _util_dtypestring(child, f, end, offset) * return f # <<<<<<<<<<<<<< @@ -8563,7 +8866,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __pyx_r = __pyx_v_f; goto __pyx_L0; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":785 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":785 * return PyArray_MultiIterNew(5, a, b, c, d, e) * * cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL: # <<<<<<<<<<<<<< @@ -8588,7 +8891,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx return __pyx_r; } -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":966 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":966 * * * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<< @@ -8603,7 +8906,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a int __pyx_t_2; __Pyx_RefNannySetupContext("set_array_base", 0); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":968 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":968 * cdef inline void set_array_base(ndarray arr, object base): * cdef PyObject* baseptr * if base is None: # <<<<<<<<<<<<<< @@ -8614,7 +8917,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a __pyx_t_2 = (__pyx_t_1 != 0); if (__pyx_t_2) { - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":969 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":969 * cdef PyObject* baseptr * if base is None: * baseptr = NULL # <<<<<<<<<<<<<< @@ -8623,7 +8926,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a */ __pyx_v_baseptr = NULL; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":968 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":968 * cdef inline void set_array_base(ndarray arr, object base): * cdef PyObject* baseptr * if base is None: # <<<<<<<<<<<<<< @@ -8633,7 +8936,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a goto __pyx_L3; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":971 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":971 * baseptr = NULL * else: * Py_INCREF(base) # important to do this before decref below! # <<<<<<<<<<<<<< @@ -8643,7 +8946,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a /*else*/ { Py_INCREF(__pyx_v_base); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":972 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":972 * else: * Py_INCREF(base) # important to do this before decref below! * baseptr = base # <<<<<<<<<<<<<< @@ -8654,7 +8957,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a } __pyx_L3:; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":973 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":973 * Py_INCREF(base) # important to do this before decref below! * baseptr = base * Py_XDECREF(arr.base) # <<<<<<<<<<<<<< @@ -8663,7 +8966,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a */ Py_XDECREF(__pyx_v_arr->base); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":974 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":974 * baseptr = base * Py_XDECREF(arr.base) * arr.base = baseptr # <<<<<<<<<<<<<< @@ -8672,7 +8975,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a */ __pyx_v_arr->base = __pyx_v_baseptr; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":966 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":966 * * * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<< @@ -8684,7 +8987,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a __Pyx_RefNannyFinishContext(); } -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":976 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":976 * arr.base = baseptr * * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< @@ -8698,7 +9001,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py int __pyx_t_1; __Pyx_RefNannySetupContext("get_array_base", 0); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":977 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":977 * * cdef inline object get_array_base(ndarray arr): * if arr.base is NULL: # <<<<<<<<<<<<<< @@ -8708,7 +9011,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py __pyx_t_1 = ((__pyx_v_arr->base == NULL) != 0); if (__pyx_t_1) { - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":978 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":978 * cdef inline object get_array_base(ndarray arr): * if arr.base is NULL: * return None # <<<<<<<<<<<<<< @@ -8720,7 +9023,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py __pyx_r = Py_None; goto __pyx_L0; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":977 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":977 * * cdef inline object get_array_base(ndarray arr): * if arr.base is NULL: # <<<<<<<<<<<<<< @@ -8729,7 +9032,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py */ } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":980 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":980 * return None * else: * return arr.base # <<<<<<<<<<<<<< @@ -8743,7 +9046,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py goto __pyx_L0; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":976 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":976 * arr.base = baseptr * * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< @@ -8758,7 +9061,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py return __pyx_r; } -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":985 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":985 * # Versions of the import_* functions which are more suitable for * # Cython code. * cdef inline int import_array() except -1: # <<<<<<<<<<<<<< @@ -8779,7 +9082,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { PyObject *__pyx_t_8 = NULL; __Pyx_RefNannySetupContext("import_array", 0); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":986 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":986 * # Cython code. * cdef inline int import_array() except -1: * try: # <<<<<<<<<<<<<< @@ -8795,7 +9098,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { __Pyx_XGOTREF(__pyx_t_3); /*try:*/ { - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":987 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":987 * cdef inline int import_array() except -1: * try: * _import_array() # <<<<<<<<<<<<<< @@ -8804,7 +9107,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { */ __pyx_t_4 = _import_array(); if (unlikely(__pyx_t_4 == -1)) __PYX_ERR(1, 987, __pyx_L3_error) - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":986 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":986 * # Cython code. * cdef inline int import_array() except -1: * try: # <<<<<<<<<<<<<< @@ -8819,7 +9122,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { __pyx_L3_error:; __Pyx_PyThreadState_assign - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":988 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":988 * try: * _import_array() * except Exception: # <<<<<<<<<<<<<< @@ -8834,7 +9137,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { __Pyx_GOTREF(__pyx_t_6); __Pyx_GOTREF(__pyx_t_7); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":989 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":989 * _import_array() * except Exception: * raise ImportError("numpy.core.multiarray failed to import") # <<<<<<<<<<<<<< @@ -8850,7 +9153,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { goto __pyx_L5_except_error; __pyx_L5_except_error:; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":986 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":986 * # Cython code. * cdef inline int import_array() except -1: * try: # <<<<<<<<<<<<<< @@ -8866,7 +9169,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { __pyx_L10_try_end:; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":985 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":985 * # Versions of the import_* functions which are more suitable for * # Cython code. * cdef inline int import_array() except -1: # <<<<<<<<<<<<<< @@ -8889,7 +9192,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { return __pyx_r; } -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":991 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":991 * raise ImportError("numpy.core.multiarray failed to import") * * cdef inline int import_umath() except -1: # <<<<<<<<<<<<<< @@ -8910,7 +9213,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { PyObject *__pyx_t_8 = NULL; __Pyx_RefNannySetupContext("import_umath", 0); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":992 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":992 * * cdef inline int import_umath() except -1: * try: # <<<<<<<<<<<<<< @@ -8926,7 +9229,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { __Pyx_XGOTREF(__pyx_t_3); /*try:*/ { - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":993 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":993 * cdef inline int import_umath() except -1: * try: * _import_umath() # <<<<<<<<<<<<<< @@ -8935,7 +9238,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { */ __pyx_t_4 = _import_umath(); if (unlikely(__pyx_t_4 == -1)) __PYX_ERR(1, 993, __pyx_L3_error) - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":992 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":992 * * cdef inline int import_umath() except -1: * try: # <<<<<<<<<<<<<< @@ -8950,7 +9253,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { __pyx_L3_error:; __Pyx_PyThreadState_assign - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":994 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":994 * try: * _import_umath() * except Exception: # <<<<<<<<<<<<<< @@ -8965,7 +9268,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { __Pyx_GOTREF(__pyx_t_6); __Pyx_GOTREF(__pyx_t_7); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":995 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":995 * _import_umath() * except Exception: * raise ImportError("numpy.core.umath failed to import") # <<<<<<<<<<<<<< @@ -8981,7 +9284,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { goto __pyx_L5_except_error; __pyx_L5_except_error:; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":992 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":992 * * cdef inline int import_umath() except -1: * try: # <<<<<<<<<<<<<< @@ -8997,7 +9300,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { __pyx_L10_try_end:; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":991 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":991 * raise ImportError("numpy.core.multiarray failed to import") * * cdef inline int import_umath() except -1: # <<<<<<<<<<<<<< @@ -9020,7 +9323,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { return __pyx_r; } -/* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":997 +/* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":997 * raise ImportError("numpy.core.umath failed to import") * * cdef inline int import_ufunc() except -1: # <<<<<<<<<<<<<< @@ -9041,7 +9344,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { PyObject *__pyx_t_8 = NULL; __Pyx_RefNannySetupContext("import_ufunc", 0); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":998 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":998 * * cdef inline int import_ufunc() except -1: * try: # <<<<<<<<<<<<<< @@ -9057,7 +9360,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { __Pyx_XGOTREF(__pyx_t_3); /*try:*/ { - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":999 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":999 * cdef inline int import_ufunc() except -1: * try: * _import_umath() # <<<<<<<<<<<<<< @@ -9066,7 +9369,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { */ __pyx_t_4 = _import_umath(); if (unlikely(__pyx_t_4 == -1)) __PYX_ERR(1, 999, __pyx_L3_error) - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":998 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":998 * * cdef inline int import_ufunc() except -1: * try: # <<<<<<<<<<<<<< @@ -9081,7 +9384,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { __pyx_L3_error:; __Pyx_PyThreadState_assign - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":1000 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":1000 * try: * _import_umath() * except Exception: # <<<<<<<<<<<<<< @@ -9095,7 +9398,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { __Pyx_GOTREF(__pyx_t_6); __Pyx_GOTREF(__pyx_t_7); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":1001 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":1001 * _import_umath() * except Exception: * raise ImportError("numpy.core.umath failed to import") # <<<<<<<<<<<<<< @@ -9109,7 +9412,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { goto __pyx_L5_except_error; __pyx_L5_except_error:; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":998 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":998 * * cdef inline int import_ufunc() except -1: * try: # <<<<<<<<<<<<<< @@ -9125,7 +9428,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { __pyx_L10_try_end:; } - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":997 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":997 * raise ImportError("numpy.core.umath failed to import") * * cdef inline int import_ufunc() except -1: # <<<<<<<<<<<<<< @@ -9171,6 +9474,7 @@ static struct PyModuleDef __pyx_moduledef = { #endif static __Pyx_StringTabEntry __pyx_string_tab[] = { + {&__pyx_kp_s_CYTHON_BATCH_CBOW, __pyx_k_CYTHON_BATCH_CBOW, sizeof(__pyx_k_CYTHON_BATCH_CBOW), 0, 0, 1, 0}, {&__pyx_n_s_FAST_VERSION, __pyx_k_FAST_VERSION, sizeof(__pyx_k_FAST_VERSION), 0, 0, 1, 1}, {&__pyx_kp_u_Format_string_allocated_too_shor, __pyx_k_Format_string_allocated_too_shor, sizeof(__pyx_k_Format_string_allocated_too_shor), 0, 1, 0, 0}, {&__pyx_kp_u_Format_string_allocated_too_shor_2, __pyx_k_Format_string_allocated_too_shor_2, sizeof(__pyx_k_Format_string_allocated_too_shor_2), 0, 1, 0, 0}, @@ -9184,29 +9488,34 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_alpha, __pyx_k_alpha, sizeof(__pyx_k_alpha), 0, 0, 1, 1}, {&__pyx_n_s_alpha_2, __pyx_k_alpha_2, sizeof(__pyx_k_alpha_2), 0, 0, 1, 1}, {&__pyx_n_s_array, __pyx_k_array, sizeof(__pyx_k_array), 0, 0, 1, 1}, + {&__pyx_n_s_bucket, __pyx_k_bucket, sizeof(__pyx_k_bucket), 0, 0, 1, 1}, {&__pyx_n_s_cbow_mean, __pyx_k_cbow_mean, sizeof(__pyx_k_cbow_mean), 0, 0, 1, 1}, {&__pyx_n_s_code, __pyx_k_code, sizeof(__pyx_k_code), 0, 0, 1, 1}, {&__pyx_n_s_codelens, __pyx_k_codelens, sizeof(__pyx_k_codelens), 0, 0, 1, 1}, {&__pyx_n_s_codes, __pyx_k_codes, sizeof(__pyx_k_codes), 0, 0, 1, 1}, + {&__pyx_n_s_compute_ngrams, __pyx_k_compute_ngrams, sizeof(__pyx_k_compute_ngrams), 0, 0, 1, 1}, {&__pyx_n_s_cum_table, __pyx_k_cum_table, sizeof(__pyx_k_cum_table), 0, 0, 1, 1}, {&__pyx_n_s_cum_table_len, __pyx_k_cum_table_len, sizeof(__pyx_k_cum_table_len), 0, 0, 1, 1}, {&__pyx_n_s_d_res, __pyx_k_d_res, sizeof(__pyx_k_d_res), 0, 0, 1, 1}, {&__pyx_n_s_dtype, __pyx_k_dtype, sizeof(__pyx_k_dtype), 0, 0, 1, 1}, {&__pyx_n_s_effective_sentences, __pyx_k_effective_sentences, sizeof(__pyx_k_effective_sentences), 0, 0, 1, 1}, {&__pyx_n_s_effective_words, __pyx_k_effective_words, sizeof(__pyx_k_effective_words), 0, 0, 1, 1}, + {&__pyx_n_s_end, __pyx_k_end, sizeof(__pyx_k_end), 0, 0, 1, 1}, {&__pyx_n_s_enumerate, __pyx_k_enumerate, sizeof(__pyx_k_enumerate), 0, 0, 1, 1}, {&__pyx_n_s_expected, __pyx_k_expected, sizeof(__pyx_k_expected), 0, 0, 1, 1}, {&__pyx_n_s_fblas, __pyx_k_fblas, sizeof(__pyx_k_fblas), 0, 0, 1, 1}, + {&__pyx_n_s_file, __pyx_k_file, sizeof(__pyx_k_file), 0, 0, 1, 1}, {&__pyx_n_s_float32, __pyx_k_float32, sizeof(__pyx_k_float32), 0, 0, 1, 1}, + {&__pyx_n_s_ft_hash, __pyx_k_ft_hash, sizeof(__pyx_k_ft_hash), 0, 0, 1, 1}, {&__pyx_n_s_gensim_models_fasttext_inner, __pyx_k_gensim_models_fasttext_inner, sizeof(__pyx_k_gensim_models_fasttext_inner), 0, 0, 1, 1}, - {&__pyx_kp_s_gensim_models_fasttext_inner_pyx, __pyx_k_gensim_models_fasttext_inner_pyx, sizeof(__pyx_k_gensim_models_fasttext_inner_pyx), 0, 0, 1, 0}, + {&__pyx_n_s_hash2index, __pyx_k_hash2index, sizeof(__pyx_k_hash2index), 0, 0, 1, 1}, + {&__pyx_kp_s_home_jojo_projekte_gensim_gensi, __pyx_k_home_jojo_projekte_gensim_gensi, sizeof(__pyx_k_home_jojo_projekte_gensim_gensi), 0, 0, 1, 0}, {&__pyx_n_s_hs, __pyx_k_hs, sizeof(__pyx_k_hs), 0, 0, 1, 1}, {&__pyx_n_s_i, __pyx_k_i, sizeof(__pyx_k_i), 0, 0, 1, 1}, {&__pyx_n_s_idx_end, __pyx_k_idx_end, sizeof(__pyx_k_idx_end), 0, 0, 1, 1}, {&__pyx_n_s_idx_start, __pyx_k_idx_start, sizeof(__pyx_k_idx_start), 0, 0, 1, 1}, {&__pyx_n_s_import, __pyx_k_import, sizeof(__pyx_k_import), 0, 0, 1, 1}, {&__pyx_n_s_index, __pyx_k_index, sizeof(__pyx_k_index), 0, 0, 1, 1}, - {&__pyx_n_s_index2word, __pyx_k_index2word, sizeof(__pyx_k_index2word), 0, 0, 1, 1}, {&__pyx_n_s_indexes, __pyx_k_indexes, sizeof(__pyx_k_indexes), 0, 0, 1, 1}, {&__pyx_n_s_init, __pyx_k_init, sizeof(__pyx_k_init), 0, 0, 1, 1}, {&__pyx_n_s_item, __pyx_k_item, sizeof(__pyx_k_item), 0, 0, 1, 1}, @@ -9215,6 +9524,8 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_l1, __pyx_k_l1, sizeof(__pyx_k_l1), 0, 0, 1, 1}, {&__pyx_n_s_l1_2, __pyx_k_l1_2, sizeof(__pyx_k_l1_2), 0, 0, 1, 1}, {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1}, + {&__pyx_n_s_max_n, __pyx_k_max_n, sizeof(__pyx_k_max_n), 0, 0, 1, 1}, + {&__pyx_n_s_min_n, __pyx_k_min_n, sizeof(__pyx_k_min_n), 0, 0, 1, 1}, {&__pyx_n_s_model, __pyx_k_model, sizeof(__pyx_k_model), 0, 0, 1, 1}, {&__pyx_kp_u_ndarray_is_not_C_contiguous, __pyx_k_ndarray_is_not_C_contiguous, sizeof(__pyx_k_ndarray_is_not_C_contiguous), 0, 1, 0, 0}, {&__pyx_kp_u_ndarray_is_not_Fortran_contiguou, __pyx_k_ndarray_is_not_Fortran_contiguou, sizeof(__pyx_k_ndarray_is_not_Fortran_contiguou), 0, 1, 0, 0}, @@ -9222,8 +9533,6 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_neu1, __pyx_k_neu1, sizeof(__pyx_k_neu1), 0, 0, 1, 1}, {&__pyx_n_s_neu1_2, __pyx_k_neu1_2, sizeof(__pyx_k_neu1_2), 0, 0, 1, 1}, {&__pyx_n_s_next_random, __pyx_k_next_random, sizeof(__pyx_k_next_random), 0, 0, 1, 1}, - {&__pyx_n_s_ngrams, __pyx_k_ngrams, sizeof(__pyx_k_ngrams), 0, 0, 1, 1}, - {&__pyx_n_s_ngrams_word, __pyx_k_ngrams_word, sizeof(__pyx_k_ngrams_word), 0, 0, 1, 1}, {&__pyx_n_s_np, __pyx_k_np, sizeof(__pyx_k_np), 0, 0, 1, 1}, {&__pyx_n_s_numpy, __pyx_k_numpy, sizeof(__pyx_k_numpy), 0, 0, 1, 1}, {&__pyx_kp_s_numpy_core_multiarray_failed_to, __pyx_k_numpy_core_multiarray_failed_to, sizeof(__pyx_k_numpy_core_multiarray_failed_to), 0, 0, 1, 0}, @@ -9231,6 +9540,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_p_res, __pyx_k_p_res, sizeof(__pyx_k_p_res), 0, 0, 1, 1}, {&__pyx_n_s_point, __pyx_k_point, sizeof(__pyx_k_point), 0, 0, 1, 1}, {&__pyx_n_s_points, __pyx_k_points, sizeof(__pyx_k_points), 0, 0, 1, 1}, + {&__pyx_n_s_print, __pyx_k_print, sizeof(__pyx_k_print), 0, 0, 1, 1}, {&__pyx_n_s_randint, __pyx_k_randint, sizeof(__pyx_k_randint), 0, 0, 1, 1}, {&__pyx_n_s_random, __pyx_k_random, sizeof(__pyx_k_random), 0, 0, 1, 1}, {&__pyx_n_s_range, __pyx_k_range, sizeof(__pyx_k_range), 0, 0, 1, 1}, @@ -9243,8 +9553,8 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_sentence_idx, __pyx_k_sentence_idx, sizeof(__pyx_k_sentence_idx), 0, 0, 1, 1}, {&__pyx_n_s_sentences, __pyx_k_sentences, sizeof(__pyx_k_sentences), 0, 0, 1, 1}, {&__pyx_n_s_size, __pyx_k_size, sizeof(__pyx_k_size), 0, 0, 1, 1}, + {&__pyx_n_s_subword, __pyx_k_subword, sizeof(__pyx_k_subword), 0, 0, 1, 1}, {&__pyx_n_s_subword_arrays, __pyx_k_subword_arrays, sizeof(__pyx_k_subword_arrays), 0, 0, 1, 1}, - {&__pyx_n_s_subword_i, __pyx_k_subword_i, sizeof(__pyx_k_subword_i), 0, 0, 1, 1}, {&__pyx_n_s_subwords, __pyx_k_subwords, sizeof(__pyx_k_subwords), 0, 0, 1, 1}, {&__pyx_n_s_subwords_idx, __pyx_k_subwords_idx, sizeof(__pyx_k_subwords_idx), 0, 0, 1, 1}, {&__pyx_n_s_subwords_idx_len, __pyx_k_subwords_idx_len, sizeof(__pyx_k_subwords_idx_len), 0, 0, 1, 1}, @@ -9259,6 +9569,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_trainables, __pyx_k_trainables, sizeof(__pyx_k_trainables), 0, 0, 1, 1}, {&__pyx_n_s_uint32, __pyx_k_uint32, sizeof(__pyx_k_uint32), 0, 0, 1, 1}, {&__pyx_kp_u_unknown_dtype_code_in_numpy_pxd, __pyx_k_unknown_dtype_code_in_numpy_pxd, sizeof(__pyx_k_unknown_dtype_code_in_numpy_pxd), 0, 1, 0, 0}, + {&__pyx_n_s_utils_any2vec_fast, __pyx_k_utils_any2vec_fast, sizeof(__pyx_k_utils_any2vec_fast), 0, 0, 1, 1}, {&__pyx_n_s_vector_size, __pyx_k_vector_size, sizeof(__pyx_k_vector_size), 0, 0, 1, 1}, {&__pyx_n_s_vectors_ngrams, __pyx_k_vectors_ngrams, sizeof(__pyx_k_vectors_ngrams), 0, 0, 1, 1}, {&__pyx_n_s_vectors_ngrams_lockf, __pyx_k_vectors_ngrams_lockf, sizeof(__pyx_k_vectors_ngrams_lockf), 0, 0, 1, 1}, @@ -9282,7 +9593,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { static int __Pyx_InitCachedBuiltins(void) { __pyx_builtin_ImportError = __Pyx_GetBuiltinName(__pyx_n_s_ImportError); if (!__pyx_builtin_ImportError) __PYX_ERR(0, 18, __pyx_L1_error) __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(0, 62, __pyx_L1_error) - __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) __PYX_ERR(0, 346, __pyx_L1_error) + __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) __PYX_ERR(0, 347, __pyx_L1_error) __pyx_builtin_ValueError = __Pyx_GetBuiltinName(__pyx_n_s_ValueError); if (!__pyx_builtin_ValueError) __PYX_ERR(1, 218, __pyx_L1_error) __pyx_builtin_RuntimeError = __Pyx_GetBuiltinName(__pyx_n_s_RuntimeError); if (!__pyx_builtin_RuntimeError) __PYX_ERR(1, 799, __pyx_L1_error) return 0; @@ -9308,21 +9619,21 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__2); __Pyx_GIVEREF(__pyx_tuple__2); - /* "gensim/models/fasttext_inner.pyx":429 + /* "gensim/models/fasttext_inner.pyx":431 * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) __PYX_ERR(0, 429, __pyx_L1_error) + __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) __PYX_ERR(0, 431, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__3); __Pyx_GIVEREF(__pyx_tuple__3); - __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) __PYX_ERR(0, 429, __pyx_L1_error) + __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) __PYX_ERR(0, 431, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__4); __Pyx_GIVEREF(__pyx_tuple__4); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":218 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":218 * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): * raise ValueError(u"ndarray is not C contiguous") # <<<<<<<<<<<<<< @@ -9333,7 +9644,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__5); __Pyx_GIVEREF(__pyx_tuple__5); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":222 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":222 * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): * raise ValueError(u"ndarray is not Fortran contiguous") # <<<<<<<<<<<<<< @@ -9344,7 +9655,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__6); __Pyx_GIVEREF(__pyx_tuple__6); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":259 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":259 * if ((descr.byteorder == c'>' and little_endian) or * (descr.byteorder == c'<' and not little_endian)): * raise ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<< @@ -9355,7 +9666,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__7); __Pyx_GIVEREF(__pyx_tuple__7); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":799 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":799 * * if (end - f) - (new_offset - offset[0]) < 15: * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") # <<<<<<<<<<<<<< @@ -9366,7 +9677,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__8); __Pyx_GIVEREF(__pyx_tuple__8); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":803 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":803 * if ((child.byteorder == c'>' and little_endian) or * (child.byteorder == c'<' and not little_endian)): * raise ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<< @@ -9377,7 +9688,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__9); __Pyx_GIVEREF(__pyx_tuple__9); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":823 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":823 * t = child.type_num * if end - f < 5: * raise RuntimeError(u"Format string allocated too short.") # <<<<<<<<<<<<<< @@ -9388,7 +9699,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__10); __Pyx_GIVEREF(__pyx_tuple__10); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":989 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":989 * _import_array() * except Exception: * raise ImportError("numpy.core.multiarray failed to import") # <<<<<<<<<<<<<< @@ -9399,7 +9710,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__11); __Pyx_GIVEREF(__pyx_tuple__11); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":995 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":995 * _import_umath() * except Exception: * raise ImportError("numpy.core.umath failed to import") # <<<<<<<<<<<<<< @@ -9410,7 +9721,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__12); __Pyx_GIVEREF(__pyx_tuple__12); - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":1001 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":1001 * _import_umath() * except Exception: * raise ImportError("numpy.core.umath failed to import") # <<<<<<<<<<<<<< @@ -9426,34 +9737,34 @@ static int __Pyx_InitCachedConstants(void) { * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_tuple__15 = PyTuple_Pack(47, __pyx_n_s_model, __pyx_n_s_sentences, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_l1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_syn0_vocab, __pyx_n_s_word_locks_vocab, __pyx_n_s_syn0_ngrams, __pyx_n_s_word_locks_ngrams, __pyx_n_s_work_2, __pyx_n_s_l1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_idx, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_effective_words, __pyx_n_s_effective_sentences, __pyx_n_s_sent_idx, __pyx_n_s_idx_start, __pyx_n_s_idx_end, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_subwords_idx_len, __pyx_n_s_subwords_idx, __pyx_n_s_subword_arrays, __pyx_n_s_vlookup, __pyx_n_s_sent, __pyx_n_s_token, __pyx_n_s_word, __pyx_n_s_subwords, __pyx_n_s_word_subwords, __pyx_n_s_item, __pyx_n_s_subword_i); if (unlikely(!__pyx_tuple__15)) __PYX_ERR(0, 247, __pyx_L1_error) + __pyx_tuple__15 = PyTuple_Pack(47, __pyx_n_s_model, __pyx_n_s_sentences, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_l1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_syn0_vocab, __pyx_n_s_word_locks_vocab, __pyx_n_s_syn0_ngrams, __pyx_n_s_word_locks_ngrams, __pyx_n_s_work_2, __pyx_n_s_l1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_idx, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_effective_words, __pyx_n_s_effective_sentences, __pyx_n_s_sent_idx, __pyx_n_s_idx_start, __pyx_n_s_idx_end, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_subwords_idx_len, __pyx_n_s_subwords_idx, __pyx_n_s_subword_arrays, __pyx_n_s_vlookup, __pyx_n_s_sent, __pyx_n_s_token, __pyx_n_s_word, __pyx_n_s_subwords, __pyx_n_s_word_subwords, __pyx_n_s_item, __pyx_n_s_subword); if (unlikely(!__pyx_tuple__15)) __PYX_ERR(0, 247, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__15); __Pyx_GIVEREF(__pyx_tuple__15); - __pyx_codeobj__16 = (PyObject*)__Pyx_PyCode_New(5, 0, 47, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__15, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_gensim_models_fasttext_inner_pyx, __pyx_n_s_train_batch_sg, 247, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__16)) __PYX_ERR(0, 247, __pyx_L1_error) + __pyx_codeobj__16 = (PyObject*)__Pyx_PyCode_New(5, 0, 47, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__15, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_jojo_projekte_gensim_gensi, __pyx_n_s_train_batch_sg, 247, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__16)) __PYX_ERR(0, 247, __pyx_L1_error) - /* "gensim/models/fasttext_inner.pyx":377 + /* "gensim/models/fasttext_inner.pyx":378 * * * def train_batch_cbow(model, sentences, alpha, _work, _neu1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_tuple__17 = PyTuple_Pack(48, __pyx_n_s_model, __pyx_n_s_sentences, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_cbow_mean, __pyx_n_s_syn0_vocab, __pyx_n_s_word_locks_vocab, __pyx_n_s_syn0_ngrams, __pyx_n_s_word_locks_ngrams, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_idx, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_effective_words, __pyx_n_s_effective_sentences, __pyx_n_s_sent_idx, __pyx_n_s_idx_start, __pyx_n_s_idx_end, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_subwords_idx_len, __pyx_n_s_subwords_idx, __pyx_n_s_subword_arrays, __pyx_n_s_neu1_2, __pyx_n_s_vlookup, __pyx_n_s_sent, __pyx_n_s_token, __pyx_n_s_word, __pyx_n_s_subwords, __pyx_n_s_word_subwords, __pyx_n_s_item, __pyx_n_s_subword_i); if (unlikely(!__pyx_tuple__17)) __PYX_ERR(0, 377, __pyx_L1_error) + __pyx_tuple__17 = PyTuple_Pack(48, __pyx_n_s_model, __pyx_n_s_sentences, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_cbow_mean, __pyx_n_s_syn0_vocab, __pyx_n_s_word_locks_vocab, __pyx_n_s_syn0_ngrams, __pyx_n_s_word_locks_ngrams, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_idx, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_effective_words, __pyx_n_s_effective_sentences, __pyx_n_s_sent_idx, __pyx_n_s_idx_start, __pyx_n_s_idx_end, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_subwords_idx_len, __pyx_n_s_subwords_idx, __pyx_n_s_subword_arrays, __pyx_n_s_neu1_2, __pyx_n_s_vlookup, __pyx_n_s_sent, __pyx_n_s_token, __pyx_n_s_word, __pyx_n_s_subwords, __pyx_n_s_word_subwords, __pyx_n_s_item, __pyx_n_s_subword); if (unlikely(!__pyx_tuple__17)) __PYX_ERR(0, 378, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__17); __Pyx_GIVEREF(__pyx_tuple__17); - __pyx_codeobj__18 = (PyObject*)__Pyx_PyCode_New(5, 0, 48, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__17, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_gensim_models_fasttext_inner_pyx, __pyx_n_s_train_batch_cbow, 377, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__18)) __PYX_ERR(0, 377, __pyx_L1_error) + __pyx_codeobj__18 = (PyObject*)__Pyx_PyCode_New(5, 0, 48, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__17, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_jojo_projekte_gensim_gensi, __pyx_n_s_train_batch_cbow, 378, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__18)) __PYX_ERR(0, 378, __pyx_L1_error) - /* "gensim/models/fasttext_inner.pyx":504 + /* "gensim/models/fasttext_inner.pyx":508 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_tuple__19 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__19)) __PYX_ERR(0, 504, __pyx_L1_error) + __pyx_tuple__19 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__19)) __PYX_ERR(0, 508, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__19); __Pyx_GIVEREF(__pyx_tuple__19); - __pyx_codeobj__20 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__19, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_gensim_models_fasttext_inner_pyx, __pyx_n_s_init, 504, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__20)) __PYX_ERR(0, 504, __pyx_L1_error) + __pyx_codeobj__20 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__19, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_jojo_projekte_gensim_gensi, __pyx_n_s_init, 508, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__20)) __PYX_ERR(0, 508, __pyx_L1_error) __Pyx_RefNannyFinishContext(); return 0; __pyx_L1_error:; @@ -9692,7 +10003,7 @@ PyMODINIT_FUNC PyInit_fasttext_inner(void) * # in scipy > 0.15, fblas function has been removed * import scipy.linalg.blas as fblas # <<<<<<<<<<<<<< * - * from word2vec_inner cimport bisect_left, random_int32, \ + * from utils_any2vec_fast import compute_ngrams, ft_hash */ __pyx_t_10 = PyList_New(1); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 20, __pyx_L4_except_error) __Pyx_GOTREF(__pyx_t_10); @@ -9734,22 +10045,50 @@ PyMODINIT_FUNC PyInit_fasttext_inner(void) __pyx_L9_try_end:; } - /* "gensim/models/fasttext_inner.pyx":28 + /* "gensim/models/fasttext_inner.pyx":22 + * import scipy.linalg.blas as fblas + * + * from utils_any2vec_fast import compute_ngrams, ft_hash # <<<<<<<<<<<<<< + * from word2vec_inner cimport bisect_left, random_int32, \ + * scopy, saxpy, sdot, dsdot, snrm2, sscal, \ + */ + __pyx_t_9 = PyList_New(2); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 22, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_9); + __Pyx_INCREF(__pyx_n_s_compute_ngrams); + __Pyx_GIVEREF(__pyx_n_s_compute_ngrams); + PyList_SET_ITEM(__pyx_t_9, 0, __pyx_n_s_compute_ngrams); + __Pyx_INCREF(__pyx_n_s_ft_hash); + __Pyx_GIVEREF(__pyx_n_s_ft_hash); + PyList_SET_ITEM(__pyx_t_9, 1, __pyx_n_s_ft_hash); + __pyx_t_3 = __Pyx_Import(__pyx_n_s_utils_any2vec_fast, __pyx_t_9, -1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 22, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + __pyx_t_9 = __Pyx_ImportFrom(__pyx_t_3, __pyx_n_s_compute_ngrams); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 22, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_9); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_compute_ngrams, __pyx_t_9) < 0) __PYX_ERR(0, 22, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + __pyx_t_9 = __Pyx_ImportFrom(__pyx_t_3, __pyx_n_s_ft_hash); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 22, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_9); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_ft_hash, __pyx_t_9) < 0) __PYX_ERR(0, 22, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "gensim/models/fasttext_inner.pyx":29 * our_dot_double, our_dot_float, our_dot_noblas, our_saxpy_noblas * * REAL = np.float32 # <<<<<<<<<<<<<< * * DEF MAX_SENTENCE_LEN = 10000 */ - __pyx_t_9 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 28, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_9); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_9, __pyx_n_s_float32); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 28, __pyx_L1_error) + __pyx_t_3 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 29, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - if (PyDict_SetItem(__pyx_d, __pyx_n_s_REAL, __pyx_t_3) < 0) __PYX_ERR(0, 28, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_float32); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 29, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (PyDict_SetItem(__pyx_d, __pyx_n_s_REAL, __pyx_t_9) < 0) __PYX_ERR(0, 29, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "gensim/models/fasttext_inner.pyx":39 + /* "gensim/models/fasttext_inner.pyx":40 * cdef REAL_t[EXP_TABLE_SIZE] LOG_TABLE * * cdef int ONE = 1 # <<<<<<<<<<<<<< @@ -9758,12 +10097,12 @@ PyMODINIT_FUNC PyInit_fasttext_inner(void) */ __pyx_v_6gensim_6models_14fasttext_inner_ONE = 1; - /* "gensim/models/fasttext_inner.pyx":40 + /* "gensim/models/fasttext_inner.pyx":41 * * cdef int ONE = 1 * cdef REAL_t ONEF = 1.0 # <<<<<<<<<<<<<< * - * + * cdef unsigned long long fast_sentence_sg_neg( */ __pyx_v_6gensim_6models_14fasttext_inner_ONEF = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)1.0); @@ -9774,84 +10113,84 @@ PyMODINIT_FUNC PyInit_fasttext_inner(void) * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_t_3 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_1train_batch_sg, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 247, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_batch_sg, __pyx_t_3) < 0) __PYX_ERR(0, 247, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_1train_batch_sg, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 247, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_9); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_batch_sg, __pyx_t_9) < 0) __PYX_ERR(0, 247, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "gensim/models/fasttext_inner.pyx":377 + /* "gensim/models/fasttext_inner.pyx":378 * * * def train_batch_cbow(model, sentences, alpha, _work, _neu1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_t_3 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_3train_batch_cbow, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 377, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_batch_cbow, __pyx_t_3) < 0) __PYX_ERR(0, 377, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_3train_batch_cbow, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 378, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_9); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_batch_cbow, __pyx_t_9) < 0) __PYX_ERR(0, 378, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "gensim/models/fasttext_inner.pyx":504 + /* "gensim/models/fasttext_inner.pyx":508 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_t_3 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_5init, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 504, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_3) < 0) __PYX_ERR(0, 504, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_5init, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 508, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_9); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_9) < 0) __PYX_ERR(0, 508, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "gensim/models/fasttext_inner.pyx":544 + /* "gensim/models/fasttext_inner.pyx":548 * return 2 * * FAST_VERSION = init() # initialize the module # <<<<<<<<<<<<<< * MAX_WORDS_IN_BATCH = MAX_SENTENCE_LEN * */ - __pyx_t_9 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 544, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_9); + __pyx_t_3 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 548, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); __pyx_t_7 = NULL; - if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_9))) { - __pyx_t_7 = PyMethod_GET_SELF(__pyx_t_9); + if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_3))) { + __pyx_t_7 = PyMethod_GET_SELF(__pyx_t_3); if (likely(__pyx_t_7)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_9); + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3); __Pyx_INCREF(__pyx_t_7); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_9, function); + __Pyx_DECREF_SET(__pyx_t_3, function); } } if (__pyx_t_7) { - __pyx_t_3 = __Pyx_PyObject_CallOneArg(__pyx_t_9, __pyx_t_7); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 544, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_7); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 548, __pyx_L1_error) __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; } else { - __pyx_t_3 = __Pyx_PyObject_CallNoArg(__pyx_t_9); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 544, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyObject_CallNoArg(__pyx_t_3); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 548, __pyx_L1_error) } - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_3) < 0) __PYX_ERR(0, 544, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_9) < 0) __PYX_ERR(0, 548, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "gensim/models/fasttext_inner.pyx":545 + /* "gensim/models/fasttext_inner.pyx":549 * * FAST_VERSION = init() # initialize the module * MAX_WORDS_IN_BATCH = MAX_SENTENCE_LEN # <<<<<<<<<<<<<< * */ - if (PyDict_SetItem(__pyx_d, __pyx_n_s_MAX_WORDS_IN_BATCH, __pyx_int_10000) < 0) __PYX_ERR(0, 545, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_MAX_WORDS_IN_BATCH, __pyx_int_10000) < 0) __PYX_ERR(0, 549, __pyx_L1_error) /* "gensim/models/fasttext_inner.pyx":1 * #!/usr/bin/env cython # <<<<<<<<<<<<<< * # cython: boundscheck=False * # cython: wraparound=False */ - __pyx_t_3 = PyDict_New(); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 1, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_3) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_9 = PyDict_New(); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 1, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_9); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_9) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "../../../../usr/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":997 + /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":997 * raise ImportError("numpy.core.umath failed to import") * * cdef inline int import_ufunc() except -1: # <<<<<<<<<<<<<< @@ -10250,8 +10589,72 @@ static CYTHON_INLINE PyObject * __Pyx_PyCFunction_FastCall(PyObject *func_obj, P } #endif // CYTHON_FAST_PYCCALL +/* PyObjectCallMethO */ + #if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) { + PyObject *self, *result; + PyCFunction cfunc; + cfunc = PyCFunction_GET_FUNCTION(func); + self = PyCFunction_GET_SELF(func); + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) + return NULL; + result = cfunc(self, arg); + Py_LeaveRecursiveCall(); + if (unlikely(!result) && unlikely(!PyErr_Occurred())) { + PyErr_SetString( + PyExc_SystemError, + "NULL result without error in PyObject_Call"); + } + return result; +} +#endif + +/* PyObjectCallOneArg */ + #if CYTHON_COMPILING_IN_CPYTHON +static PyObject* __Pyx__PyObject_CallOneArg(PyObject *func, PyObject *arg) { + PyObject *result; + PyObject *args = PyTuple_New(1); + if (unlikely(!args)) return NULL; + Py_INCREF(arg); + PyTuple_SET_ITEM(args, 0, arg); + result = __Pyx_PyObject_Call(func, args, NULL); + Py_DECREF(args); + return result; +} +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { +#if CYTHON_FAST_PYCALL + if (PyFunction_Check(func)) { + return __Pyx_PyFunction_FastCall(func, &arg, 1); + } +#endif +#ifdef __Pyx_CyFunction_USED + if (likely(PyCFunction_Check(func) || PyObject_TypeCheck(func, __pyx_CyFunctionType))) { +#else + if (likely(PyCFunction_Check(func))) { +#endif + if (likely(PyCFunction_GET_FLAGS(func) & METH_O)) { + return __Pyx_PyObject_CallMethO(func, arg); +#if CYTHON_FAST_PYCCALL + } else if (PyCFunction_GET_FLAGS(func) & METH_FASTCALL) { + return __Pyx_PyCFunction_FastCall(func, &arg, 1); +#endif + } + } + return __Pyx__PyObject_CallOneArg(func, arg); +} +#else +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { + PyObject *result; + PyObject *args = PyTuple_Pack(1, arg); + if (unlikely(!args)) return NULL; + result = __Pyx_PyObject_Call(func, args, NULL); + Py_DECREF(args); + return result; +} +#endif + /* PyErrFetchRestore */ - #if CYTHON_FAST_THREAD_STATE + #if CYTHON_FAST_THREAD_STATE static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { PyObject *tmp_type, *tmp_value, *tmp_tb; tmp_type = tstate->curexc_type; @@ -10275,7 +10678,7 @@ static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject #endif /* RaiseException */ - #if PY_MAJOR_VERSION < 3 + #if PY_MAJOR_VERSION < 3 static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, CYTHON_UNUSED PyObject *cause) { __Pyx_PyThreadState_declare @@ -10438,25 +10841,25 @@ static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject #endif /* RaiseTooManyValuesToUnpack */ - static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected) { + static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected) { PyErr_Format(PyExc_ValueError, "too many values to unpack (expected %" CYTHON_FORMAT_SSIZE_T "d)", expected); } /* RaiseNeedMoreValuesToUnpack */ - static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index) { + static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index) { PyErr_Format(PyExc_ValueError, "need more than %" CYTHON_FORMAT_SSIZE_T "d value%.1s to unpack", index, (index == 1) ? "" : "s"); } /* RaiseNoneIterError */ - static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void) { + static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable"); } /* SaveResetException */ - #if CYTHON_FAST_THREAD_STATE + #if CYTHON_FAST_THREAD_STATE static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { *type = tstate->exc_type; *value = tstate->exc_value; @@ -10480,7 +10883,7 @@ static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject #endif /* PyErrExceptionMatches */ - #if CYTHON_FAST_THREAD_STATE + #if CYTHON_FAST_THREAD_STATE static CYTHON_INLINE int __Pyx_PyErr_ExceptionMatchesInState(PyThreadState* tstate, PyObject* err) { PyObject *exc_type = tstate->curexc_type; if (exc_type == err) return 1; @@ -10490,7 +10893,7 @@ static CYTHON_INLINE int __Pyx_PyErr_ExceptionMatchesInState(PyThreadState* tsta #endif /* GetException */ - #if CYTHON_FAST_THREAD_STATE + #if CYTHON_FAST_THREAD_STATE static int __Pyx__GetException(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { #else static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb) { @@ -10551,7 +10954,7 @@ static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb) } /* Import */ - static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) { + static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) { PyObject *empty_list = 0; PyObject *module = 0; PyObject *global_dict = 0; @@ -10625,7 +11028,7 @@ static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb) } /* ImportFrom */ - static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name) { + static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name) { PyObject* value = __Pyx_PyObject_GetAttrStr(module, name); if (unlikely(!value) && PyErr_ExceptionMatches(PyExc_AttributeError)) { PyErr_Format(PyExc_ImportError, @@ -10638,70 +11041,6 @@ static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb) return value; } -/* PyObjectCallMethO */ - #if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) { - PyObject *self, *result; - PyCFunction cfunc; - cfunc = PyCFunction_GET_FUNCTION(func); - self = PyCFunction_GET_SELF(func); - if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) - return NULL; - result = cfunc(self, arg); - Py_LeaveRecursiveCall(); - if (unlikely(!result) && unlikely(!PyErr_Occurred())) { - PyErr_SetString( - PyExc_SystemError, - "NULL result without error in PyObject_Call"); - } - return result; -} -#endif - -/* PyObjectCallOneArg */ - #if CYTHON_COMPILING_IN_CPYTHON -static PyObject* __Pyx__PyObject_CallOneArg(PyObject *func, PyObject *arg) { - PyObject *result; - PyObject *args = PyTuple_New(1); - if (unlikely(!args)) return NULL; - Py_INCREF(arg); - PyTuple_SET_ITEM(args, 0, arg); - result = __Pyx_PyObject_Call(func, args, NULL); - Py_DECREF(args); - return result; -} -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { -#if CYTHON_FAST_PYCALL - if (PyFunction_Check(func)) { - return __Pyx_PyFunction_FastCall(func, &arg, 1); - } -#endif -#ifdef __Pyx_CyFunction_USED - if (likely(PyCFunction_Check(func) || PyObject_TypeCheck(func, __pyx_CyFunctionType))) { -#else - if (likely(PyCFunction_Check(func))) { -#endif - if (likely(PyCFunction_GET_FLAGS(func) & METH_O)) { - return __Pyx_PyObject_CallMethO(func, arg); -#if CYTHON_FAST_PYCCALL - } else if (PyCFunction_GET_FLAGS(func) & METH_FASTCALL) { - return __Pyx_PyCFunction_FastCall(func, &arg, 1); -#endif - } - } - return __Pyx__PyObject_CallOneArg(func, arg); -} -#else -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { - PyObject *result; - PyObject *args = PyTuple_Pack(1, arg); - if (unlikely(!args)) return NULL; - result = __Pyx_PyObject_Call(func, args, NULL); - Py_DECREF(args); - return result; -} -#endif - /* PyObjectCallNoArg */ #if CYTHON_COMPILING_IN_CPYTHON static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func) { @@ -11030,6 +11369,112 @@ static void __Pyx_AddTraceback(const char *funcname, int c_line, } } +/* Print */ + #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION < 3 +static PyObject *__Pyx_GetStdout(void) { + PyObject *f = PySys_GetObject((char *)"stdout"); + if (!f) { + PyErr_SetString(PyExc_RuntimeError, "lost sys.stdout"); + } + return f; +} +static int __Pyx_Print(PyObject* f, PyObject *arg_tuple, int newline) { + int i; + if (!f) { + if (!(f = __Pyx_GetStdout())) + return -1; + } + Py_INCREF(f); + for (i=0; i < PyTuple_GET_SIZE(arg_tuple); i++) { + PyObject* v; + if (PyFile_SoftSpace(f, 1)) { + if (PyFile_WriteString(" ", f) < 0) + goto error; + } + v = PyTuple_GET_ITEM(arg_tuple, i); + if (PyFile_WriteObject(v, f, Py_PRINT_RAW) < 0) + goto error; + if (PyString_Check(v)) { + char *s = PyString_AsString(v); + Py_ssize_t len = PyString_Size(v); + if (len > 0) { + switch (s[len-1]) { + case ' ': break; + case '\f': case '\r': case '\n': case '\t': case '\v': + PyFile_SoftSpace(f, 0); + break; + default: break; + } + } + } + } + if (newline) { + if (PyFile_WriteString("\n", f) < 0) + goto error; + PyFile_SoftSpace(f, 0); + } + Py_DECREF(f); + return 0; +error: + Py_DECREF(f); + return -1; +} +#else +static int __Pyx_Print(PyObject* stream, PyObject *arg_tuple, int newline) { + PyObject* kwargs = 0; + PyObject* result = 0; + PyObject* end_string; + if (unlikely(!__pyx_print)) { + __pyx_print = PyObject_GetAttr(__pyx_b, __pyx_n_s_print); + if (!__pyx_print) + return -1; + } + if (stream) { + kwargs = PyDict_New(); + if (unlikely(!kwargs)) + return -1; + if (unlikely(PyDict_SetItem(kwargs, __pyx_n_s_file, stream) < 0)) + goto bad; + if (!newline) { + end_string = PyUnicode_FromStringAndSize(" ", 1); + if (unlikely(!end_string)) + goto bad; + if (PyDict_SetItem(kwargs, __pyx_n_s_end, end_string) < 0) { + Py_DECREF(end_string); + goto bad; + } + Py_DECREF(end_string); + } + } else if (!newline) { + if (unlikely(!__pyx_print_kwargs)) { + __pyx_print_kwargs = PyDict_New(); + if (unlikely(!__pyx_print_kwargs)) + return -1; + end_string = PyUnicode_FromStringAndSize(" ", 1); + if (unlikely(!end_string)) + return -1; + if (PyDict_SetItem(__pyx_print_kwargs, __pyx_n_s_end, end_string) < 0) { + Py_DECREF(end_string); + return -1; + } + Py_DECREF(end_string); + } + kwargs = __pyx_print_kwargs; + } + result = PyObject_Call(__pyx_print, arg_tuple, kwargs); + if (unlikely(kwargs) && (kwargs != __pyx_print_kwargs)) + Py_DECREF(kwargs); + if (!result) + return -1; + Py_DECREF(result); + return 0; +bad: + if (kwargs != __pyx_print_kwargs) + Py_XDECREF(kwargs); + return -1; +} +#endif + /* Declarations */ #if CYTHON_CCOMPLEX #ifdef __cplusplus @@ -12316,6 +12761,43 @@ static void __Pyx_AddTraceback(const char *funcname, int c_line, return (npy_uint32) -1; } +/* PrintOne */ + #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION < 3 +static int __Pyx_PrintOne(PyObject* f, PyObject *o) { + if (!f) { + if (!(f = __Pyx_GetStdout())) + return -1; + } + Py_INCREF(f); + if (PyFile_SoftSpace(f, 0)) { + if (PyFile_WriteString(" ", f) < 0) + goto error; + } + if (PyFile_WriteObject(o, f, Py_PRINT_RAW) < 0) + goto error; + if (PyFile_WriteString("\n", f) < 0) + goto error; + Py_DECREF(f); + return 0; +error: + Py_DECREF(f); + return -1; + /* the line below is just to avoid C compiler + * warnings about unused functions */ + return __Pyx_Print(f, NULL, 0); +} +#else +static int __Pyx_PrintOne(PyObject* stream, PyObject *o) { + int res; + PyObject* arg_tuple = PyTuple_Pack(1, o); + if (unlikely(!arg_tuple)) + return -1; + res = __Pyx_Print(stream, arg_tuple, 1); + Py_DECREF(arg_tuple); + return res; +} +#endif + /* CheckBinaryVersion */ static int __Pyx_check_binary_version(void) { char ctversion[4], rtversion[4]; diff --git a/gensim/models/fasttext_inner.pyx b/gensim/models/fasttext_inner.pyx index 1a74b50ba5..388ccb1200 100644 --- a/gensim/models/fasttext_inner.pyx +++ b/gensim/models/fasttext_inner.pyx @@ -19,6 +19,7 @@ except ImportError: # in scipy > 0.15, fblas function has been removed import scipy.linalg.blas as fblas +from utils_any2vec_fast import compute_ngrams, ft_hash from word2vec_inner cimport bisect_left, random_int32, \ scopy, saxpy, sdot, dsdot, snrm2, sscal, \ REAL_t, EXP_TABLE, \ @@ -39,7 +40,6 @@ cdef REAL_t[EXP_TABLE_SIZE] LOG_TABLE cdef int ONE = 1 cdef REAL_t ONEF = 1.0 - cdef unsigned long long fast_sentence_sg_neg( const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, REAL_t *syn0_vocab, REAL_t *syn0_ngrams, REAL_t *syn1neg, const int size, @@ -317,7 +317,8 @@ def train_batch_sg(model, sentences, alpha, _work, _l1): continue indexes[effective_words] = word.index - subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] + subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] + for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] word_subwords = np.array([word.index] + subwords, dtype=np.uint32) subwords_idx_len[effective_words] = (len(subwords) + 1) subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) @@ -446,7 +447,8 @@ def train_batch_cbow(model, sentences, alpha, _work, _neu1): continue indexes[effective_words] = word.index - subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] + subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] + for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] word_subwords = np.array(subwords, dtype=np.uint32) subwords_idx_len[effective_words] = len(subwords) subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py index b35a974f4f..0aac2453c2 100644 --- a/gensim/models/keyedvectors.py +++ b/gensim/models/keyedvectors.py @@ -82,7 +82,7 @@ from six.moves import xrange, zip from scipy import sparse, stats from gensim.utils import deprecated -from gensim.models.utils_any2vec import _save_word2vec_format, _load_word2vec_format, _compute_ngrams +from gensim.models.utils_any2vec import _save_word2vec_format, _load_word2vec_format, _compute_ngrams, _ft_hash logger = logging.getLogger(__name__) @@ -1535,9 +1535,7 @@ def __init__(self, vector_size, min_n, max_n): self.vectors_vocab_norm = None self.vectors_ngrams = None self.vectors_ngrams_norm = None - self.ngrams = {} self.hash2index = {} - self.ngrams_word = {} self.min_n = min_n self.max_n = max_n self.num_ngram_vectors = 0 @@ -1570,9 +1568,9 @@ def __contains__(self, word): if word in self.vocab: return True else: - # from gensim.models.fasttext import compute_ngrams char_ngrams = _compute_ngrams(word, self.min_n, self.max_n) - return any(ng in self.ngrams for ng in char_ngrams) + return any(_ft_hash(ng) % self.bucket in self.hash2index + for ng in char_ngrams) def save(self, *args, **kwargs): """Saves the keyedvectors. This saved model can be loaded again using @@ -1603,13 +1601,14 @@ def word_vec(self, word, use_norm=False): # from gensim.models.fasttext import compute_ngrams word_vec = np.zeros(self.vectors_ngrams.shape[1], dtype=np.float32) ngrams = _compute_ngrams(word, self.min_n, self.max_n) - ngrams = [ng for ng in ngrams if ng in self.ngrams] if use_norm: ngram_weights = self.vectors_ngrams_norm else: ngram_weights = self.vectors_ngrams for ngram in ngrams: - word_vec += ngram_weights[self.ngrams[ngram]] + ngram_hash = _ft_hash(ngram) % self.bucket + if ngram_hash in self.hash2index: + word_vec += ngram_weights[self.hash2index[ngram_hash]] if word_vec.any(): return word_vec / len(ngrams) else: # No ngrams of the word are present in self.ngrams diff --git a/gensim/models/utils_any2vec.py b/gensim/models/utils_any2vec.py index 05d50c07a4..570b620c72 100644 --- a/gensim/models/utils_any2vec.py +++ b/gensim/models/utils_any2vec.py @@ -38,7 +38,6 @@ def _ft_hash(string): The hash of the string """ - raise Exception("slow") # Runtime warnings for integer overflow are raised, this is expected behaviour. These warnings are suppressed. old_settings = np.seterr(all='ignore') h = np.uint32(2166136261) diff --git a/gensim/models/utils_any2vec_fast.c b/gensim/models/utils_any2vec_fast.c index e48741f295..8bcca72c8c 100644 --- a/gensim/models/utils_any2vec_fast.c +++ b/gensim/models/utils_any2vec_fast.c @@ -727,18 +727,6 @@ static CYTHON_INLINE int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, in static CYTHON_INLINE int __Pyx_init_unicode_iteration( PyObject* ustring, Py_ssize_t *length, void** data, int *kind); -/* RaiseArgTupleInvalid.proto */ -static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact, - Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found); - -/* RaiseDoubleKeywords.proto */ -static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name); - -/* ParseKeywords.proto */ -static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject **argnames[],\ - PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args,\ - const char* function_name); - /* PyObjectFormatSimple.proto */ #if CYTHON_COMPILING_IN_PYPY #define __Pyx_PyObject_FormatSimple(s, f) (\ @@ -804,6 +792,18 @@ static CYTHON_INLINE int __Pyx_PyList_Append(PyObject* list, PyObject* x) { static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Substring( PyObject* text, Py_ssize_t start, Py_ssize_t stop); +/* RaiseArgTupleInvalid.proto */ +static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact, + Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found); + +/* RaiseDoubleKeywords.proto */ +static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name); + +/* ParseKeywords.proto */ +static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject **argnames[],\ + PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args,\ + const char* function_name); + /* CodeObjectCache.proto */ typedef struct { PyCodeObject* code_object; @@ -846,6 +846,7 @@ static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /* Module declarations from 'gensim.models.utils_any2vec_fast' */ +static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_compute_ngrams(PyObject *, unsigned int, unsigned int, int __pyx_skip_dispatch); /*proto*/ #define __Pyx_MODULE_NAME "gensim.models.utils_any2vec_fast" int __pyx_module_is_main_gensim__models__utils_any2vec_fast = 0; @@ -854,7 +855,6 @@ static PyObject *__pyx_builtin_range; static const char __pyx_k_[] = "<"; static const char __pyx_k_c[] = "c"; static const char __pyx_k_h[] = "h"; -static const char __pyx_k_i[] = "i"; static const char __pyx_k__2[] = ">"; static const char __pyx_k_main[] = "__main__"; static const char __pyx_k_test[] = "__test__"; @@ -862,29 +862,20 @@ static const char __pyx_k_word[] = "word"; static const char __pyx_k_max_n[] = "max_n"; static const char __pyx_k_min_n[] = "min_n"; static const char __pyx_k_range[] = "range"; -static const char __pyx_k_ngrams[] = "ngrams"; static const char __pyx_k_string[] = "string"; static const char __pyx_k_ft_hash[] = "ft_hash"; -static const char __pyx_k_ngram_length[] = "ngram_length"; -static const char __pyx_k_extended_word[] = "extended_word"; -static const char __pyx_k_compute_ngrams[] = "compute_ngrams"; static const char __pyx_k_home_jojo_projekte_gensim_gensi[] = "/home/jojo/projekte/gensim/gensim/models/utils_any2vec_fast.pyx"; static const char __pyx_k_gensim_models_utils_any2vec_fast[] = "gensim.models.utils_any2vec_fast"; static PyObject *__pyx_kp_u_; static PyObject *__pyx_kp_u__2; static PyObject *__pyx_n_s_c; -static PyObject *__pyx_n_s_compute_ngrams; -static PyObject *__pyx_n_s_extended_word; static PyObject *__pyx_n_s_ft_hash; static PyObject *__pyx_n_s_gensim_models_utils_any2vec_fast; static PyObject *__pyx_n_s_h; static PyObject *__pyx_kp_s_home_jojo_projekte_gensim_gensi; -static PyObject *__pyx_n_s_i; static PyObject *__pyx_n_s_main; static PyObject *__pyx_n_s_max_n; static PyObject *__pyx_n_s_min_n; -static PyObject *__pyx_n_s_ngram_length; -static PyObject *__pyx_n_s_ngrams; static PyObject *__pyx_n_s_range; static PyObject *__pyx_n_s_string; static PyObject *__pyx_n_s_test; @@ -894,9 +885,7 @@ static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_2compute_ngrams(C static PyObject *__pyx_int_0; static PyObject *__pyx_int_1; static PyObject *__pyx_tuple__3; -static PyObject *__pyx_tuple__5; static PyObject *__pyx_codeobj__4; -static PyObject *__pyx_codeobj__6; /* "gensim/models/utils_any2vec_fast.pyx":7 * # coding: utf-8 @@ -1024,85 +1013,13 @@ static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_ft_hash(CYTHON_UN /* "gensim/models/utils_any2vec_fast.pyx":15 * * - * def compute_ngrams(unicode word, unsigned int min_n, unsigned int max_n): # <<<<<<<<<<<<<< + * cpdef compute_ngrams(unicode word, unsigned int min_n, unsigned int max_n): # <<<<<<<<<<<<<< * cdef unicode extended_word = f'<{word}>' * ngrams = [] */ -/* Python wrapper */ static PyObject *__pyx_pw_6gensim_6models_18utils_any2vec_fast_3compute_ngrams(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static PyMethodDef __pyx_mdef_6gensim_6models_18utils_any2vec_fast_3compute_ngrams = {"compute_ngrams", (PyCFunction)__pyx_pw_6gensim_6models_18utils_any2vec_fast_3compute_ngrams, METH_VARARGS|METH_KEYWORDS, 0}; -static PyObject *__pyx_pw_6gensim_6models_18utils_any2vec_fast_3compute_ngrams(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_word = 0; - unsigned int __pyx_v_min_n; - unsigned int __pyx_v_max_n; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("compute_ngrams (wrapper)", 0); - { - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_word,&__pyx_n_s_min_n,&__pyx_n_s_max_n,0}; - PyObject* values[3] = {0,0,0}; - if (unlikely(__pyx_kwds)) { - Py_ssize_t kw_args; - const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); - switch (pos_args) { - case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); - case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); - case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = PyDict_Size(__pyx_kwds); - switch (pos_args) { - case 0: - if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_word)) != 0)) kw_args--; - else goto __pyx_L5_argtuple_error; - case 1: - if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_min_n)) != 0)) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("compute_ngrams", 1, 3, 3, 1); __PYX_ERR(0, 15, __pyx_L3_error) - } - case 2: - if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_max_n)) != 0)) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("compute_ngrams", 1, 3, 3, 2); __PYX_ERR(0, 15, __pyx_L3_error) - } - } - if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "compute_ngrams") < 0)) __PYX_ERR(0, 15, __pyx_L3_error) - } - } else if (PyTuple_GET_SIZE(__pyx_args) != 3) { - goto __pyx_L5_argtuple_error; - } else { - values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - values[1] = PyTuple_GET_ITEM(__pyx_args, 1); - values[2] = PyTuple_GET_ITEM(__pyx_args, 2); - } - __pyx_v_word = ((PyObject*)values[0]); - __pyx_v_min_n = __Pyx_PyInt_As_unsigned_int(values[1]); if (unlikely((__pyx_v_min_n == (unsigned int)-1) && PyErr_Occurred())) __PYX_ERR(0, 15, __pyx_L3_error) - __pyx_v_max_n = __Pyx_PyInt_As_unsigned_int(values[2]); if (unlikely((__pyx_v_max_n == (unsigned int)-1) && PyErr_Occurred())) __PYX_ERR(0, 15, __pyx_L3_error) - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("compute_ngrams", 1, 3, 3, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 15, __pyx_L3_error) - __pyx_L3_error:; - __Pyx_AddTraceback("gensim.models.utils_any2vec_fast.compute_ngrams", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_word), (&PyUnicode_Type), 1, "word", 1))) __PYX_ERR(0, 15, __pyx_L1_error) - __pyx_r = __pyx_pf_6gensim_6models_18utils_any2vec_fast_2compute_ngrams(__pyx_self, __pyx_v_word, __pyx_v_min_n, __pyx_v_max_n); - - /* function exit code */ - goto __pyx_L0; - __pyx_L1_error:; - __pyx_r = NULL; - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_2compute_ngrams(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_word, unsigned int __pyx_v_min_n, unsigned int __pyx_v_max_n) { +static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_compute_ngrams(PyObject *__pyx_v_word, unsigned int __pyx_v_min_n, unsigned int __pyx_v_max_n, CYTHON_UNUSED int __pyx_skip_dispatch) { PyObject *__pyx_v_extended_word = 0; PyObject *__pyx_v_ngrams = NULL; PyObject *__pyx_v_ngram_length = NULL; @@ -1125,7 +1042,7 @@ static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_2compute_ngrams(C /* "gensim/models/utils_any2vec_fast.pyx":16 * - * def compute_ngrams(unicode word, unsigned int min_n, unsigned int max_n): + * cpdef compute_ngrams(unicode word, unsigned int min_n, unsigned int max_n): * cdef unicode extended_word = f'<{word}>' # <<<<<<<<<<<<<< * ngrams = [] * for ngram_length in range(min_n, min(len(extended_word), max_n) + 1): @@ -1156,7 +1073,7 @@ static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_2compute_ngrams(C __pyx_t_4 = 0; /* "gensim/models/utils_any2vec_fast.pyx":17 - * def compute_ngrams(unicode word, unsigned int min_n, unsigned int max_n): + * cpdef compute_ngrams(unicode word, unsigned int min_n, unsigned int max_n): * cdef unicode extended_word = f'<{word}>' * ngrams = [] # <<<<<<<<<<<<<< * for ngram_length in range(min_n, min(len(extended_word), max_n) + 1): @@ -1314,7 +1231,6 @@ static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_2compute_ngrams(C * for i in range(0, len(extended_word) - ngram_length + 1): * ngrams.append(extended_word[i:i + ngram_length]) # <<<<<<<<<<<<<< * return ngrams - * */ __pyx_t_10 = __Pyx_PyIndex_AsSsize_t(__pyx_v_i); if (unlikely((__pyx_t_10 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 20, __pyx_L1_error) __pyx_t_1 = PyNumber_Add(__pyx_v_i, __pyx_v_ngram_length); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 20, __pyx_L1_error) @@ -1350,8 +1266,6 @@ static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_2compute_ngrams(C * for i in range(0, len(extended_word) - ngram_length + 1): * ngrams.append(extended_word[i:i + ngram_length]) * return ngrams # <<<<<<<<<<<<<< - * - * */ __Pyx_XDECREF(__pyx_r); __Pyx_INCREF(__pyx_v_ngrams); @@ -1361,7 +1275,7 @@ static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_2compute_ngrams(C /* "gensim/models/utils_any2vec_fast.pyx":15 * * - * def compute_ngrams(unicode word, unsigned int min_n, unsigned int max_n): # <<<<<<<<<<<<<< + * cpdef compute_ngrams(unicode word, unsigned int min_n, unsigned int max_n): # <<<<<<<<<<<<<< * cdef unicode extended_word = f'<{word}>' * ngrams = [] */ @@ -1372,7 +1286,7 @@ static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_2compute_ngrams(C __Pyx_XDECREF(__pyx_t_4); __Pyx_XDECREF(__pyx_t_7); __Pyx_AddTraceback("gensim.models.utils_any2vec_fast.compute_ngrams", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; + __pyx_r = 0; __pyx_L0:; __Pyx_XDECREF(__pyx_v_extended_word); __Pyx_XDECREF(__pyx_v_ngrams); @@ -1383,7 +1297,103 @@ static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_2compute_ngrams(C return __pyx_r; } +/* Python wrapper */ +static PyObject *__pyx_pw_6gensim_6models_18utils_any2vec_fast_3compute_ngrams(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static PyObject *__pyx_pw_6gensim_6models_18utils_any2vec_fast_3compute_ngrams(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { + PyObject *__pyx_v_word = 0; + unsigned int __pyx_v_min_n; + unsigned int __pyx_v_max_n; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("compute_ngrams (wrapper)", 0); + { + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_word,&__pyx_n_s_min_n,&__pyx_n_s_max_n,0}; + PyObject* values[3] = {0,0,0}; + if (unlikely(__pyx_kwds)) { + Py_ssize_t kw_args; + const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); + switch (pos_args) { + case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); + case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = PyDict_Size(__pyx_kwds); + switch (pos_args) { + case 0: + if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_word)) != 0)) kw_args--; + else goto __pyx_L5_argtuple_error; + case 1: + if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_min_n)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("compute_ngrams", 1, 3, 3, 1); __PYX_ERR(0, 15, __pyx_L3_error) + } + case 2: + if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_max_n)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("compute_ngrams", 1, 3, 3, 2); __PYX_ERR(0, 15, __pyx_L3_error) + } + } + if (unlikely(kw_args > 0)) { + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "compute_ngrams") < 0)) __PYX_ERR(0, 15, __pyx_L3_error) + } + } else if (PyTuple_GET_SIZE(__pyx_args) != 3) { + goto __pyx_L5_argtuple_error; + } else { + values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + values[2] = PyTuple_GET_ITEM(__pyx_args, 2); + } + __pyx_v_word = ((PyObject*)values[0]); + __pyx_v_min_n = __Pyx_PyInt_As_unsigned_int(values[1]); if (unlikely((__pyx_v_min_n == (unsigned int)-1) && PyErr_Occurred())) __PYX_ERR(0, 15, __pyx_L3_error) + __pyx_v_max_n = __Pyx_PyInt_As_unsigned_int(values[2]); if (unlikely((__pyx_v_max_n == (unsigned int)-1) && PyErr_Occurred())) __PYX_ERR(0, 15, __pyx_L3_error) + } + goto __pyx_L4_argument_unpacking_done; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("compute_ngrams", 1, 3, 3, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 15, __pyx_L3_error) + __pyx_L3_error:; + __Pyx_AddTraceback("gensim.models.utils_any2vec_fast.compute_ngrams", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_word), (&PyUnicode_Type), 1, "word", 1))) __PYX_ERR(0, 15, __pyx_L1_error) + __pyx_r = __pyx_pf_6gensim_6models_18utils_any2vec_fast_2compute_ngrams(__pyx_self, __pyx_v_word, __pyx_v_min_n, __pyx_v_max_n); + + /* function exit code */ + goto __pyx_L0; + __pyx_L1_error:; + __pyx_r = NULL; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_2compute_ngrams(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_word, unsigned int __pyx_v_min_n, unsigned int __pyx_v_max_n) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + __Pyx_RefNannySetupContext("compute_ngrams", 0); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __pyx_f_6gensim_6models_18utils_any2vec_fast_compute_ngrams(__pyx_v_word, __pyx_v_min_n, __pyx_v_max_n, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 15, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("gensim.models.utils_any2vec_fast.compute_ngrams", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + static PyMethodDef __pyx_methods[] = { + {"compute_ngrams", (PyCFunction)__pyx_pw_6gensim_6models_18utils_any2vec_fast_3compute_ngrams, METH_VARARGS|METH_KEYWORDS, 0}, {0, 0, 0, 0} }; @@ -1409,18 +1419,13 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_kp_u_, __pyx_k_, sizeof(__pyx_k_), 0, 1, 0, 0}, {&__pyx_kp_u__2, __pyx_k__2, sizeof(__pyx_k__2), 0, 1, 0, 0}, {&__pyx_n_s_c, __pyx_k_c, sizeof(__pyx_k_c), 0, 0, 1, 1}, - {&__pyx_n_s_compute_ngrams, __pyx_k_compute_ngrams, sizeof(__pyx_k_compute_ngrams), 0, 0, 1, 1}, - {&__pyx_n_s_extended_word, __pyx_k_extended_word, sizeof(__pyx_k_extended_word), 0, 0, 1, 1}, {&__pyx_n_s_ft_hash, __pyx_k_ft_hash, sizeof(__pyx_k_ft_hash), 0, 0, 1, 1}, {&__pyx_n_s_gensim_models_utils_any2vec_fast, __pyx_k_gensim_models_utils_any2vec_fast, sizeof(__pyx_k_gensim_models_utils_any2vec_fast), 0, 0, 1, 1}, {&__pyx_n_s_h, __pyx_k_h, sizeof(__pyx_k_h), 0, 0, 1, 1}, {&__pyx_kp_s_home_jojo_projekte_gensim_gensi, __pyx_k_home_jojo_projekte_gensim_gensi, sizeof(__pyx_k_home_jojo_projekte_gensim_gensi), 0, 0, 1, 0}, - {&__pyx_n_s_i, __pyx_k_i, sizeof(__pyx_k_i), 0, 0, 1, 1}, {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1}, {&__pyx_n_s_max_n, __pyx_k_max_n, sizeof(__pyx_k_max_n), 0, 0, 1, 1}, {&__pyx_n_s_min_n, __pyx_k_min_n, sizeof(__pyx_k_min_n), 0, 0, 1, 1}, - {&__pyx_n_s_ngram_length, __pyx_k_ngram_length, sizeof(__pyx_k_ngram_length), 0, 0, 1, 1}, - {&__pyx_n_s_ngrams, __pyx_k_ngrams, sizeof(__pyx_k_ngrams), 0, 0, 1, 1}, {&__pyx_n_s_range, __pyx_k_range, sizeof(__pyx_k_range), 0, 0, 1, 1}, {&__pyx_n_s_string, __pyx_k_string, sizeof(__pyx_k_string), 0, 0, 1, 1}, {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, @@ -1449,18 +1454,6 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__3); __Pyx_GIVEREF(__pyx_tuple__3); __pyx_codeobj__4 = (PyObject*)__Pyx_PyCode_New(1, 0, 3, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__3, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_jojo_projekte_gensim_gensi, __pyx_n_s_ft_hash, 7, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__4)) __PYX_ERR(0, 7, __pyx_L1_error) - - /* "gensim/models/utils_any2vec_fast.pyx":15 - * - * - * def compute_ngrams(unicode word, unsigned int min_n, unsigned int max_n): # <<<<<<<<<<<<<< - * cdef unicode extended_word = f'<{word}>' - * ngrams = [] - */ - __pyx_tuple__5 = PyTuple_Pack(7, __pyx_n_s_word, __pyx_n_s_min_n, __pyx_n_s_max_n, __pyx_n_s_extended_word, __pyx_n_s_ngrams, __pyx_n_s_ngram_length, __pyx_n_s_i); if (unlikely(!__pyx_tuple__5)) __PYX_ERR(0, 15, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__5); - __Pyx_GIVEREF(__pyx_tuple__5); - __pyx_codeobj__6 = (PyObject*)__Pyx_PyCode_New(3, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__5, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_jojo_projekte_gensim_gensi, __pyx_n_s_compute_ngrams, 15, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__6)) __PYX_ERR(0, 15, __pyx_L1_error) __Pyx_RefNannyFinishContext(); return 0; __pyx_L1_error:; @@ -1581,18 +1574,6 @@ PyMODINIT_FUNC PyInit_utils_any2vec_fast(void) if (PyDict_SetItem(__pyx_d, __pyx_n_s_ft_hash, __pyx_t_1) < 0) __PYX_ERR(0, 7, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/utils_any2vec_fast.pyx":15 - * - * - * def compute_ngrams(unicode word, unsigned int min_n, unsigned int max_n): # <<<<<<<<<<<<<< - * cdef unicode extended_word = f'<{word}>' - * ngrams = [] - */ - __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_18utils_any2vec_fast_3compute_ngrams, NULL, __pyx_n_s_gensim_models_utils_any2vec_fast); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 15, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_compute_ngrams, __pyx_t_1) < 0) __PYX_ERR(0, 15, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/utils_any2vec_fast.pyx":1 * #!/usr/bin/env cython # <<<<<<<<<<<<<< * # cython: boundscheck=False @@ -1700,148 +1681,6 @@ static CYTHON_INLINE int __Pyx_init_unicode_iteration( return 0; } -/* RaiseArgTupleInvalid */ -static void __Pyx_RaiseArgtupleInvalid( - const char* func_name, - int exact, - Py_ssize_t num_min, - Py_ssize_t num_max, - Py_ssize_t num_found) -{ - Py_ssize_t num_expected; - const char *more_or_less; - if (num_found < num_min) { - num_expected = num_min; - more_or_less = "at least"; - } else { - num_expected = num_max; - more_or_less = "at most"; - } - if (exact) { - more_or_less = "exactly"; - } - PyErr_Format(PyExc_TypeError, - "%.200s() takes %.8s %" CYTHON_FORMAT_SSIZE_T "d positional argument%.1s (%" CYTHON_FORMAT_SSIZE_T "d given)", - func_name, more_or_less, num_expected, - (num_expected == 1) ? "" : "s", num_found); -} - -/* RaiseDoubleKeywords */ -static void __Pyx_RaiseDoubleKeywordsError( - const char* func_name, - PyObject* kw_name) -{ - PyErr_Format(PyExc_TypeError, - #if PY_MAJOR_VERSION >= 3 - "%s() got multiple values for keyword argument '%U'", func_name, kw_name); - #else - "%s() got multiple values for keyword argument '%s'", func_name, - PyString_AsString(kw_name)); - #endif -} - -/* ParseKeywords */ -static int __Pyx_ParseOptionalKeywords( - PyObject *kwds, - PyObject **argnames[], - PyObject *kwds2, - PyObject *values[], - Py_ssize_t num_pos_args, - const char* function_name) -{ - PyObject *key = 0, *value = 0; - Py_ssize_t pos = 0; - PyObject*** name; - PyObject*** first_kw_arg = argnames + num_pos_args; - while (PyDict_Next(kwds, &pos, &key, &value)) { - name = first_kw_arg; - while (*name && (**name != key)) name++; - if (*name) { - values[name-argnames] = value; - continue; - } - name = first_kw_arg; - #if PY_MAJOR_VERSION < 3 - if (likely(PyString_CheckExact(key)) || likely(PyString_Check(key))) { - while (*name) { - if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key)) - && _PyString_Eq(**name, key)) { - values[name-argnames] = value; - break; - } - name++; - } - if (*name) continue; - else { - PyObject*** argname = argnames; - while (argname != first_kw_arg) { - if ((**argname == key) || ( - (CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**argname) == PyString_GET_SIZE(key)) - && _PyString_Eq(**argname, key))) { - goto arg_passed_twice; - } - argname++; - } - } - } else - #endif - if (likely(PyUnicode_Check(key))) { - while (*name) { - int cmp = (**name == key) ? 0 : - #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 - (PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 : - #endif - PyUnicode_Compare(**name, key); - if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; - if (cmp == 0) { - values[name-argnames] = value; - break; - } - name++; - } - if (*name) continue; - else { - PyObject*** argname = argnames; - while (argname != first_kw_arg) { - int cmp = (**argname == key) ? 0 : - #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 - (PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 : - #endif - PyUnicode_Compare(**argname, key); - if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; - if (cmp == 0) goto arg_passed_twice; - argname++; - } - } - } else - goto invalid_keyword_type; - if (kwds2) { - if (unlikely(PyDict_SetItem(kwds2, key, value))) goto bad; - } else { - goto invalid_keyword; - } - } - return 0; -arg_passed_twice: - __Pyx_RaiseDoubleKeywordsError(function_name, key); - goto bad; -invalid_keyword_type: - PyErr_Format(PyExc_TypeError, - "%.200s() keywords must be strings", function_name); - goto bad; -invalid_keyword: - PyErr_Format(PyExc_TypeError, - #if PY_MAJOR_VERSION < 3 - "%.200s() got an unexpected keyword argument '%.200s'", - function_name, PyString_AsString(key)); - #else - "%s() got an unexpected keyword argument '%U'", - function_name, key); - #endif -bad: - return -1; -} - /* JoinPyUnicode */ static PyObject* __Pyx_PyUnicode_Join(PyObject* value_tuple, Py_ssize_t value_count, Py_ssize_t result_ulength, CYTHON_UNUSED Py_UCS4 max_char) { @@ -2066,6 +1905,148 @@ static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Substring( #endif } +/* RaiseArgTupleInvalid */ +static void __Pyx_RaiseArgtupleInvalid( + const char* func_name, + int exact, + Py_ssize_t num_min, + Py_ssize_t num_max, + Py_ssize_t num_found) +{ + Py_ssize_t num_expected; + const char *more_or_less; + if (num_found < num_min) { + num_expected = num_min; + more_or_less = "at least"; + } else { + num_expected = num_max; + more_or_less = "at most"; + } + if (exact) { + more_or_less = "exactly"; + } + PyErr_Format(PyExc_TypeError, + "%.200s() takes %.8s %" CYTHON_FORMAT_SSIZE_T "d positional argument%.1s (%" CYTHON_FORMAT_SSIZE_T "d given)", + func_name, more_or_less, num_expected, + (num_expected == 1) ? "" : "s", num_found); +} + +/* RaiseDoubleKeywords */ +static void __Pyx_RaiseDoubleKeywordsError( + const char* func_name, + PyObject* kw_name) +{ + PyErr_Format(PyExc_TypeError, + #if PY_MAJOR_VERSION >= 3 + "%s() got multiple values for keyword argument '%U'", func_name, kw_name); + #else + "%s() got multiple values for keyword argument '%s'", func_name, + PyString_AsString(kw_name)); + #endif +} + +/* ParseKeywords */ +static int __Pyx_ParseOptionalKeywords( + PyObject *kwds, + PyObject **argnames[], + PyObject *kwds2, + PyObject *values[], + Py_ssize_t num_pos_args, + const char* function_name) +{ + PyObject *key = 0, *value = 0; + Py_ssize_t pos = 0; + PyObject*** name; + PyObject*** first_kw_arg = argnames + num_pos_args; + while (PyDict_Next(kwds, &pos, &key, &value)) { + name = first_kw_arg; + while (*name && (**name != key)) name++; + if (*name) { + values[name-argnames] = value; + continue; + } + name = first_kw_arg; + #if PY_MAJOR_VERSION < 3 + if (likely(PyString_CheckExact(key)) || likely(PyString_Check(key))) { + while (*name) { + if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key)) + && _PyString_Eq(**name, key)) { + values[name-argnames] = value; + break; + } + name++; + } + if (*name) continue; + else { + PyObject*** argname = argnames; + while (argname != first_kw_arg) { + if ((**argname == key) || ( + (CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**argname) == PyString_GET_SIZE(key)) + && _PyString_Eq(**argname, key))) { + goto arg_passed_twice; + } + argname++; + } + } + } else + #endif + if (likely(PyUnicode_Check(key))) { + while (*name) { + int cmp = (**name == key) ? 0 : + #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 + (PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 : + #endif + PyUnicode_Compare(**name, key); + if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; + if (cmp == 0) { + values[name-argnames] = value; + break; + } + name++; + } + if (*name) continue; + else { + PyObject*** argname = argnames; + while (argname != first_kw_arg) { + int cmp = (**argname == key) ? 0 : + #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 + (PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 : + #endif + PyUnicode_Compare(**argname, key); + if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; + if (cmp == 0) goto arg_passed_twice; + argname++; + } + } + } else + goto invalid_keyword_type; + if (kwds2) { + if (unlikely(PyDict_SetItem(kwds2, key, value))) goto bad; + } else { + goto invalid_keyword; + } + } + return 0; +arg_passed_twice: + __Pyx_RaiseDoubleKeywordsError(function_name, key); + goto bad; +invalid_keyword_type: + PyErr_Format(PyExc_TypeError, + "%.200s() keywords must be strings", function_name); + goto bad; +invalid_keyword: + PyErr_Format(PyExc_TypeError, + #if PY_MAJOR_VERSION < 3 + "%.200s() got an unexpected keyword argument '%.200s'", + function_name, PyString_AsString(key)); + #else + "%s() got an unexpected keyword argument '%U'", + function_name, key); + #endif +bad: + return -1; +} + /* CodeObjectCache */ static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) { int start = 0, mid = 0, end = count - 1; diff --git a/gensim/models/utils_any2vec_fast.pyx b/gensim/models/utils_any2vec_fast.pyx index 42d7618233..92d16585b5 100644 --- a/gensim/models/utils_any2vec_fast.pyx +++ b/gensim/models/utils_any2vec_fast.pyx @@ -12,12 +12,10 @@ def ft_hash(unicode string): return h -def compute_ngrams(unicode word, unsigned int min_n, unsigned int max_n): +cpdef compute_ngrams(unicode word, unsigned int min_n, unsigned int max_n): cdef unicode extended_word = f'<{word}>' ngrams = [] for ngram_length in range(min_n, min(len(extended_word), max_n) + 1): for i in range(0, len(extended_word) - ngram_length + 1): ngrams.append(extended_word[i:i + ngram_length]) return ngrams - - diff --git a/gensim/test/test_fasttext.py b/gensim/test/test_fasttext.py index 801d86f598..7a8afd0d82 100644 --- a/gensim/test/test_fasttext.py +++ b/gensim/test/test_fasttext.py @@ -105,7 +105,6 @@ def test_persistence(self): loaded_wv = FastTextKeyedVectors.load(tmpf) self.assertTrue(np.allclose(wv.syn0_ngrams, loaded_wv.syn0_ngrams)) self.assertEqual(len(wv.vocab), len(loaded_wv.vocab)) - self.assertEqual(len(wv.ngrams), len(loaded_wv.ngrams)) @unittest.skipIf(IS_WIN32, "avoid memory error with Appveyor x32") def test_norm_vectors_not_saved(self): @@ -155,16 +154,16 @@ def test_load_fasttext_format(self): # vector for oov words are slightly different from original FastText due to discarding unused ngrams # obtained using a modified version of ./fasttext print-word-vectors lee_fasttext_new.bin expected_vec_oov = [ - -0.23825, - -0.58482, - -0.22276, - -0.41215, - 0.91015, - -1.6786, - -0.26724, - 0.58818, - 0.57828, - 0.75801 + -0.21929, + -0.53778, + -0.22463, + -0.41735, + 0.71737, + -1.59758, + -0.24833, + 0.62028, + 0.53203, + 0.77568 ] self.assertTrue(np.allclose(model["rejection"], expected_vec_oov, atol=1e-4)) @@ -206,16 +205,16 @@ def test_load_fasttext_new_format(self): # vector for oov words are slightly different from original FastText due to discarding unused ngrams # obtained using a modified version of ./fasttext print-word-vectors lee_fasttext_new.bin expected_vec_oov = [ - -0.53378, - -0.19, - 0.013482, - -0.86767, - -0.21684, - -0.89928, - 0.45124, - 0.18025, - -0.14128, - 0.22508 + -0.49111, + -0.13122, + -0.02109, + -0.88769, + -0.20105, + -0.91732, + 0.47243, + 0.19708, + -0.17856, + 0.19815 ] self.assertTrue(np.allclose(new_model["rejection"], expected_vec_oov, atol=1e-4)) @@ -295,8 +294,6 @@ def test_lookup(self): # Out of vocab check self.assertFalse('nights' in self.test_model.wv.vocab) self.assertTrue(np.allclose(self.test_model['nights'], self.test_model[['nights']])) - # Word with no ngrams in model - self.assertRaises(KeyError, lambda: self.test_model['a!@']) def test_contains(self): # In vocab, sanity check @@ -305,29 +302,13 @@ def test_contains(self): # Out of vocab check self.assertFalse('nights' in self.test_model.wv.vocab) self.assertTrue('nights' in self.test_model) - # Word with no ngrams in model - self.assertFalse('a!@' in self.test_model.wv.vocab) - self.assertFalse('a!@' in self.test_model) def test_wm_distance(self): doc = ['night', 'payment'] oov_doc = ['nights', 'forests', 'payments'] - ngrams_absent_doc = ['a!@', 'b#$'] dist = self.test_model.wmdistance(doc, oov_doc) self.assertNotEqual(float('inf'), dist) - dist = self.test_model.wmdistance(doc, ngrams_absent_doc) - self.assertEqual(float('inf'), dist) - - def test_doesnt_match(self): - oov_words = ['nights', 'forests', 'payments'] - # Out of vocab check - for word in oov_words: - self.assertFalse(word in self.test_model.wv.vocab) - try: - self.test_model.doesnt_match(oov_words) - except Exception: - self.fail('model.doesnt_match raises exception for oov words') def test_cbow_hs_training(self): @@ -448,15 +429,11 @@ def test_sg_neg_training(self): def test_online_learning(self): model_hs = FT_gensim(sentences, size=10, min_count=1, seed=42, hs=1, negative=0) self.assertTrue(len(model_hs.wv.vocab), 12) - self.assertTrue(len(model_hs.wv.ngrams), 202) self.assertTrue(model_hs.wv.vocab['graph'].count, 3) - self.assertFalse('tif' in model_hs.wv.ngrams) model_hs.build_vocab(new_sentences, update=True) # update vocab self.assertEqual(len(model_hs.wv.vocab), 14) - self.assertTrue(len(model_hs.wv.ngrams), 271) self.assertTrue(model_hs.wv.vocab['graph'].count, 4) self.assertTrue(model_hs.wv.vocab['artificial'].count, 4) - self.assertTrue('tif' in model_hs.wv.ngrams) # ngram added because of the word `artificial` def test_online_learning_after_save(self): tmpf = get_tmpfile('gensim_fasttext.tst') @@ -464,11 +441,9 @@ def test_online_learning_after_save(self): model_neg.save(tmpf) model_neg = FT_gensim.load(tmpf) self.assertTrue(len(model_neg.wv.vocab), 12) - self.assertTrue(len(model_neg.wv.ngrams), 202) model_neg.build_vocab(new_sentences, update=True) # update vocab model_neg.train(new_sentences, total_examples=model_neg.corpus_count, epochs=model_neg.iter) self.assertEqual(len(model_neg.wv.vocab), 14) - self.assertTrue(len(model_neg.wv.ngrams), 271) def online_sanity(self, model): terro, others = [], [] @@ -483,11 +458,9 @@ def online_sanity(self, model): # checks that `syn0` is different from `syn0_vocab` self.assertFalse(np.all(np.equal(model.wv.syn0, model.wv.syn0_vocab))) self.assertFalse('terrorism' in model.wv.vocab) - self.assertFalse('orism>' in model.wv.ngrams) model.build_vocab(terro, update=True) # update vocab self.assertTrue(model.wv.syn0_ngrams.dtype == 'float32') self.assertTrue('terrorism' in model.wv.vocab) - self.assertTrue('orism>' in model.wv.ngrams) orig0_all = np.copy(model.wv.syn0_ngrams) model.train(terro, total_examples=len(terro), epochs=model.iter) self.assertFalse(np.allclose(model.wv.syn0_ngrams, orig0_all)) @@ -542,6 +515,17 @@ def test_bucket_ngrams(self): model.build_vocab(new_sentences, update=True) self.assertEqual(model.wv.syn0_ngrams.shape, (20, 10)) + def test_estimate_memory(self): + model = FT_gensim(sg=1, hs=1, negative=5, min_count=3) + model.build_vocab(sentences) + report = model.estimate_memory() + self.assertEqual(report['vocab'], 2800) + self.assertEqual(report['syn0_vocab'], 1600) + self.assertEqual(report['syn1'], 1600) + self.assertEqual(report['syn1neg'], 1600) + self.assertEqual(report['syn0_ngrams'], 22400) + self.assertEqual(report['total'], 30000) + def testLoadOldModel(self): """Test loading fasttext models from previous version""" diff --git a/gensim/test/test_fasttext_wrapper.py b/gensim/test/test_fasttext_wrapper.py index bc995f8159..a81221cfeb 100644 --- a/gensim/test/test_fasttext_wrapper.py +++ b/gensim/test/test_fasttext_wrapper.py @@ -143,16 +143,16 @@ def testLoadFastTextFormat(self): # vector for oov words are slightly different from original FastText due to discarding unused ngrams # obtained using a modified version of ./fasttext print-word-vectors lee_fasttext_new.bin expected_vec_oov = [ - -0.23825, - -0.58482, - -0.22276, - -0.41215, - 0.91015, - -1.6786, - -0.26724, - 0.58818, - 0.57828, - 0.75801 + -0.21929, + -0.53778, + -0.22463, + -0.41735, + 0.71737, + -1.59758, + -0.24833, + 0.62028, + 0.53203, + 0.77568 ] self.assertTrue(numpy.allclose(model["rejection"], expected_vec_oov, atol=1e-4)) @@ -194,16 +194,16 @@ def testLoadFastTextNewFormat(self): # vector for oov words are slightly different from original FastText due to discarding unused ngrams # obtained using a modified version of ./fasttext print-word-vectors lee_fasttext_new.bin expected_vec_oov = [ - -0.53378, - -0.19, - 0.013482, - -0.86767, - -0.21684, - -0.89928, - 0.45124, - 0.18025, - -0.14128, - 0.22508 + -0.49111, + -0.13122, + -0.02109, + -0.88769, + -0.20105, + -0.91732, + 0.47243, + 0.19708, + -0.17856, + 0.19815 ] self.assertTrue(numpy.allclose(new_model["rejection"], expected_vec_oov, atol=1e-4)) @@ -296,8 +296,6 @@ def testLookup(self): # Out of vocab check self.assertFalse('nights' in self.test_model.wv.vocab) self.assertTrue(numpy.allclose(self.test_model['nights'], self.test_model[['nights']])) - # Word with no ngrams in model - self.assertRaises(KeyError, lambda: self.test_model['a!@']) def testContains(self): """Tests __contains__ for in-vocab and out-of-vocab words""" @@ -307,20 +305,14 @@ def testContains(self): # Out of vocab check self.assertFalse('nights' in self.test_model.wv.vocab) self.assertTrue('nights' in self.test_model) - # Word with no ngrams in model - self.assertFalse('a!@' in self.test_model.wv.vocab) - self.assertFalse('a!@' in self.test_model) def testWmdistance(self): """Tests wmdistance for docs with in-vocab and out-of-vocab words""" doc = ['night', 'payment'] oov_doc = ['nights', 'forests', 'payments'] - ngrams_absent_doc = ['a!@', 'b#$'] dist = self.test_model.wmdistance(doc, oov_doc) self.assertNotEqual(float('inf'), dist) - dist = self.test_model.wmdistance(doc, ngrams_absent_doc) - self.assertEqual(float('inf'), dist) def testDoesntMatch(self): """Tests doesnt_match for list of out-of-vocab words""" @@ -363,8 +355,8 @@ def testPersistenceForOldVersions(self): 1.19031894, 2.01627707, 1.98942184, -1.39095843, -0.65036952]) self.assertTrue(numpy.allclose(loaded_model["the"], in_expected_vec, atol=1e-4)) # out-of-vocab word - out_expected_vec = numpy.array([-1.34948218, -0.8686831, -1.51483142, -1.0164026, 0.56272298, - 0.66228276, 1.06477463, 1.1355902, -0.80972326, -0.39845538]) + out_expected_vec = numpy.array([-0.33959097, -0.21121596, -0.37212455, -0.25057459, 0.11222091, + 0.17517674, 0.26949012, 0.29352987, -0.1930912, -0.09438948]) self.assertTrue(numpy.allclose(loaded_model["random_word"], out_expected_vec, atol=1e-4)) diff --git a/setup.py b/setup.py index 697accd004..d5baad841c 100644 --- a/setup.py +++ b/setup.py @@ -258,6 +258,9 @@ def finalize_options(self): include_dirs=[model_dir]), Extension('gensim.models.fasttext_inner', sources=['./gensim/models/fasttext_inner.c'], + include_dirs=[model_dir]), + Extension('gensim.models.utils_any2vec_fast', + sources=['./gensim/models/utils_any2vec_fast.c'], include_dirs=[model_dir]) ], cmdclass=cmdclass, From f467ab90f6fcf8de12af42980daf3cdd317fdc6a Mon Sep 17 00:00:00 2001 From: Johannes Baiter Date: Thu, 22 Feb 2018 01:47:26 +0100 Subject: [PATCH 04/20] Fix compute_ngrams for Python 2 --- gensim/models/fasttext_inner.c | 655 ++++++++++----------------- gensim/models/utils_any2vec_fast.c | 15 +- gensim/models/utils_any2vec_fast.pyx | 2 +- 3 files changed, 242 insertions(+), 430 deletions(-) diff --git a/gensim/models/fasttext_inner.c b/gensim/models/fasttext_inner.c index 2593039f3d..f5becae79d 100644 --- a/gensim/models/fasttext_inner.c +++ b/gensim/models/fasttext_inner.c @@ -1291,13 +1291,6 @@ static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_PY_LONG_LONG(unsigned P #define __PYX_FORCE_INIT_THREADS 0 #endif -/* Print.proto */ -static int __Pyx_Print(PyObject*, PyObject *, int); -#if CYTHON_COMPILING_IN_PYPY || PY_MAJOR_VERSION >= 3 -static PyObject* __pyx_print = 0; -static PyObject* __pyx_print_kwargs = 0; -#endif - /* RealImag.proto */ #if CYTHON_CCOMPLEX #ifdef __cplusplus @@ -1414,9 +1407,6 @@ static CYTHON_INLINE unsigned PY_LONG_LONG __Pyx_PyInt_As_unsigned_PY_LONG_LONG( /* CIntFromPy.proto */ static CYTHON_INLINE npy_uint32 __Pyx_PyInt_As_npy_uint32(PyObject *); -/* PrintOne.proto */ -static int __Pyx_PrintOne(PyObject* stream, PyObject *o); - /* CheckBinaryVersion.proto */ static int __Pyx_check_binary_version(void); @@ -1531,10 +1521,8 @@ static const char __pyx_k_l1[] = "_l1"; static const char __pyx_k_np[] = "np"; static const char __pyx_k_wv[] = "wv"; static const char __pyx_k__14[] = "*"; -static const char __pyx_k_end[] = "end"; static const char __pyx_k_REAL[] = "REAL"; static const char __pyx_k_code[] = "code"; -static const char __pyx_k_file[] = "file"; static const char __pyx_k_init[] = "init"; static const char __pyx_k_item[] = "item"; static const char __pyx_k_l1_2[] = "l1"; @@ -1559,7 +1547,6 @@ static const char __pyx_k_model[] = "model"; static const char __pyx_k_numpy[] = "numpy"; static const char __pyx_k_p_res[] = "p_res"; static const char __pyx_k_point[] = "point"; -static const char __pyx_k_print[] = "print"; static const char __pyx_k_range[] = "range"; static const char __pyx_k_token[] = "token"; static const char __pyx_k_vocab[] = "vocab"; @@ -1617,7 +1604,6 @@ static const char __pyx_k_reduced_windows[] = "reduced_windows"; static const char __pyx_k_subwords_idx_len[] = "subwords_idx_len"; static const char __pyx_k_train_batch_cbow[] = "train_batch_cbow"; static const char __pyx_k_word_locks_vocab[] = "word_locks_vocab"; -static const char __pyx_k_CYTHON_BATCH_CBOW[] = "CYTHON BATCH_CBOW!"; static const char __pyx_k_scipy_linalg_blas[] = "scipy.linalg.blas"; static const char __pyx_k_word_locks_ngrams[] = "word_locks_ngrams"; static const char __pyx_k_MAX_WORDS_IN_BATCH[] = "MAX_WORDS_IN_BATCH"; @@ -1635,7 +1621,6 @@ static const char __pyx_k_Non_native_byte_order_not_suppor[] = "Non-native byte static const char __pyx_k_ndarray_is_not_Fortran_contiguou[] = "ndarray is not Fortran contiguous"; static const char __pyx_k_numpy_core_umath_failed_to_impor[] = "numpy.core.umath failed to import"; static const char __pyx_k_Format_string_allocated_too_shor_2[] = "Format string allocated too short."; -static PyObject *__pyx_kp_s_CYTHON_BATCH_CBOW; static PyObject *__pyx_n_s_FAST_VERSION; static PyObject *__pyx_kp_u_Format_string_allocated_too_shor; static PyObject *__pyx_kp_u_Format_string_allocated_too_shor_2; @@ -1661,11 +1646,9 @@ static PyObject *__pyx_n_s_d_res; static PyObject *__pyx_n_s_dtype; static PyObject *__pyx_n_s_effective_sentences; static PyObject *__pyx_n_s_effective_words; -static PyObject *__pyx_n_s_end; static PyObject *__pyx_n_s_enumerate; static PyObject *__pyx_n_s_expected; static PyObject *__pyx_n_s_fblas; -static PyObject *__pyx_n_s_file; static PyObject *__pyx_n_s_float32; static PyObject *__pyx_n_s_ft_hash; static PyObject *__pyx_n_s_gensim_models_fasttext_inner; @@ -1701,7 +1684,6 @@ static PyObject *__pyx_kp_s_numpy_core_umath_failed_to_impor; static PyObject *__pyx_n_s_p_res; static PyObject *__pyx_n_s_point; static PyObject *__pyx_n_s_points; -static PyObject *__pyx_n_s_print; static PyObject *__pyx_n_s_randint; static PyObject *__pyx_n_s_random; static PyObject *__pyx_n_s_range; @@ -5368,7 +5350,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT * # are referenced throughout so that it isn't put back to free memory pool by Python's memory manager * subword_arrays = {} # <<<<<<<<<<<<<< * - * print "CYTHON BATCH_CBOW!" + * if hs: */ __pyx_t_3 = PyDict_New(); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 420, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); @@ -5378,15 +5360,6 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT /* "gensim/models/fasttext_inner.pyx":422 * subword_arrays = {} * - * print "CYTHON BATCH_CBOW!" # <<<<<<<<<<<<<< - * if hs: - * syn1 = (np.PyArray_DATA(model.trainables.syn1)) - */ - if (__Pyx_PrintOne(0, __pyx_kp_s_CYTHON_BATCH_CBOW) < 0) __PYX_ERR(0, 422, __pyx_L1_error) - - /* "gensim/models/fasttext_inner.pyx":423 - * - * print "CYTHON BATCH_CBOW!" * if hs: # <<<<<<<<<<<<<< * syn1 = (np.PyArray_DATA(model.trainables.syn1)) * @@ -5394,32 +5367,32 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":424 - * print "CYTHON BATCH_CBOW!" + /* "gensim/models/fasttext_inner.pyx":423 + * * if hs: * syn1 = (np.PyArray_DATA(model.trainables.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 424, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 423, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 424, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 423, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 424, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 423, __pyx_L1_error) __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":423 + /* "gensim/models/fasttext_inner.pyx":422 + * subword_arrays = {} * - * print "CYTHON BATCH_CBOW!" * if hs: # <<<<<<<<<<<<<< * syn1 = (np.PyArray_DATA(model.trainables.syn1)) * */ } - /* "gensim/models/fasttext_inner.pyx":426 + /* "gensim/models/fasttext_inner.pyx":425 * syn1 = (np.PyArray_DATA(model.trainables.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -5429,55 +5402,55 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_v_negative != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":427 + /* "gensim/models/fasttext_inner.pyx":426 * * if negative: * syn1neg = (np.PyArray_DATA(model.trainables.syn1neg)) # <<<<<<<<<<<<<< * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 427, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 426, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 427, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 426, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 427, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 426, __pyx_L1_error) __pyx_v_syn1neg = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":428 + /* "gensim/models/fasttext_inner.pyx":427 * if negative: * syn1neg = (np.PyArray_DATA(model.trainables.syn1neg)) * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) # <<<<<<<<<<<<<< * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 428, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 427, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 428, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 427, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 428, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 427, __pyx_L1_error) __pyx_v_cum_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":429 + /* "gensim/models/fasttext_inner.pyx":428 * syn1neg = (np.PyArray_DATA(model.trainables.syn1neg)) * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) # <<<<<<<<<<<<<< * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 429, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 428, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 429, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 428, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_6 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_6 == -1)) __PYX_ERR(0, 429, __pyx_L1_error) + __pyx_t_6 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_6 == -1)) __PYX_ERR(0, 428, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_cum_table_len = __pyx_t_6; - /* "gensim/models/fasttext_inner.pyx":426 + /* "gensim/models/fasttext_inner.pyx":425 * syn1 = (np.PyArray_DATA(model.trainables.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -5486,7 +5459,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":430 + /* "gensim/models/fasttext_inner.pyx":429 * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: # <<<<<<<<<<<<<< @@ -5504,41 +5477,41 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_L6_bool_binop_done:; if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":431 + /* "gensim/models/fasttext_inner.pyx":430 * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 431, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 430, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 431, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 430, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 431, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 430, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 431, __pyx_L1_error) + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 430, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 431, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 430, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 431, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 430, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 431, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 430, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 431, __pyx_L1_error) + __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 430, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 431, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 430, __pyx_L1_error) __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_next_random = __pyx_t_9; - /* "gensim/models/fasttext_inner.pyx":430 + /* "gensim/models/fasttext_inner.pyx":429 * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: # <<<<<<<<<<<<<< @@ -5547,42 +5520,42 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":434 + /* "gensim/models/fasttext_inner.pyx":433 * * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< * neu1 = np.PyArray_DATA(_neu1) * */ - if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 434, __pyx_L1_error) + if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 433, __pyx_L1_error) __pyx_v_work = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); - /* "gensim/models/fasttext_inner.pyx":435 + /* "gensim/models/fasttext_inner.pyx":434 * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) * neu1 = np.PyArray_DATA(_neu1) # <<<<<<<<<<<<<< * * # prepare C structures so we can go "full C" and release the Python GIL */ - if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 435, __pyx_L1_error) + if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 434, __pyx_L1_error) __pyx_v_neu1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__neu1))); - /* "gensim/models/fasttext_inner.pyx":438 + /* "gensim/models/fasttext_inner.pyx":437 * * # prepare C structures so we can go "full C" and release the Python GIL * vlookup = model.wv.vocab # <<<<<<<<<<<<<< * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: */ - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 438, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 437, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 438, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 437, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_vlookup = __pyx_t_3; __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":439 + /* "gensim/models/fasttext_inner.pyx":438 * # prepare C structures so we can go "full C" and release the Python GIL * vlookup = model.wv.vocab * sentence_idx[0] = 0 # indices of the first sentence always start at 0 # <<<<<<<<<<<<<< @@ -5591,7 +5564,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ (__pyx_v_sentence_idx[0]) = 0; - /* "gensim/models/fasttext_inner.pyx":440 + /* "gensim/models/fasttext_inner.pyx":439 * vlookup = model.wv.vocab * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: # <<<<<<<<<<<<<< @@ -5602,26 +5575,26 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_3 = __pyx_v_sentences; __Pyx_INCREF(__pyx_t_3); __pyx_t_6 = 0; __pyx_t_10 = NULL; } else { - __pyx_t_6 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_sentences); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 440, __pyx_L1_error) + __pyx_t_6 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_sentences); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 439, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_10 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 440, __pyx_L1_error) + __pyx_t_10 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 439, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_10)) { if (likely(PyList_CheckExact(__pyx_t_3))) { if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_8 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 440, __pyx_L1_error) + __pyx_t_8 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 439, __pyx_L1_error) #else - __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 440, __pyx_L1_error) + __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 439, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); #endif } else { if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 440, __pyx_L1_error) + __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 439, __pyx_L1_error) #else - __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 440, __pyx_L1_error) + __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 439, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); #endif } @@ -5631,7 +5604,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 440, __pyx_L1_error) + else __PYX_ERR(0, 439, __pyx_L1_error) } break; } @@ -5640,27 +5613,27 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __Pyx_XDECREF_SET(__pyx_v_sent, __pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/fasttext_inner.pyx":441 + /* "gensim/models/fasttext_inner.pyx":440 * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: * if not sent: # <<<<<<<<<<<<<< * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: */ - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_v_sent); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 441, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_v_sent); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 440, __pyx_L1_error) __pyx_t_7 = ((!__pyx_t_5) != 0); if (__pyx_t_7) { - /* "gensim/models/fasttext_inner.pyx":442 + /* "gensim/models/fasttext_inner.pyx":441 * for sent in sentences: * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged # <<<<<<<<<<<<<< * for token in sent: - * print(token) + * word = vlookup[token] if token in vlookup else None */ goto __pyx_L8_continue; - /* "gensim/models/fasttext_inner.pyx":441 + /* "gensim/models/fasttext_inner.pyx":440 * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: * if not sent: # <<<<<<<<<<<<<< @@ -5669,37 +5642,37 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":443 + /* "gensim/models/fasttext_inner.pyx":442 * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: # <<<<<<<<<<<<<< - * print(token) * word = vlookup[token] if token in vlookup else None + * if word is None: */ if (likely(PyList_CheckExact(__pyx_v_sent)) || PyTuple_CheckExact(__pyx_v_sent)) { __pyx_t_8 = __pyx_v_sent; __Pyx_INCREF(__pyx_t_8); __pyx_t_11 = 0; __pyx_t_12 = NULL; } else { - __pyx_t_11 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_v_sent); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 443, __pyx_L1_error) + __pyx_t_11 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_v_sent); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 442, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - __pyx_t_12 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 443, __pyx_L1_error) + __pyx_t_12 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 442, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_12)) { if (likely(PyList_CheckExact(__pyx_t_8))) { if (__pyx_t_11 >= PyList_GET_SIZE(__pyx_t_8)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 443, __pyx_L1_error) + __pyx_t_1 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 442, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 443, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 442, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } else { if (__pyx_t_11 >= PyTuple_GET_SIZE(__pyx_t_8)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 443, __pyx_L1_error) + __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 442, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 443, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 442, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } @@ -5709,7 +5682,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 443, __pyx_L1_error) + else __PYX_ERR(0, 442, __pyx_L1_error) } break; } @@ -5718,25 +5691,16 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __Pyx_XDECREF_SET(__pyx_v_token, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":444 + /* "gensim/models/fasttext_inner.pyx":443 * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: - * print(token) # <<<<<<<<<<<<<< - * word = vlookup[token] if token in vlookup else None - * if word is None: - */ - if (__Pyx_PrintOne(0, __pyx_v_token) < 0) __PYX_ERR(0, 444, __pyx_L1_error) - - /* "gensim/models/fasttext_inner.pyx":445 - * for token in sent: - * print(token) * word = vlookup[token] if token in vlookup else None # <<<<<<<<<<<<<< * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window */ - __pyx_t_7 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 445, __pyx_L1_error) + __pyx_t_7 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 443, __pyx_L1_error) if ((__pyx_t_7 != 0)) { - __pyx_t_13 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 445, __pyx_L1_error) + __pyx_t_13 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 443, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __pyx_t_1 = __pyx_t_13; __pyx_t_13 = 0; @@ -5747,8 +5711,8 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":446 - * print(token) + /* "gensim/models/fasttext_inner.pyx":444 + * for token in sent: * word = vlookup[token] if token in vlookup else None * if word is None: # <<<<<<<<<<<<<< * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window @@ -5758,7 +5722,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_t_7 != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":447 + /* "gensim/models/fasttext_inner.pyx":445 * word = vlookup[token] if token in vlookup else None * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window # <<<<<<<<<<<<<< @@ -5767,8 +5731,8 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ goto __pyx_L11_continue; - /* "gensim/models/fasttext_inner.pyx":446 - * print(token) + /* "gensim/models/fasttext_inner.pyx":444 + * for token in sent: * word = vlookup[token] if token in vlookup else None * if word is None: # <<<<<<<<<<<<<< * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window @@ -5776,7 +5740,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":448 + /* "gensim/models/fasttext_inner.pyx":446 * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< @@ -5789,20 +5753,20 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = __pyx_t_7; goto __pyx_L15_bool_binop_done; } - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 448, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 446, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_13 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 448, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 446, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_14 = PyObject_RichCompare(__pyx_t_1, __pyx_t_13, Py_LT); __Pyx_XGOTREF(__pyx_t_14); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 448, __pyx_L1_error) + __pyx_t_14 = PyObject_RichCompare(__pyx_t_1, __pyx_t_13, Py_LT); __Pyx_XGOTREF(__pyx_t_14); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 446, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_14); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 448, __pyx_L1_error) + __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_14); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 446, __pyx_L1_error) __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; __pyx_t_5 = __pyx_t_7; __pyx_L15_bool_binop_done:; if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":449 + /* "gensim/models/fasttext_inner.pyx":447 * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): * continue # <<<<<<<<<<<<<< @@ -5811,7 +5775,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ goto __pyx_L11_continue; - /* "gensim/models/fasttext_inner.pyx":448 + /* "gensim/models/fasttext_inner.pyx":446 * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< @@ -5820,46 +5784,46 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":450 + /* "gensim/models/fasttext_inner.pyx":448 * if sample and word.sample_int < random_int32(&next_random): * continue * indexes[effective_words] = word.index # <<<<<<<<<<<<<< * * subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] */ - __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 450, __pyx_L1_error) + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 448, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_t_14); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 450, __pyx_L1_error) + __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_t_14); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 448, __pyx_L1_error) __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; (__pyx_v_indexes[__pyx_v_effective_words]) = __pyx_t_15; - /* "gensim/models/fasttext_inner.pyx":452 + /* "gensim/models/fasttext_inner.pyx":450 * indexes[effective_words] = word.index * * subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] # <<<<<<<<<<<<<< * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] * word_subwords = np.array(subwords, dtype=np.uint32) */ - __pyx_t_14 = PyList_New(0); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_14 = PyList_New(0); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - /* "gensim/models/fasttext_inner.pyx":453 + /* "gensim/models/fasttext_inner.pyx":451 * * subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] # <<<<<<<<<<<<<< * word_subwords = np.array(subwords, dtype=np.uint32) * subwords_idx_len[effective_words] = len(subwords) */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_compute_ngrams); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 453, __pyx_L1_error) + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_compute_ngrams); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 451, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 453, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 451, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_min_n); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 453, __pyx_L1_error) + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_min_n); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 451, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 453, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 451, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - __pyx_t_18 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_max_n); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 453, __pyx_L1_error) + __pyx_t_18 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_max_n); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 451, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_18); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; __pyx_t_16 = NULL; @@ -5877,7 +5841,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT #if CYTHON_FAST_PYCALL if (PyFunction_Check(__pyx_t_1)) { PyObject *__pyx_temp[4] = {__pyx_t_16, __pyx_v_token, __pyx_t_17, __pyx_t_18}; - __pyx_t_13 = __Pyx_PyFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_2, 3+__pyx_t_2); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 453, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_2, 3+__pyx_t_2); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 451, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_16); __pyx_t_16 = 0; __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; @@ -5887,7 +5851,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT #if CYTHON_FAST_PYCCALL if (__Pyx_PyFastCFunction_Check(__pyx_t_1)) { PyObject *__pyx_temp[4] = {__pyx_t_16, __pyx_v_token, __pyx_t_17, __pyx_t_18}; - __pyx_t_13 = __Pyx_PyCFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_2, 3+__pyx_t_2); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 453, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyCFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_2, 3+__pyx_t_2); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 451, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_16); __pyx_t_16 = 0; __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; @@ -5895,7 +5859,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT } else #endif { - __pyx_t_19 = PyTuple_New(3+__pyx_t_2); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 453, __pyx_L1_error) + __pyx_t_19 = PyTuple_New(3+__pyx_t_2); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 451, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_19); if (__pyx_t_16) { __Pyx_GIVEREF(__pyx_t_16); PyTuple_SET_ITEM(__pyx_t_19, 0, __pyx_t_16); __pyx_t_16 = NULL; @@ -5909,7 +5873,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT PyTuple_SET_ITEM(__pyx_t_19, 2+__pyx_t_2, __pyx_t_18); __pyx_t_17 = 0; __pyx_t_18 = 0; - __pyx_t_13 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_19, NULL); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 453, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_19, NULL); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 451, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; } @@ -5918,9 +5882,9 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_1 = __pyx_t_13; __Pyx_INCREF(__pyx_t_1); __pyx_t_20 = 0; __pyx_t_21 = NULL; } else { - __pyx_t_20 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_t_13); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 453, __pyx_L1_error) + __pyx_t_20 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_t_13); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 451, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_21 = Py_TYPE(__pyx_t_1)->tp_iternext; if (unlikely(!__pyx_t_21)) __PYX_ERR(0, 453, __pyx_L1_error) + __pyx_t_21 = Py_TYPE(__pyx_t_1)->tp_iternext; if (unlikely(!__pyx_t_21)) __PYX_ERR(0, 451, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; for (;;) { @@ -5928,17 +5892,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT if (likely(PyList_CheckExact(__pyx_t_1))) { if (__pyx_t_20 >= PyList_GET_SIZE(__pyx_t_1)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_13 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_20); __Pyx_INCREF(__pyx_t_13); __pyx_t_20++; if (unlikely(0 < 0)) __PYX_ERR(0, 453, __pyx_L1_error) + __pyx_t_13 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_20); __Pyx_INCREF(__pyx_t_13); __pyx_t_20++; if (unlikely(0 < 0)) __PYX_ERR(0, 451, __pyx_L1_error) #else - __pyx_t_13 = PySequence_ITEM(__pyx_t_1, __pyx_t_20); __pyx_t_20++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 453, __pyx_L1_error) + __pyx_t_13 = PySequence_ITEM(__pyx_t_1, __pyx_t_20); __pyx_t_20++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 451, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); #endif } else { if (__pyx_t_20 >= PyTuple_GET_SIZE(__pyx_t_1)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_13 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_20); __Pyx_INCREF(__pyx_t_13); __pyx_t_20++; if (unlikely(0 < 0)) __PYX_ERR(0, 453, __pyx_L1_error) + __pyx_t_13 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_20); __Pyx_INCREF(__pyx_t_13); __pyx_t_20++; if (unlikely(0 < 0)) __PYX_ERR(0, 451, __pyx_L1_error) #else - __pyx_t_13 = PySequence_ITEM(__pyx_t_1, __pyx_t_20); __pyx_t_20++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 453, __pyx_L1_error) + __pyx_t_13 = PySequence_ITEM(__pyx_t_1, __pyx_t_20); __pyx_t_20++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 451, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); #endif } @@ -5948,7 +5912,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 453, __pyx_L1_error) + else __PYX_ERR(0, 451, __pyx_L1_error) } break; } @@ -5957,19 +5921,19 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __Pyx_XDECREF_SET(__pyx_v_subword, __pyx_t_13); __pyx_t_13 = 0; - /* "gensim/models/fasttext_inner.pyx":452 + /* "gensim/models/fasttext_inner.pyx":450 * indexes[effective_words] = word.index * * subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] # <<<<<<<<<<<<<< * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] * word_subwords = np.array(subwords, dtype=np.uint32) */ - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_hash2index); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_hash2index); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_19); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_18 = __Pyx_GetModuleGlobalName(__pyx_n_s_ft_hash); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_18 = __Pyx_GetModuleGlobalName(__pyx_n_s_ft_hash); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_18); __pyx_t_17 = NULL; if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_18))) { @@ -5982,13 +5946,13 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT } } if (!__pyx_t_17) { - __pyx_t_13 = __Pyx_PyObject_CallOneArg(__pyx_t_18, __pyx_v_subword); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_CallOneArg(__pyx_t_18, __pyx_v_subword); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); } else { #if CYTHON_FAST_PYCALL if (PyFunction_Check(__pyx_t_18)) { PyObject *__pyx_temp[2] = {__pyx_t_17, __pyx_v_subword}; - __pyx_t_13 = __Pyx_PyFunction_FastCall(__pyx_t_18, __pyx_temp+1-1, 1+1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyFunction_FastCall(__pyx_t_18, __pyx_temp+1-1, 1+1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_17); __pyx_t_17 = 0; __Pyx_GOTREF(__pyx_t_13); } else @@ -5996,38 +5960,38 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT #if CYTHON_FAST_PYCCALL if (__Pyx_PyFastCFunction_Check(__pyx_t_18)) { PyObject *__pyx_temp[2] = {__pyx_t_17, __pyx_v_subword}; - __pyx_t_13 = __Pyx_PyCFunction_FastCall(__pyx_t_18, __pyx_temp+1-1, 1+1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyCFunction_FastCall(__pyx_t_18, __pyx_temp+1-1, 1+1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_17); __pyx_t_17 = 0; __Pyx_GOTREF(__pyx_t_13); } else #endif { - __pyx_t_16 = PyTuple_New(1+1); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_16 = PyTuple_New(1+1); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); __Pyx_GIVEREF(__pyx_t_17); PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_17); __pyx_t_17 = NULL; __Pyx_INCREF(__pyx_v_subword); __Pyx_GIVEREF(__pyx_v_subword); PyTuple_SET_ITEM(__pyx_t_16, 0+1, __pyx_v_subword); - __pyx_t_13 = __Pyx_PyObject_Call(__pyx_t_18, __pyx_t_16, NULL); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_Call(__pyx_t_18, __pyx_t_16, NULL); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; } } __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; - __pyx_t_18 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_bucket); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_18 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_bucket); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_18); - __pyx_t_16 = PyNumber_Remainder(__pyx_t_13, __pyx_t_18); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_16 = PyNumber_Remainder(__pyx_t_13, __pyx_t_18); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; - __pyx_t_18 = PyObject_GetItem(__pyx_t_19, __pyx_t_16); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_18 = PyObject_GetItem(__pyx_t_19, __pyx_t_16); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_18); __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - if (unlikely(__Pyx_ListComp_Append(__pyx_t_14, (PyObject*)__pyx_t_18))) __PYX_ERR(0, 452, __pyx_L1_error) + if (unlikely(__Pyx_ListComp_Append(__pyx_t_14, (PyObject*)__pyx_t_18))) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; - /* "gensim/models/fasttext_inner.pyx":453 + /* "gensim/models/fasttext_inner.pyx":451 * * subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] # <<<<<<<<<<<<<< @@ -6039,33 +6003,33 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __Pyx_XDECREF_SET(__pyx_v_subwords, ((PyObject*)__pyx_t_14)); __pyx_t_14 = 0; - /* "gensim/models/fasttext_inner.pyx":454 + /* "gensim/models/fasttext_inner.pyx":452 * subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] * word_subwords = np.array(subwords, dtype=np.uint32) # <<<<<<<<<<<<<< * subwords_idx_len[effective_words] = len(subwords) * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) */ - __pyx_t_14 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 454, __pyx_L1_error) + __pyx_t_14 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 452, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_array); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 454, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_array); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 452, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - __pyx_t_14 = PyTuple_New(1); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 454, __pyx_L1_error) + __pyx_t_14 = PyTuple_New(1); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 452, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); __Pyx_INCREF(__pyx_v_subwords); __Pyx_GIVEREF(__pyx_v_subwords); PyTuple_SET_ITEM(__pyx_t_14, 0, __pyx_v_subwords); - __pyx_t_18 = PyDict_New(); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 454, __pyx_L1_error) + __pyx_t_18 = PyDict_New(); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 452, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_18); - __pyx_t_16 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 454, __pyx_L1_error) + __pyx_t_16 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 452, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_uint32); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 454, __pyx_L1_error) + __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_uint32); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 452, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_19); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - if (PyDict_SetItem(__pyx_t_18, __pyx_n_s_dtype, __pyx_t_19) < 0) __PYX_ERR(0, 454, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_18, __pyx_n_s_dtype, __pyx_t_19) < 0) __PYX_ERR(0, 452, __pyx_L1_error) __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - __pyx_t_19 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_14, __pyx_t_18); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 454, __pyx_L1_error) + __pyx_t_19 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_14, __pyx_t_18); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 452, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_19); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; @@ -6073,39 +6037,39 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __Pyx_XDECREF_SET(__pyx_v_word_subwords, __pyx_t_19); __pyx_t_19 = 0; - /* "gensim/models/fasttext_inner.pyx":455 + /* "gensim/models/fasttext_inner.pyx":453 * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] * word_subwords = np.array(subwords, dtype=np.uint32) * subwords_idx_len[effective_words] = len(subwords) # <<<<<<<<<<<<<< * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) * # ensures reference count of word_subwords doesn't reach 0 */ - __pyx_t_20 = PyList_GET_SIZE(__pyx_v_subwords); if (unlikely(__pyx_t_20 == -1)) __PYX_ERR(0, 455, __pyx_L1_error) + __pyx_t_20 = PyList_GET_SIZE(__pyx_v_subwords); if (unlikely(__pyx_t_20 == -1)) __PYX_ERR(0, 453, __pyx_L1_error) (__pyx_v_subwords_idx_len[__pyx_v_effective_words]) = ((int)__pyx_t_20); - /* "gensim/models/fasttext_inner.pyx":456 + /* "gensim/models/fasttext_inner.pyx":454 * word_subwords = np.array(subwords, dtype=np.uint32) * subwords_idx_len[effective_words] = len(subwords) * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) # <<<<<<<<<<<<<< * # ensures reference count of word_subwords doesn't reach 0 * subword_arrays[effective_words] = word_subwords */ - if (!(likely(((__pyx_v_word_subwords) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_subwords, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 456, __pyx_L1_error) + if (!(likely(((__pyx_v_word_subwords) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_subwords, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 454, __pyx_L1_error) (__pyx_v_subwords_idx[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_subwords))); - /* "gensim/models/fasttext_inner.pyx":458 + /* "gensim/models/fasttext_inner.pyx":456 * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) * # ensures reference count of word_subwords doesn't reach 0 * subword_arrays[effective_words] = word_subwords # <<<<<<<<<<<<<< * * if hs: */ - __pyx_t_19 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 458, __pyx_L1_error) + __pyx_t_19 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 456, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_19); - if (unlikely(PyDict_SetItem(__pyx_v_subword_arrays, __pyx_t_19, __pyx_v_word_subwords) < 0)) __PYX_ERR(0, 458, __pyx_L1_error) + if (unlikely(PyDict_SetItem(__pyx_v_subword_arrays, __pyx_t_19, __pyx_v_word_subwords) < 0)) __PYX_ERR(0, 456, __pyx_L1_error) __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - /* "gensim/models/fasttext_inner.pyx":460 + /* "gensim/models/fasttext_inner.pyx":458 * subword_arrays[effective_words] = word_subwords * * if hs: # <<<<<<<<<<<<<< @@ -6115,46 +6079,46 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":461 + /* "gensim/models/fasttext_inner.pyx":459 * * if hs: * codelens[effective_words] = len(word.code) # <<<<<<<<<<<<<< * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) */ - __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 461, __pyx_L1_error) + __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 459, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_19); - __pyx_t_20 = PyObject_Length(__pyx_t_19); if (unlikely(__pyx_t_20 == -1)) __PYX_ERR(0, 461, __pyx_L1_error) + __pyx_t_20 = PyObject_Length(__pyx_t_19); if (unlikely(__pyx_t_20 == -1)) __PYX_ERR(0, 459, __pyx_L1_error) __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; (__pyx_v_codelens[__pyx_v_effective_words]) = ((int)__pyx_t_20); - /* "gensim/models/fasttext_inner.pyx":462 + /* "gensim/models/fasttext_inner.pyx":460 * if hs: * codelens[effective_words] = len(word.code) * codes[effective_words] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< * points[effective_words] = np.PyArray_DATA(word.point) * effective_words += 1 */ - __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 462, __pyx_L1_error) + __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 460, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_19); - if (!(likely(((__pyx_t_19) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_19, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 462, __pyx_L1_error) + if (!(likely(((__pyx_t_19) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_19, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 460, __pyx_L1_error) (__pyx_v_codes[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_19))); __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - /* "gensim/models/fasttext_inner.pyx":463 + /* "gensim/models/fasttext_inner.pyx":461 * codelens[effective_words] = len(word.code) * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: */ - __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 463, __pyx_L1_error) + __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 461, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_19); - if (!(likely(((__pyx_t_19) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_19, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 463, __pyx_L1_error) + if (!(likely(((__pyx_t_19) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_19, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 461, __pyx_L1_error) (__pyx_v_points[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_19))); __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - /* "gensim/models/fasttext_inner.pyx":460 + /* "gensim/models/fasttext_inner.pyx":458 * subword_arrays[effective_words] = word_subwords * * if hs: # <<<<<<<<<<<<<< @@ -6163,7 +6127,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":464 + /* "gensim/models/fasttext_inner.pyx":462 * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) * effective_words += 1 # <<<<<<<<<<<<<< @@ -6172,7 +6136,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_effective_words = (__pyx_v_effective_words + 1); - /* "gensim/models/fasttext_inner.pyx":465 + /* "gensim/models/fasttext_inner.pyx":463 * points[effective_words] = np.PyArray_DATA(word.point) * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< @@ -6182,7 +6146,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = ((__pyx_v_effective_words == 0x2710) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":466 + /* "gensim/models/fasttext_inner.pyx":464 * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: * break # <<<<<<<<<<<<<< @@ -6191,7 +6155,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ goto __pyx_L12_break; - /* "gensim/models/fasttext_inner.pyx":465 + /* "gensim/models/fasttext_inner.pyx":463 * points[effective_words] = np.PyArray_DATA(word.point) * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< @@ -6200,19 +6164,19 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":443 + /* "gensim/models/fasttext_inner.pyx":442 * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: # <<<<<<<<<<<<<< - * print(token) * word = vlookup[token] if token in vlookup else None + * if word is None: */ __pyx_L11_continue:; } __pyx_L12_break:; __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/fasttext_inner.pyx":471 + /* "gensim/models/fasttext_inner.pyx":469 * # across sentence boundaries. * # indices of sentence number X are between tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 478, __pyx_L1_error) + __pyx_t_10 = Py_TYPE(__pyx_t_19)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 476, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; for (;;) { @@ -6353,17 +6317,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT if (likely(PyList_CheckExact(__pyx_t_19))) { if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_19)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyList_GET_ITEM(__pyx_t_19, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 478, __pyx_L1_error) + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_19, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 476, __pyx_L1_error) #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_19, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 478, __pyx_L1_error) + __pyx_t_3 = PySequence_ITEM(__pyx_t_19, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 476, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); #endif } else { if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_19)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_19, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 478, __pyx_L1_error) + __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_19, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 476, __pyx_L1_error) #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_19, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 478, __pyx_L1_error) + __pyx_t_3 = PySequence_ITEM(__pyx_t_19, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 476, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); #endif } @@ -6373,7 +6337,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 478, __pyx_L1_error) + else __PYX_ERR(0, 476, __pyx_L1_error) } break; } @@ -6384,17 +6348,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_v_i = __pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "gensim/models/fasttext_inner.pyx":479 + /* "gensim/models/fasttext_inner.pyx":477 * # precompute "reduced window" offsets in a single randint() call * for i, item in enumerate(model.random.randint(0, window, effective_words)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * * # release GIL & train on all sentences */ - __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 479, __pyx_L1_error) + __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 477, __pyx_L1_error) (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_15; - /* "gensim/models/fasttext_inner.pyx":478 + /* "gensim/models/fasttext_inner.pyx":476 * * # precompute "reduced window" offsets in a single randint() call * for i, item in enumerate(model.random.randint(0, window, effective_words)): # <<<<<<<<<<<<<< @@ -6404,7 +6368,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT } __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - /* "gensim/models/fasttext_inner.pyx":482 + /* "gensim/models/fasttext_inner.pyx":480 * * # release GIL & train on all sentences * with nogil: # <<<<<<<<<<<<<< @@ -6418,7 +6382,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT #endif /*try:*/ { - /* "gensim/models/fasttext_inner.pyx":483 + /* "gensim/models/fasttext_inner.pyx":481 * # release GIL & train on all sentences * with nogil: * for sent_idx in range(effective_sentences): # <<<<<<<<<<<<<< @@ -6429,7 +6393,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT for (__pyx_t_22 = 0; __pyx_t_22 < __pyx_t_2; __pyx_t_22+=1) { __pyx_v_sent_idx = __pyx_t_22; - /* "gensim/models/fasttext_inner.pyx":484 + /* "gensim/models/fasttext_inner.pyx":482 * with nogil: * for sent_idx in range(effective_sentences): * idx_start = sentence_idx[sent_idx] # <<<<<<<<<<<<<< @@ -6438,7 +6402,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_idx_start = (__pyx_v_sentence_idx[__pyx_v_sent_idx]); - /* "gensim/models/fasttext_inner.pyx":485 + /* "gensim/models/fasttext_inner.pyx":483 * for sent_idx in range(effective_sentences): * idx_start = sentence_idx[sent_idx] * idx_end = sentence_idx[sent_idx + 1] # <<<<<<<<<<<<<< @@ -6447,7 +6411,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_idx_end = (__pyx_v_sentence_idx[(__pyx_v_sent_idx + 1)]); - /* "gensim/models/fasttext_inner.pyx":486 + /* "gensim/models/fasttext_inner.pyx":484 * idx_start = sentence_idx[sent_idx] * idx_end = sentence_idx[sent_idx + 1] * for i in range(idx_start, idx_end): # <<<<<<<<<<<<<< @@ -6458,7 +6422,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT for (__pyx_t_24 = __pyx_v_idx_start; __pyx_t_24 < __pyx_t_23; __pyx_t_24+=1) { __pyx_v_i = __pyx_t_24; - /* "gensim/models/fasttext_inner.pyx":487 + /* "gensim/models/fasttext_inner.pyx":485 * idx_end = sentence_idx[sent_idx + 1] * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< @@ -6467,7 +6431,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/fasttext_inner.pyx":488 + /* "gensim/models/fasttext_inner.pyx":486 * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] * if j < idx_start: # <<<<<<<<<<<<<< @@ -6477,7 +6441,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = ((__pyx_v_j < __pyx_v_idx_start) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":489 + /* "gensim/models/fasttext_inner.pyx":487 * j = i - window + reduced_windows[i] * if j < idx_start: * j = idx_start # <<<<<<<<<<<<<< @@ -6486,7 +6450,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_j = __pyx_v_idx_start; - /* "gensim/models/fasttext_inner.pyx":488 + /* "gensim/models/fasttext_inner.pyx":486 * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] * if j < idx_start: # <<<<<<<<<<<<<< @@ -6495,7 +6459,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":490 + /* "gensim/models/fasttext_inner.pyx":488 * if j < idx_start: * j = idx_start * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -6504,7 +6468,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/fasttext_inner.pyx":491 + /* "gensim/models/fasttext_inner.pyx":489 * j = idx_start * k = i + window + 1 - reduced_windows[i] * if k > idx_end: # <<<<<<<<<<<<<< @@ -6514,7 +6478,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = ((__pyx_v_k > __pyx_v_idx_end) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":492 + /* "gensim/models/fasttext_inner.pyx":490 * k = i + window + 1 - reduced_windows[i] * if k > idx_end: * k = idx_end # <<<<<<<<<<<<<< @@ -6523,7 +6487,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_k = __pyx_v_idx_end; - /* "gensim/models/fasttext_inner.pyx":491 + /* "gensim/models/fasttext_inner.pyx":489 * j = idx_start * k = i + window + 1 - reduced_windows[i] * if k > idx_end: # <<<<<<<<<<<<<< @@ -6532,7 +6496,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":494 + /* "gensim/models/fasttext_inner.pyx":492 * k = idx_end * * if hs: # <<<<<<<<<<<<<< @@ -6542,7 +6506,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":495 + /* "gensim/models/fasttext_inner.pyx":493 * * if hs: * fast_sentence_cbow_hs( # <<<<<<<<<<<<<< @@ -6551,7 +6515,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), __pyx_v_codelens, __pyx_v_neu1, __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1, __pyx_v_size, __pyx_v_indexes, __pyx_v_subwords_idx, __pyx_v_subwords_idx_len, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); - /* "gensim/models/fasttext_inner.pyx":494 + /* "gensim/models/fasttext_inner.pyx":492 * k = idx_end * * if hs: # <<<<<<<<<<<<<< @@ -6560,7 +6524,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":499 + /* "gensim/models/fasttext_inner.pyx":497 * subwords_idx,subwords_idx_len,_alpha, work, i, j, k, cbow_mean, word_locks_vocab, * word_locks_ngrams) * if negative: # <<<<<<<<<<<<<< @@ -6570,7 +6534,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_v_negative != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":500 + /* "gensim/models/fasttext_inner.pyx":498 * word_locks_ngrams) * if negative: * next_random = fast_sentence_cbow_neg( # <<<<<<<<<<<<<< @@ -6579,7 +6543,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_next_random = __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_neg(__pyx_v_negative, __pyx_v_cum_table, __pyx_v_cum_table_len, __pyx_v_codelens, __pyx_v_neu1, __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1neg, __pyx_v_size, __pyx_v_indexes, __pyx_v_subwords_idx, __pyx_v_subwords_idx_len, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean, __pyx_v_next_random, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); - /* "gensim/models/fasttext_inner.pyx":499 + /* "gensim/models/fasttext_inner.pyx":497 * subwords_idx,subwords_idx_len,_alpha, work, i, j, k, cbow_mean, word_locks_vocab, * word_locks_ngrams) * if negative: # <<<<<<<<<<<<<< @@ -6591,7 +6555,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT } } - /* "gensim/models/fasttext_inner.pyx":482 + /* "gensim/models/fasttext_inner.pyx":480 * * # release GIL & train on all sentences * with nogil: # <<<<<<<<<<<<<< @@ -6609,7 +6573,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT } } - /* "gensim/models/fasttext_inner.pyx":505 + /* "gensim/models/fasttext_inner.pyx":503 * cbow_mean, next_random, word_locks_vocab, word_locks_ngrams) * * return effective_words # <<<<<<<<<<<<<< @@ -6617,7 +6581,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_19 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 505, __pyx_L1_error) + __pyx_t_19 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 503, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_19); __pyx_r = __pyx_t_19; __pyx_t_19 = 0; @@ -6659,7 +6623,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT return __pyx_r; } -/* "gensim/models/fasttext_inner.pyx":508 +/* "gensim/models/fasttext_inner.pyx":506 * * * def init(): # <<<<<<<<<<<<<< @@ -6698,7 +6662,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P int __pyx_t_4; __Pyx_RefNannySetupContext("init", 0); - /* "gensim/models/fasttext_inner.pyx":517 + /* "gensim/models/fasttext_inner.pyx":515 * * cdef int i * cdef float *x = [10.0] # <<<<<<<<<<<<<< @@ -6708,7 +6672,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_t_1[0] = ((float)10.0); __pyx_v_x = __pyx_t_1; - /* "gensim/models/fasttext_inner.pyx":518 + /* "gensim/models/fasttext_inner.pyx":516 * cdef int i * cdef float *x = [10.0] * cdef float *y = [0.01] # <<<<<<<<<<<<<< @@ -6718,7 +6682,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_t_2[0] = ((float)0.01); __pyx_v_y = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":519 + /* "gensim/models/fasttext_inner.pyx":517 * cdef float *x = [10.0] * cdef float *y = [0.01] * cdef float expected = 0.1 # <<<<<<<<<<<<<< @@ -6727,7 +6691,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_expected = ((float)0.1); - /* "gensim/models/fasttext_inner.pyx":520 + /* "gensim/models/fasttext_inner.pyx":518 * cdef float *y = [0.01] * cdef float expected = 0.1 * cdef int size = 1 # <<<<<<<<<<<<<< @@ -6736,7 +6700,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_size = 1; - /* "gensim/models/fasttext_inner.pyx":525 + /* "gensim/models/fasttext_inner.pyx":523 * * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): # <<<<<<<<<<<<<< @@ -6746,7 +6710,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P for (__pyx_t_3 = 0; __pyx_t_3 < 0x3E8; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3; - /* "gensim/models/fasttext_inner.pyx":526 + /* "gensim/models/fasttext_inner.pyx":524 * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) # <<<<<<<<<<<<<< @@ -6755,7 +6719,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)exp(((((__pyx_v_i / ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0x3E8)) * 2.0) - 1.0) * 6.0))); - /* "gensim/models/fasttext_inner.pyx":527 + /* "gensim/models/fasttext_inner.pyx":525 * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) # <<<<<<<<<<<<<< @@ -6764,7 +6728,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) / ((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) + 1.0))); - /* "gensim/models/fasttext_inner.pyx":528 + /* "gensim/models/fasttext_inner.pyx":526 * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) * LOG_TABLE[i] = log( EXP_TABLE[i] ) # <<<<<<<<<<<<<< @@ -6774,7 +6738,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P (__pyx_v_6gensim_6models_14fasttext_inner_LOG_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)log((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]))); } - /* "gensim/models/fasttext_inner.pyx":531 + /* "gensim/models/fasttext_inner.pyx":529 * * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) # <<<<<<<<<<<<<< @@ -6783,7 +6747,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_d_res = __pyx_v_6gensim_6models_14word2vec_inner_dsdot((&__pyx_v_size), __pyx_v_x, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_y, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":532 + /* "gensim/models/fasttext_inner.pyx":530 * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res # <<<<<<<<<<<<<< @@ -6792,7 +6756,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_p_res = ((float *)(&__pyx_v_d_res)); - /* "gensim/models/fasttext_inner.pyx":533 + /* "gensim/models/fasttext_inner.pyx":531 * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res * if (abs(d_res - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -6802,7 +6766,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_t_4 = ((fabs((__pyx_v_d_res - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_4) { - /* "gensim/models/fasttext_inner.pyx":534 + /* "gensim/models/fasttext_inner.pyx":532 * p_res = &d_res * if (abs(d_res - expected) < 0.0001): * our_dot = our_dot_double # <<<<<<<<<<<<<< @@ -6811,7 +6775,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_double; - /* "gensim/models/fasttext_inner.pyx":535 + /* "gensim/models/fasttext_inner.pyx":533 * if (abs(d_res - expected) < 0.0001): * our_dot = our_dot_double * our_saxpy = saxpy # <<<<<<<<<<<<<< @@ -6820,7 +6784,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_v_6gensim_6models_14word2vec_inner_saxpy; - /* "gensim/models/fasttext_inner.pyx":536 + /* "gensim/models/fasttext_inner.pyx":534 * our_dot = our_dot_double * our_saxpy = saxpy * return 0 # double # <<<<<<<<<<<<<< @@ -6832,7 +6796,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_r = __pyx_int_0; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":533 + /* "gensim/models/fasttext_inner.pyx":531 * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res * if (abs(d_res - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -6841,7 +6805,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ } - /* "gensim/models/fasttext_inner.pyx":537 + /* "gensim/models/fasttext_inner.pyx":535 * our_saxpy = saxpy * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -6851,7 +6815,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_t_4 = ((fabsf(((__pyx_v_p_res[0]) - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_4) { - /* "gensim/models/fasttext_inner.pyx":538 + /* "gensim/models/fasttext_inner.pyx":536 * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): * our_dot = our_dot_float # <<<<<<<<<<<<<< @@ -6860,7 +6824,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_float; - /* "gensim/models/fasttext_inner.pyx":539 + /* "gensim/models/fasttext_inner.pyx":537 * elif (abs(p_res[0] - expected) < 0.0001): * our_dot = our_dot_float * our_saxpy = saxpy # <<<<<<<<<<<<<< @@ -6869,7 +6833,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_v_6gensim_6models_14word2vec_inner_saxpy; - /* "gensim/models/fasttext_inner.pyx":540 + /* "gensim/models/fasttext_inner.pyx":538 * our_dot = our_dot_float * our_saxpy = saxpy * return 1 # float # <<<<<<<<<<<<<< @@ -6881,7 +6845,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_r = __pyx_int_1; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":537 + /* "gensim/models/fasttext_inner.pyx":535 * our_saxpy = saxpy * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -6890,7 +6854,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ } - /* "gensim/models/fasttext_inner.pyx":544 + /* "gensim/models/fasttext_inner.pyx":542 * # neither => use cython loops, no BLAS * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here * our_dot = our_dot_noblas # <<<<<<<<<<<<<< @@ -6900,7 +6864,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P /*else*/ { __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_noblas; - /* "gensim/models/fasttext_inner.pyx":545 + /* "gensim/models/fasttext_inner.pyx":543 * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas # <<<<<<<<<<<<<< @@ -6909,7 +6873,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_f_6gensim_6models_14word2vec_inner_our_saxpy_noblas; - /* "gensim/models/fasttext_inner.pyx":546 + /* "gensim/models/fasttext_inner.pyx":544 * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas * return 2 # <<<<<<<<<<<<<< @@ -6922,7 +6886,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P goto __pyx_L0; } - /* "gensim/models/fasttext_inner.pyx":508 + /* "gensim/models/fasttext_inner.pyx":506 * * * def init(): # <<<<<<<<<<<<<< @@ -9474,7 +9438,6 @@ static struct PyModuleDef __pyx_moduledef = { #endif static __Pyx_StringTabEntry __pyx_string_tab[] = { - {&__pyx_kp_s_CYTHON_BATCH_CBOW, __pyx_k_CYTHON_BATCH_CBOW, sizeof(__pyx_k_CYTHON_BATCH_CBOW), 0, 0, 1, 0}, {&__pyx_n_s_FAST_VERSION, __pyx_k_FAST_VERSION, sizeof(__pyx_k_FAST_VERSION), 0, 0, 1, 1}, {&__pyx_kp_u_Format_string_allocated_too_shor, __pyx_k_Format_string_allocated_too_shor, sizeof(__pyx_k_Format_string_allocated_too_shor), 0, 1, 0, 0}, {&__pyx_kp_u_Format_string_allocated_too_shor_2, __pyx_k_Format_string_allocated_too_shor_2, sizeof(__pyx_k_Format_string_allocated_too_shor_2), 0, 1, 0, 0}, @@ -9500,11 +9463,9 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_dtype, __pyx_k_dtype, sizeof(__pyx_k_dtype), 0, 0, 1, 1}, {&__pyx_n_s_effective_sentences, __pyx_k_effective_sentences, sizeof(__pyx_k_effective_sentences), 0, 0, 1, 1}, {&__pyx_n_s_effective_words, __pyx_k_effective_words, sizeof(__pyx_k_effective_words), 0, 0, 1, 1}, - {&__pyx_n_s_end, __pyx_k_end, sizeof(__pyx_k_end), 0, 0, 1, 1}, {&__pyx_n_s_enumerate, __pyx_k_enumerate, sizeof(__pyx_k_enumerate), 0, 0, 1, 1}, {&__pyx_n_s_expected, __pyx_k_expected, sizeof(__pyx_k_expected), 0, 0, 1, 1}, {&__pyx_n_s_fblas, __pyx_k_fblas, sizeof(__pyx_k_fblas), 0, 0, 1, 1}, - {&__pyx_n_s_file, __pyx_k_file, sizeof(__pyx_k_file), 0, 0, 1, 1}, {&__pyx_n_s_float32, __pyx_k_float32, sizeof(__pyx_k_float32), 0, 0, 1, 1}, {&__pyx_n_s_ft_hash, __pyx_k_ft_hash, sizeof(__pyx_k_ft_hash), 0, 0, 1, 1}, {&__pyx_n_s_gensim_models_fasttext_inner, __pyx_k_gensim_models_fasttext_inner, sizeof(__pyx_k_gensim_models_fasttext_inner), 0, 0, 1, 1}, @@ -9540,7 +9501,6 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_p_res, __pyx_k_p_res, sizeof(__pyx_k_p_res), 0, 0, 1, 1}, {&__pyx_n_s_point, __pyx_k_point, sizeof(__pyx_k_point), 0, 0, 1, 1}, {&__pyx_n_s_points, __pyx_k_points, sizeof(__pyx_k_points), 0, 0, 1, 1}, - {&__pyx_n_s_print, __pyx_k_print, sizeof(__pyx_k_print), 0, 0, 1, 1}, {&__pyx_n_s_randint, __pyx_k_randint, sizeof(__pyx_k_randint), 0, 0, 1, 1}, {&__pyx_n_s_random, __pyx_k_random, sizeof(__pyx_k_random), 0, 0, 1, 1}, {&__pyx_n_s_range, __pyx_k_range, sizeof(__pyx_k_range), 0, 0, 1, 1}, @@ -9619,17 +9579,17 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__2); __Pyx_GIVEREF(__pyx_tuple__2); - /* "gensim/models/fasttext_inner.pyx":431 + /* "gensim/models/fasttext_inner.pyx":430 * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) __PYX_ERR(0, 431, __pyx_L1_error) + __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) __PYX_ERR(0, 430, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__3); __Pyx_GIVEREF(__pyx_tuple__3); - __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) __PYX_ERR(0, 431, __pyx_L1_error) + __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) __PYX_ERR(0, 430, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__4); __Pyx_GIVEREF(__pyx_tuple__4); @@ -9754,17 +9714,17 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(__pyx_tuple__17); __pyx_codeobj__18 = (PyObject*)__Pyx_PyCode_New(5, 0, 48, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__17, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_jojo_projekte_gensim_gensi, __pyx_n_s_train_batch_cbow, 378, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__18)) __PYX_ERR(0, 378, __pyx_L1_error) - /* "gensim/models/fasttext_inner.pyx":508 + /* "gensim/models/fasttext_inner.pyx":506 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_tuple__19 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__19)) __PYX_ERR(0, 508, __pyx_L1_error) + __pyx_tuple__19 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__19)) __PYX_ERR(0, 506, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__19); __Pyx_GIVEREF(__pyx_tuple__19); - __pyx_codeobj__20 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__19, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_jojo_projekte_gensim_gensi, __pyx_n_s_init, 508, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__20)) __PYX_ERR(0, 508, __pyx_L1_error) + __pyx_codeobj__20 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__19, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_jojo_projekte_gensim_gensi, __pyx_n_s_init, 506, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__20)) __PYX_ERR(0, 506, __pyx_L1_error) __Pyx_RefNannyFinishContext(); return 0; __pyx_L1_error:; @@ -10130,26 +10090,26 @@ PyMODINIT_FUNC PyInit_fasttext_inner(void) if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_batch_cbow, __pyx_t_9) < 0) __PYX_ERR(0, 378, __pyx_L1_error) __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "gensim/models/fasttext_inner.pyx":508 + /* "gensim/models/fasttext_inner.pyx":506 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_5init, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 508, __pyx_L1_error) + __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_5init, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 506, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_9); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_9) < 0) __PYX_ERR(0, 508, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_9) < 0) __PYX_ERR(0, 506, __pyx_L1_error) __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "gensim/models/fasttext_inner.pyx":548 + /* "gensim/models/fasttext_inner.pyx":546 * return 2 * * FAST_VERSION = init() # initialize the module # <<<<<<<<<<<<<< * MAX_WORDS_IN_BATCH = MAX_SENTENCE_LEN * */ - __pyx_t_3 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 548, __pyx_L1_error) + __pyx_t_3 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 546, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_7 = NULL; if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_3))) { @@ -10162,23 +10122,23 @@ PyMODINIT_FUNC PyInit_fasttext_inner(void) } } if (__pyx_t_7) { - __pyx_t_9 = __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_7); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 548, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_7); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 546, __pyx_L1_error) __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; } else { - __pyx_t_9 = __Pyx_PyObject_CallNoArg(__pyx_t_3); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 548, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyObject_CallNoArg(__pyx_t_3); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 546, __pyx_L1_error) } __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_9) < 0) __PYX_ERR(0, 548, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_9) < 0) __PYX_ERR(0, 546, __pyx_L1_error) __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "gensim/models/fasttext_inner.pyx":549 + /* "gensim/models/fasttext_inner.pyx":547 * * FAST_VERSION = init() # initialize the module * MAX_WORDS_IN_BATCH = MAX_SENTENCE_LEN # <<<<<<<<<<<<<< * */ - if (PyDict_SetItem(__pyx_d, __pyx_n_s_MAX_WORDS_IN_BATCH, __pyx_int_10000) < 0) __PYX_ERR(0, 549, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_MAX_WORDS_IN_BATCH, __pyx_int_10000) < 0) __PYX_ERR(0, 547, __pyx_L1_error) /* "gensim/models/fasttext_inner.pyx":1 * #!/usr/bin/env cython # <<<<<<<<<<<<<< @@ -11369,112 +11329,6 @@ static void __Pyx_AddTraceback(const char *funcname, int c_line, } } -/* Print */ - #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION < 3 -static PyObject *__Pyx_GetStdout(void) { - PyObject *f = PySys_GetObject((char *)"stdout"); - if (!f) { - PyErr_SetString(PyExc_RuntimeError, "lost sys.stdout"); - } - return f; -} -static int __Pyx_Print(PyObject* f, PyObject *arg_tuple, int newline) { - int i; - if (!f) { - if (!(f = __Pyx_GetStdout())) - return -1; - } - Py_INCREF(f); - for (i=0; i < PyTuple_GET_SIZE(arg_tuple); i++) { - PyObject* v; - if (PyFile_SoftSpace(f, 1)) { - if (PyFile_WriteString(" ", f) < 0) - goto error; - } - v = PyTuple_GET_ITEM(arg_tuple, i); - if (PyFile_WriteObject(v, f, Py_PRINT_RAW) < 0) - goto error; - if (PyString_Check(v)) { - char *s = PyString_AsString(v); - Py_ssize_t len = PyString_Size(v); - if (len > 0) { - switch (s[len-1]) { - case ' ': break; - case '\f': case '\r': case '\n': case '\t': case '\v': - PyFile_SoftSpace(f, 0); - break; - default: break; - } - } - } - } - if (newline) { - if (PyFile_WriteString("\n", f) < 0) - goto error; - PyFile_SoftSpace(f, 0); - } - Py_DECREF(f); - return 0; -error: - Py_DECREF(f); - return -1; -} -#else -static int __Pyx_Print(PyObject* stream, PyObject *arg_tuple, int newline) { - PyObject* kwargs = 0; - PyObject* result = 0; - PyObject* end_string; - if (unlikely(!__pyx_print)) { - __pyx_print = PyObject_GetAttr(__pyx_b, __pyx_n_s_print); - if (!__pyx_print) - return -1; - } - if (stream) { - kwargs = PyDict_New(); - if (unlikely(!kwargs)) - return -1; - if (unlikely(PyDict_SetItem(kwargs, __pyx_n_s_file, stream) < 0)) - goto bad; - if (!newline) { - end_string = PyUnicode_FromStringAndSize(" ", 1); - if (unlikely(!end_string)) - goto bad; - if (PyDict_SetItem(kwargs, __pyx_n_s_end, end_string) < 0) { - Py_DECREF(end_string); - goto bad; - } - Py_DECREF(end_string); - } - } else if (!newline) { - if (unlikely(!__pyx_print_kwargs)) { - __pyx_print_kwargs = PyDict_New(); - if (unlikely(!__pyx_print_kwargs)) - return -1; - end_string = PyUnicode_FromStringAndSize(" ", 1); - if (unlikely(!end_string)) - return -1; - if (PyDict_SetItem(__pyx_print_kwargs, __pyx_n_s_end, end_string) < 0) { - Py_DECREF(end_string); - return -1; - } - Py_DECREF(end_string); - } - kwargs = __pyx_print_kwargs; - } - result = PyObject_Call(__pyx_print, arg_tuple, kwargs); - if (unlikely(kwargs) && (kwargs != __pyx_print_kwargs)) - Py_DECREF(kwargs); - if (!result) - return -1; - Py_DECREF(result); - return 0; -bad: - if (kwargs != __pyx_print_kwargs) - Py_XDECREF(kwargs); - return -1; -} -#endif - /* Declarations */ #if CYTHON_CCOMPLEX #ifdef __cplusplus @@ -12761,43 +12615,6 @@ static int __Pyx_Print(PyObject* stream, PyObject *arg_tuple, int newline) { return (npy_uint32) -1; } -/* PrintOne */ - #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION < 3 -static int __Pyx_PrintOne(PyObject* f, PyObject *o) { - if (!f) { - if (!(f = __Pyx_GetStdout())) - return -1; - } - Py_INCREF(f); - if (PyFile_SoftSpace(f, 0)) { - if (PyFile_WriteString(" ", f) < 0) - goto error; - } - if (PyFile_WriteObject(o, f, Py_PRINT_RAW) < 0) - goto error; - if (PyFile_WriteString("\n", f) < 0) - goto error; - Py_DECREF(f); - return 0; -error: - Py_DECREF(f); - return -1; - /* the line below is just to avoid C compiler - * warnings about unused functions */ - return __Pyx_Print(f, NULL, 0); -} -#else -static int __Pyx_PrintOne(PyObject* stream, PyObject *o) { - int res; - PyObject* arg_tuple = PyTuple_Pack(1, o); - if (unlikely(!arg_tuple)) - return -1; - res = __Pyx_Print(stream, arg_tuple, 1); - Py_DECREF(arg_tuple); - return res; -} -#endif - /* CheckBinaryVersion */ static int __Pyx_check_binary_version(void) { char ctversion[4], rtversion[4]; diff --git a/gensim/models/utils_any2vec_fast.c b/gensim/models/utils_any2vec_fast.c index 8bcca72c8c..e46231bb45 100644 --- a/gensim/models/utils_any2vec_fast.c +++ b/gensim/models/utils_any2vec_fast.c @@ -1013,7 +1013,7 @@ static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_ft_hash(CYTHON_UN /* "gensim/models/utils_any2vec_fast.pyx":15 * * - * cpdef compute_ngrams(unicode word, unsigned int min_n, unsigned int max_n): # <<<<<<<<<<<<<< + * cpdef compute_ngrams(word, unsigned int min_n, unsigned int max_n): # <<<<<<<<<<<<<< * cdef unicode extended_word = f'<{word}>' * ngrams = [] */ @@ -1042,7 +1042,7 @@ static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_compute_ngrams(PyO /* "gensim/models/utils_any2vec_fast.pyx":16 * - * cpdef compute_ngrams(unicode word, unsigned int min_n, unsigned int max_n): + * cpdef compute_ngrams(word, unsigned int min_n, unsigned int max_n): * cdef unicode extended_word = f'<{word}>' # <<<<<<<<<<<<<< * ngrams = [] * for ngram_length in range(min_n, min(len(extended_word), max_n) + 1): @@ -1073,7 +1073,7 @@ static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_compute_ngrams(PyO __pyx_t_4 = 0; /* "gensim/models/utils_any2vec_fast.pyx":17 - * cpdef compute_ngrams(unicode word, unsigned int min_n, unsigned int max_n): + * cpdef compute_ngrams(word, unsigned int min_n, unsigned int max_n): * cdef unicode extended_word = f'<{word}>' * ngrams = [] # <<<<<<<<<<<<<< * for ngram_length in range(min_n, min(len(extended_word), max_n) + 1): @@ -1275,7 +1275,7 @@ static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_compute_ngrams(PyO /* "gensim/models/utils_any2vec_fast.pyx":15 * * - * cpdef compute_ngrams(unicode word, unsigned int min_n, unsigned int max_n): # <<<<<<<<<<<<<< + * cpdef compute_ngrams(word, unsigned int min_n, unsigned int max_n): # <<<<<<<<<<<<<< * cdef unicode extended_word = f'<{word}>' * ngrams = [] */ @@ -1345,7 +1345,7 @@ static PyObject *__pyx_pw_6gensim_6models_18utils_any2vec_fast_3compute_ngrams(P values[1] = PyTuple_GET_ITEM(__pyx_args, 1); values[2] = PyTuple_GET_ITEM(__pyx_args, 2); } - __pyx_v_word = ((PyObject*)values[0]); + __pyx_v_word = values[0]; __pyx_v_min_n = __Pyx_PyInt_As_unsigned_int(values[1]); if (unlikely((__pyx_v_min_n == (unsigned int)-1) && PyErr_Occurred())) __PYX_ERR(0, 15, __pyx_L3_error) __pyx_v_max_n = __Pyx_PyInt_As_unsigned_int(values[2]); if (unlikely((__pyx_v_max_n == (unsigned int)-1) && PyErr_Occurred())) __PYX_ERR(0, 15, __pyx_L3_error) } @@ -1357,14 +1357,9 @@ static PyObject *__pyx_pw_6gensim_6models_18utils_any2vec_fast_3compute_ngrams(P __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_word), (&PyUnicode_Type), 1, "word", 1))) __PYX_ERR(0, 15, __pyx_L1_error) __pyx_r = __pyx_pf_6gensim_6models_18utils_any2vec_fast_2compute_ngrams(__pyx_self, __pyx_v_word, __pyx_v_min_n, __pyx_v_max_n); /* function exit code */ - goto __pyx_L0; - __pyx_L1_error:; - __pyx_r = NULL; - __pyx_L0:; __Pyx_RefNannyFinishContext(); return __pyx_r; } diff --git a/gensim/models/utils_any2vec_fast.pyx b/gensim/models/utils_any2vec_fast.pyx index 92d16585b5..05745c0526 100644 --- a/gensim/models/utils_any2vec_fast.pyx +++ b/gensim/models/utils_any2vec_fast.pyx @@ -12,7 +12,7 @@ def ft_hash(unicode string): return h -cpdef compute_ngrams(unicode word, unsigned int min_n, unsigned int max_n): +cpdef compute_ngrams(word, unsigned int min_n, unsigned int max_n): cdef unicode extended_word = f'<{word}>' ngrams = [] for ngram_length in range(min_n, min(len(extended_word), max_n) + 1): From 5c576ad3b0d1ddbd6477b3390cfc81519de71ed8 Mon Sep 17 00:00:00 2001 From: Johannes Baiter Date: Thu, 22 Feb 2018 13:05:47 +0100 Subject: [PATCH 05/20] Store OOV vec in variable for more informative assertion error in testPersistenceForOldVersions --- gensim/test/test_fasttext_wrapper.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gensim/test/test_fasttext_wrapper.py b/gensim/test/test_fasttext_wrapper.py index a81221cfeb..139063a268 100644 --- a/gensim/test/test_fasttext_wrapper.py +++ b/gensim/test/test_fasttext_wrapper.py @@ -357,7 +357,8 @@ def testPersistenceForOldVersions(self): # out-of-vocab word out_expected_vec = numpy.array([-0.33959097, -0.21121596, -0.37212455, -0.25057459, 0.11222091, 0.17517674, 0.26949012, 0.29352987, -0.1930912, -0.09438948]) - self.assertTrue(numpy.allclose(loaded_model["random_word"], out_expected_vec, atol=1e-4)) + out_vec = loaded_model["random_word"] + self.assertTrue(numpy.allclose(out_vec, out_expected_vec, atol=1e-4)) if __name__ == '__main__': From 0a5912c700a220d50ef3c4aaee111065ebfdee39 Mon Sep 17 00:00:00 2001 From: Johannes Baiter Date: Thu, 22 Feb 2018 14:23:20 +0100 Subject: [PATCH 06/20] Revert all changes to fasttext_wrapper --- gensim/models/deprecated/fasttext_wrapper.py | 34 +++++------- gensim/test/test_fasttext_wrapper.py | 55 +++++++++++--------- 2 files changed, 44 insertions(+), 45 deletions(-) diff --git a/gensim/models/deprecated/fasttext_wrapper.py b/gensim/models/deprecated/fasttext_wrapper.py index 9e7416c2a8..e48a96072f 100644 --- a/gensim/models/deprecated/fasttext_wrapper.py +++ b/gensim/models/deprecated/fasttext_wrapper.py @@ -70,7 +70,9 @@ def __init__(self): self.syn0_vocab_norm = None self.syn0_ngrams = None self.syn0_ngrams_norm = None + self.ngrams = {} self.hash2index = {} + self.ngrams_word = {} self.min_n = 0 self.max_n = 0 @@ -97,18 +99,17 @@ def word_vec(self, word, use_norm=False): return super(FastTextKeyedVectors, self).word_vec(word, use_norm) else: word_vec = np.zeros(self.syn0_ngrams.shape[1], dtype=np.float32) - hashes = [ft_hash(ng) % self.bucket - for ng in compute_ngrams(word, self.min_n, self.max_n)] - hashes = [h for h in hashes if h in self.hash2index] + ngrams = compute_ngrams(word, self.min_n, self.max_n) + ngrams = [ng for ng in ngrams if ng in self.ngrams] if use_norm: ngram_weights = self.syn0_ngrams_norm else: ngram_weights = self.syn0_ngrams - for ngram_hash in hashes: - word_vec += ngram_weights[self.hash2index[ngram_hash]] + for ngram in ngrams: + word_vec += ngram_weights[self.ngrams[ngram]] if word_vec.any(): - return word_vec / len(hashes) - else: # No hashes of any ngrams of the word are present in self.hash2index + return word_vec / len(ngrams) + else: # No ngrams of the word are present in self.ngrams raise KeyError('all ngrams for word %s absent from model' % word) def init_sims(self, replace=False): @@ -142,8 +143,7 @@ def __contains__(self, word): return True else: char_ngrams = compute_ngrams(word, self.min_n, self.max_n) - return any(ft_hash(ng) % self.bucket in self.hash2index - for ng in char_ngrams) + return any(ng in self.ngrams for ng in char_ngrams) @classmethod def load_word2vec_format(cls, *args, **kwargs): @@ -277,12 +277,6 @@ def load(cls, *args, **kwargs): if hasattr(model.wv, 'syn0_all'): setattr(model.wv, 'syn0_ngrams', model.wv.syn0_all) delattr(model.wv, 'syn0_all') - setattr(model.wv, 'bucket', model.wv.syn0_ngrams.shape[0]) - if not hasattr(model.wv, 'hash2index') and hasattr(model.wv, 'ngrams'): - model.wv.hash2index = {} - for i, ngram in enumerate(model.wv.ngrams): - ngram_hash = ft_hash(ngram) % model.wv.bucket - model.wv.hash2index[ngram_hash] = i return model @classmethod @@ -322,7 +316,6 @@ def load_model_params(self, file_handle): self.hs = loss == 1 self.sg = model == 2 self.bucket = bucket - self.wv.bucket = bucket self.wv.min_n = minn self.wv.max_n = maxn self.sample = t @@ -401,6 +394,7 @@ def init_ngrams(self): vectors are discarded here to save space. """ + self.wv.ngrams = {} all_ngrams = [] self.wv.syn0 = np.zeros((len(self.wv.vocab), self.vector_size), dtype=REAL) @@ -412,9 +406,9 @@ def init_ngrams(self): self.num_ngram_vectors = len(all_ngrams) ngram_indices = [] for i, ngram in enumerate(all_ngrams): - ngram_hash = ft_hash(ngram) % self.bucket + ngram_hash = ft_hash(ngram) ngram_indices.append(len(self.wv.vocab) + ngram_hash % self.bucket) - self.wv.hash2index[ngram_hash] = i + self.wv.ngrams[ngram] = i self.wv.syn0_ngrams = self.wv.syn0_ngrams.take(ngram_indices, axis=0) ngram_weights = self.wv.syn0_ngrams @@ -427,9 +421,7 @@ def init_ngrams(self): for w, vocab in self.wv.vocab.items(): word_ngrams = compute_ngrams(w, self.wv.min_n, self.wv.max_n) for word_ngram in word_ngrams: - ng_hash = ft_hash(word_ngram) % self.bucket - self.wv.syn0[vocab.index] += np.array( - ngram_weights[self.wv.hash2index[ng_hash]]) + self.wv.syn0[vocab.index] += np.array(ngram_weights[self.wv.ngrams[word_ngram]]) self.wv.syn0[vocab.index] /= (len(word_ngrams) + 1) logger.info( diff --git a/gensim/test/test_fasttext_wrapper.py b/gensim/test/test_fasttext_wrapper.py index 139063a268..bc995f8159 100644 --- a/gensim/test/test_fasttext_wrapper.py +++ b/gensim/test/test_fasttext_wrapper.py @@ -143,16 +143,16 @@ def testLoadFastTextFormat(self): # vector for oov words are slightly different from original FastText due to discarding unused ngrams # obtained using a modified version of ./fasttext print-word-vectors lee_fasttext_new.bin expected_vec_oov = [ - -0.21929, - -0.53778, - -0.22463, - -0.41735, - 0.71737, - -1.59758, - -0.24833, - 0.62028, - 0.53203, - 0.77568 + -0.23825, + -0.58482, + -0.22276, + -0.41215, + 0.91015, + -1.6786, + -0.26724, + 0.58818, + 0.57828, + 0.75801 ] self.assertTrue(numpy.allclose(model["rejection"], expected_vec_oov, atol=1e-4)) @@ -194,16 +194,16 @@ def testLoadFastTextNewFormat(self): # vector for oov words are slightly different from original FastText due to discarding unused ngrams # obtained using a modified version of ./fasttext print-word-vectors lee_fasttext_new.bin expected_vec_oov = [ - -0.49111, - -0.13122, - -0.02109, - -0.88769, - -0.20105, - -0.91732, - 0.47243, - 0.19708, - -0.17856, - 0.19815 + -0.53378, + -0.19, + 0.013482, + -0.86767, + -0.21684, + -0.89928, + 0.45124, + 0.18025, + -0.14128, + 0.22508 ] self.assertTrue(numpy.allclose(new_model["rejection"], expected_vec_oov, atol=1e-4)) @@ -296,6 +296,8 @@ def testLookup(self): # Out of vocab check self.assertFalse('nights' in self.test_model.wv.vocab) self.assertTrue(numpy.allclose(self.test_model['nights'], self.test_model[['nights']])) + # Word with no ngrams in model + self.assertRaises(KeyError, lambda: self.test_model['a!@']) def testContains(self): """Tests __contains__ for in-vocab and out-of-vocab words""" @@ -305,14 +307,20 @@ def testContains(self): # Out of vocab check self.assertFalse('nights' in self.test_model.wv.vocab) self.assertTrue('nights' in self.test_model) + # Word with no ngrams in model + self.assertFalse('a!@' in self.test_model.wv.vocab) + self.assertFalse('a!@' in self.test_model) def testWmdistance(self): """Tests wmdistance for docs with in-vocab and out-of-vocab words""" doc = ['night', 'payment'] oov_doc = ['nights', 'forests', 'payments'] + ngrams_absent_doc = ['a!@', 'b#$'] dist = self.test_model.wmdistance(doc, oov_doc) self.assertNotEqual(float('inf'), dist) + dist = self.test_model.wmdistance(doc, ngrams_absent_doc) + self.assertEqual(float('inf'), dist) def testDoesntMatch(self): """Tests doesnt_match for list of out-of-vocab words""" @@ -355,10 +363,9 @@ def testPersistenceForOldVersions(self): 1.19031894, 2.01627707, 1.98942184, -1.39095843, -0.65036952]) self.assertTrue(numpy.allclose(loaded_model["the"], in_expected_vec, atol=1e-4)) # out-of-vocab word - out_expected_vec = numpy.array([-0.33959097, -0.21121596, -0.37212455, -0.25057459, 0.11222091, - 0.17517674, 0.26949012, 0.29352987, -0.1930912, -0.09438948]) - out_vec = loaded_model["random_word"] - self.assertTrue(numpy.allclose(out_vec, out_expected_vec, atol=1e-4)) + out_expected_vec = numpy.array([-1.34948218, -0.8686831, -1.51483142, -1.0164026, 0.56272298, + 0.66228276, 1.06477463, 1.1355902, -0.80972326, -0.39845538]) + self.assertTrue(numpy.allclose(loaded_model["random_word"], out_expected_vec, atol=1e-4)) if __name__ == '__main__': From 9a36b08f08b3737b1f010c66fa9a04314fc05bf1 Mon Sep 17 00:00:00 2001 From: Johannes Baiter Date: Fri, 23 Feb 2018 15:06:29 +0100 Subject: [PATCH 07/20] Fix indentation for multi-line expressions --- gensim/models/fasttext.py | 16 +- gensim/models/fasttext_inner.c | 884 +++++++++++++-------------- gensim/models/fasttext_inner.pyx | 12 +- gensim/models/keyedvectors.py | 3 +- gensim/models/utils_any2vec_fast.c | 158 +++-- gensim/models/utils_any2vec_fast.pyx | 2 +- 6 files changed, 527 insertions(+), 548 deletions(-) diff --git a/gensim/models/fasttext.py b/gensim/models/fasttext.py index 6a0aab2988..908b0bfe03 100644 --- a/gensim/models/fasttext.py +++ b/gensim/models/fasttext.py @@ -92,8 +92,7 @@ def train_batch_cbow(model, sentences, alpha, work=None, neu1=None): for index in word2_indices: vocab_subwords_indices += [index] - word2_subwords += _compute_ngrams(model.wv.index2word[index], - model.min_n, model.max_n) + word2_subwords += _compute_ngrams(model.wv.index2word[index], model.min_n, model.max_n) for subword in word2_subwords: ngrams_subwords_indices.append( @@ -145,8 +144,7 @@ def train_batch_sg(model, sentences, alpha, work=None, neu1=None): start = max(0, pos - model.window + reduced_window) subwords_indices = [word.index] - word2_subwords = _compute_ngrams(model.wv.index2word[word.index], - model.min_n, model.max_n) + word2_subwords = _compute_ngrams(model.wv.index2word[word.index], model.min_n, model.max_n) for subword in word2_subwords: subwords_indices.append(model.wv.hash2index[_ft_hash(subword) % model.bucket]) @@ -419,9 +417,10 @@ def estimate_memory(self, vocab_size=None, report=None): num_buckets = len(buckets) report['syn0_ngrams'] = len(buckets) * vec_size elif self.word_ngrams > 0: - logger.warn('subword information is enabled, but no vocabulary ' - 'could be found, estimated required memory might be ' - 'inaccurate!') + logger.warn( + 'subword information is enabled, but no vocabulary could be found, estimated required memory might be ' + 'inaccurate!' + ) report['total'] = sum(report.values()) logger.info( "estimated required memory for %i words, %i buckets and %i dimensions: %i bytes", @@ -849,8 +848,7 @@ def get_vocab_word_vecs(self, wv): ngrams = _compute_ngrams(w, wv.min_n, wv.max_n) ngram_weights = wv.vectors_ngrams for ngram in ngrams: - word_vec += ngram_weights[ - wv.hash2index[_ft_hash(ngram) % self.bucket]] + word_vec += ngram_weights[wv.hash2index[_ft_hash(ngram) % self.bucket]] word_vec /= (len(ngrams) + 1) wv.vectors[v.index] = word_vec diff --git a/gensim/models/fasttext_inner.c b/gensim/models/fasttext_inner.c index f5becae79d..7fe2508926 100644 --- a/gensim/models/fasttext_inner.c +++ b/gensim/models/fasttext_inner.c @@ -1613,7 +1613,7 @@ static const char __pyx_k_vectors_vocab_lockf[] = "vectors_vocab_lockf"; static const char __pyx_k_vectors_ngrams_lockf[] = "vectors_ngrams_lockf"; static const char __pyx_k_ndarray_is_not_C_contiguous[] = "ndarray is not C contiguous"; static const char __pyx_k_gensim_models_fasttext_inner[] = "gensim.models.fasttext_inner"; -static const char __pyx_k_home_jojo_projekte_gensim_gensi[] = "/home/jojo/projekte/gensim/gensim/models/fasttext_inner.pyx"; +static const char __pyx_k_home_jbaiter_projekte_gensim_ge[] = "/home/jbaiter/.projekte/gensim/gensim/models/fasttext_inner.pyx"; static const char __pyx_k_numpy_core_multiarray_failed_to[] = "numpy.core.multiarray failed to import"; static const char __pyx_k_unknown_dtype_code_in_numpy_pxd[] = "unknown dtype code in numpy.pxd (%d)"; static const char __pyx_k_Format_string_allocated_too_shor[] = "Format string allocated too short, see comment in numpy.pxd"; @@ -1653,7 +1653,7 @@ static PyObject *__pyx_n_s_float32; static PyObject *__pyx_n_s_ft_hash; static PyObject *__pyx_n_s_gensim_models_fasttext_inner; static PyObject *__pyx_n_s_hash2index; -static PyObject *__pyx_kp_s_home_jojo_projekte_gensim_gensi; +static PyObject *__pyx_kp_s_home_jbaiter_projekte_gensim_ge; static PyObject *__pyx_n_s_hs; static PyObject *__pyx_n_s_i; static PyObject *__pyx_n_s_idx_end; @@ -4124,7 +4124,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON * continue * indexes[effective_words] = word.index # <<<<<<<<<<<<<< * - * subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] + * subwords = [ */ __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 318, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); @@ -4135,30 +4135,30 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON /* "gensim/models/fasttext_inner.pyx":320 * indexes[effective_words] = word.index * - * subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] # <<<<<<<<<<<<<< - * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] - * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) + * subwords = [ # <<<<<<<<<<<<<< + * model.wv.hash2index[ft_hash(subword) % model.bucket] + * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n) */ __pyx_t_14 = PyList_New(0); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - /* "gensim/models/fasttext_inner.pyx":321 - * - * subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] - * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] # <<<<<<<<<<<<<< + /* "gensim/models/fasttext_inner.pyx":322 + * subwords = [ + * model.wv.hash2index[ft_hash(subword) % model.bucket] + * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n) # <<<<<<<<<<<<<< + * ] * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) - * subwords_idx_len[effective_words] = (len(subwords) + 1) */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_compute_ngrams); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 321, __pyx_L1_error) + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_compute_ngrams); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 322, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 321, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 322, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_min_n); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 321, __pyx_L1_error) + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_min_n); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 322, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 321, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 322, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - __pyx_t_18 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_max_n); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 321, __pyx_L1_error) + __pyx_t_18 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_max_n); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 322, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_18); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; __pyx_t_16 = NULL; @@ -4176,7 +4176,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON #if CYTHON_FAST_PYCALL if (PyFunction_Check(__pyx_t_1)) { PyObject *__pyx_temp[4] = {__pyx_t_16, __pyx_v_token, __pyx_t_17, __pyx_t_18}; - __pyx_t_13 = __Pyx_PyFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_2, 3+__pyx_t_2); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 321, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_2, 3+__pyx_t_2); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 322, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_16); __pyx_t_16 = 0; __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; @@ -4186,7 +4186,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON #if CYTHON_FAST_PYCCALL if (__Pyx_PyFastCFunction_Check(__pyx_t_1)) { PyObject *__pyx_temp[4] = {__pyx_t_16, __pyx_v_token, __pyx_t_17, __pyx_t_18}; - __pyx_t_13 = __Pyx_PyCFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_2, 3+__pyx_t_2); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 321, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyCFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_2, 3+__pyx_t_2); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 322, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_16); __pyx_t_16 = 0; __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; @@ -4194,7 +4194,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON } else #endif { - __pyx_t_19 = PyTuple_New(3+__pyx_t_2); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 321, __pyx_L1_error) + __pyx_t_19 = PyTuple_New(3+__pyx_t_2); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 322, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_19); if (__pyx_t_16) { __Pyx_GIVEREF(__pyx_t_16); PyTuple_SET_ITEM(__pyx_t_19, 0, __pyx_t_16); __pyx_t_16 = NULL; @@ -4208,7 +4208,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON PyTuple_SET_ITEM(__pyx_t_19, 2+__pyx_t_2, __pyx_t_18); __pyx_t_17 = 0; __pyx_t_18 = 0; - __pyx_t_13 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_19, NULL); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 321, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_19, NULL); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 322, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; } @@ -4217,9 +4217,9 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_1 = __pyx_t_13; __Pyx_INCREF(__pyx_t_1); __pyx_t_20 = 0; __pyx_t_21 = NULL; } else { - __pyx_t_20 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_t_13); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 321, __pyx_L1_error) + __pyx_t_20 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_t_13); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 322, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_21 = Py_TYPE(__pyx_t_1)->tp_iternext; if (unlikely(!__pyx_t_21)) __PYX_ERR(0, 321, __pyx_L1_error) + __pyx_t_21 = Py_TYPE(__pyx_t_1)->tp_iternext; if (unlikely(!__pyx_t_21)) __PYX_ERR(0, 322, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; for (;;) { @@ -4227,17 +4227,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON if (likely(PyList_CheckExact(__pyx_t_1))) { if (__pyx_t_20 >= PyList_GET_SIZE(__pyx_t_1)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_13 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_20); __Pyx_INCREF(__pyx_t_13); __pyx_t_20++; if (unlikely(0 < 0)) __PYX_ERR(0, 321, __pyx_L1_error) + __pyx_t_13 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_20); __Pyx_INCREF(__pyx_t_13); __pyx_t_20++; if (unlikely(0 < 0)) __PYX_ERR(0, 322, __pyx_L1_error) #else - __pyx_t_13 = PySequence_ITEM(__pyx_t_1, __pyx_t_20); __pyx_t_20++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 321, __pyx_L1_error) + __pyx_t_13 = PySequence_ITEM(__pyx_t_1, __pyx_t_20); __pyx_t_20++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 322, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); #endif } else { if (__pyx_t_20 >= PyTuple_GET_SIZE(__pyx_t_1)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_13 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_20); __Pyx_INCREF(__pyx_t_13); __pyx_t_20++; if (unlikely(0 < 0)) __PYX_ERR(0, 321, __pyx_L1_error) + __pyx_t_13 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_20); __Pyx_INCREF(__pyx_t_13); __pyx_t_20++; if (unlikely(0 < 0)) __PYX_ERR(0, 322, __pyx_L1_error) #else - __pyx_t_13 = PySequence_ITEM(__pyx_t_1, __pyx_t_20); __pyx_t_20++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 321, __pyx_L1_error) + __pyx_t_13 = PySequence_ITEM(__pyx_t_1, __pyx_t_20); __pyx_t_20++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 322, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); #endif } @@ -4247,7 +4247,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 321, __pyx_L1_error) + else __PYX_ERR(0, 322, __pyx_L1_error) } break; } @@ -4256,19 +4256,19 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __Pyx_XDECREF_SET(__pyx_v_subword, __pyx_t_13); __pyx_t_13 = 0; - /* "gensim/models/fasttext_inner.pyx":320 - * indexes[effective_words] = word.index + /* "gensim/models/fasttext_inner.pyx":321 * - * subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] # <<<<<<<<<<<<<< - * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] - * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) + * subwords = [ + * model.wv.hash2index[ft_hash(subword) % model.bucket] # <<<<<<<<<<<<<< + * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n) + * ] */ - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 320, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_hash2index); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 320, __pyx_L1_error) + __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_hash2index); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_19); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_18 = __Pyx_GetModuleGlobalName(__pyx_n_s_ft_hash); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 320, __pyx_L1_error) + __pyx_t_18 = __Pyx_GetModuleGlobalName(__pyx_n_s_ft_hash); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_18); __pyx_t_17 = NULL; if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_18))) { @@ -4281,13 +4281,13 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON } } if (!__pyx_t_17) { - __pyx_t_13 = __Pyx_PyObject_CallOneArg(__pyx_t_18, __pyx_v_subword); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 320, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_CallOneArg(__pyx_t_18, __pyx_v_subword); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); } else { #if CYTHON_FAST_PYCALL if (PyFunction_Check(__pyx_t_18)) { PyObject *__pyx_temp[2] = {__pyx_t_17, __pyx_v_subword}; - __pyx_t_13 = __Pyx_PyFunction_FastCall(__pyx_t_18, __pyx_temp+1-1, 1+1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 320, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyFunction_FastCall(__pyx_t_18, __pyx_temp+1-1, 1+1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_17); __pyx_t_17 = 0; __Pyx_GOTREF(__pyx_t_13); } else @@ -4295,86 +4295,86 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON #if CYTHON_FAST_PYCCALL if (__Pyx_PyFastCFunction_Check(__pyx_t_18)) { PyObject *__pyx_temp[2] = {__pyx_t_17, __pyx_v_subword}; - __pyx_t_13 = __Pyx_PyCFunction_FastCall(__pyx_t_18, __pyx_temp+1-1, 1+1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 320, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyCFunction_FastCall(__pyx_t_18, __pyx_temp+1-1, 1+1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_17); __pyx_t_17 = 0; __Pyx_GOTREF(__pyx_t_13); } else #endif { - __pyx_t_16 = PyTuple_New(1+1); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 320, __pyx_L1_error) + __pyx_t_16 = PyTuple_New(1+1); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); __Pyx_GIVEREF(__pyx_t_17); PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_17); __pyx_t_17 = NULL; __Pyx_INCREF(__pyx_v_subword); __Pyx_GIVEREF(__pyx_v_subword); PyTuple_SET_ITEM(__pyx_t_16, 0+1, __pyx_v_subword); - __pyx_t_13 = __Pyx_PyObject_Call(__pyx_t_18, __pyx_t_16, NULL); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 320, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_Call(__pyx_t_18, __pyx_t_16, NULL); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; } } __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; - __pyx_t_18 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_bucket); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 320, __pyx_L1_error) + __pyx_t_18 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_bucket); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_18); - __pyx_t_16 = PyNumber_Remainder(__pyx_t_13, __pyx_t_18); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 320, __pyx_L1_error) + __pyx_t_16 = PyNumber_Remainder(__pyx_t_13, __pyx_t_18); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; - __pyx_t_18 = PyObject_GetItem(__pyx_t_19, __pyx_t_16); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 320, __pyx_L1_error) + __pyx_t_18 = PyObject_GetItem(__pyx_t_19, __pyx_t_16); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_18); __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; if (unlikely(__Pyx_ListComp_Append(__pyx_t_14, (PyObject*)__pyx_t_18))) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; - /* "gensim/models/fasttext_inner.pyx":321 - * - * subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] - * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] # <<<<<<<<<<<<<< + /* "gensim/models/fasttext_inner.pyx":322 + * subwords = [ + * model.wv.hash2index[ft_hash(subword) % model.bucket] + * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n) # <<<<<<<<<<<<<< + * ] * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) - * subwords_idx_len[effective_words] = (len(subwords) + 1) */ } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_XDECREF_SET(__pyx_v_subwords, ((PyObject*)__pyx_t_14)); __pyx_t_14 = 0; - /* "gensim/models/fasttext_inner.pyx":322 - * subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] - * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] + /* "gensim/models/fasttext_inner.pyx":324 + * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n) + * ] * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) # <<<<<<<<<<<<<< * subwords_idx_len[effective_words] = (len(subwords) + 1) * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) */ - __pyx_t_14 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 322, __pyx_L1_error) + __pyx_t_14 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 324, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_array); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 322, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_array); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 324, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 322, __pyx_L1_error) + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 324, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_18 = PyList_New(1); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 322, __pyx_L1_error) + __pyx_t_18 = PyList_New(1); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 324, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_18); __Pyx_GIVEREF(__pyx_t_14); PyList_SET_ITEM(__pyx_t_18, 0, __pyx_t_14); __pyx_t_14 = 0; - __pyx_t_14 = PyNumber_Add(__pyx_t_18, __pyx_v_subwords); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 322, __pyx_L1_error) + __pyx_t_14 = PyNumber_Add(__pyx_t_18, __pyx_v_subwords); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 324, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; - __pyx_t_18 = PyTuple_New(1); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 322, __pyx_L1_error) + __pyx_t_18 = PyTuple_New(1); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 324, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_18); __Pyx_GIVEREF(__pyx_t_14); PyTuple_SET_ITEM(__pyx_t_18, 0, __pyx_t_14); __pyx_t_14 = 0; - __pyx_t_14 = PyDict_New(); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 322, __pyx_L1_error) + __pyx_t_14 = PyDict_New(); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 324, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_16 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 322, __pyx_L1_error) + __pyx_t_16 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 324, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_uint32); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 322, __pyx_L1_error) + __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_uint32); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 324, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_19); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - if (PyDict_SetItem(__pyx_t_14, __pyx_n_s_dtype, __pyx_t_19) < 0) __PYX_ERR(0, 322, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_14, __pyx_n_s_dtype, __pyx_t_19) < 0) __PYX_ERR(0, 324, __pyx_L1_error) __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - __pyx_t_19 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_18, __pyx_t_14); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 322, __pyx_L1_error) + __pyx_t_19 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_18, __pyx_t_14); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 324, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_19); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; @@ -4382,39 +4382,39 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __Pyx_XDECREF_SET(__pyx_v_word_subwords, __pyx_t_19); __pyx_t_19 = 0; - /* "gensim/models/fasttext_inner.pyx":323 - * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] + /* "gensim/models/fasttext_inner.pyx":325 + * ] * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) * subwords_idx_len[effective_words] = (len(subwords) + 1) # <<<<<<<<<<<<<< * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) * # ensures reference count of word_subwords doesn't reach 0 */ - __pyx_t_20 = PyList_GET_SIZE(__pyx_v_subwords); if (unlikely(__pyx_t_20 == -1)) __PYX_ERR(0, 323, __pyx_L1_error) + __pyx_t_20 = PyList_GET_SIZE(__pyx_v_subwords); if (unlikely(__pyx_t_20 == -1)) __PYX_ERR(0, 325, __pyx_L1_error) (__pyx_v_subwords_idx_len[__pyx_v_effective_words]) = ((int)(__pyx_t_20 + 1)); - /* "gensim/models/fasttext_inner.pyx":324 + /* "gensim/models/fasttext_inner.pyx":326 * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) * subwords_idx_len[effective_words] = (len(subwords) + 1) * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) # <<<<<<<<<<<<<< * # ensures reference count of word_subwords doesn't reach 0 * subword_arrays[effective_words] = word_subwords */ - if (!(likely(((__pyx_v_word_subwords) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_subwords, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 324, __pyx_L1_error) + if (!(likely(((__pyx_v_word_subwords) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_subwords, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 326, __pyx_L1_error) (__pyx_v_subwords_idx[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_subwords))); - /* "gensim/models/fasttext_inner.pyx":326 + /* "gensim/models/fasttext_inner.pyx":328 * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) * # ensures reference count of word_subwords doesn't reach 0 * subword_arrays[effective_words] = word_subwords # <<<<<<<<<<<<<< * * if hs: */ - __pyx_t_19 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 326, __pyx_L1_error) + __pyx_t_19 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 328, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_19); - if (unlikely(PyDict_SetItem(__pyx_v_subword_arrays, __pyx_t_19, __pyx_v_word_subwords) < 0)) __PYX_ERR(0, 326, __pyx_L1_error) + if (unlikely(PyDict_SetItem(__pyx_v_subword_arrays, __pyx_t_19, __pyx_v_word_subwords) < 0)) __PYX_ERR(0, 328, __pyx_L1_error) __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - /* "gensim/models/fasttext_inner.pyx":328 + /* "gensim/models/fasttext_inner.pyx":330 * subword_arrays[effective_words] = word_subwords * * if hs: # <<<<<<<<<<<<<< @@ -4424,46 +4424,46 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":329 + /* "gensim/models/fasttext_inner.pyx":331 * * if hs: * codelens[effective_words] = len(word.code) # <<<<<<<<<<<<<< * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) */ - __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 329, __pyx_L1_error) + __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 331, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_19); - __pyx_t_20 = PyObject_Length(__pyx_t_19); if (unlikely(__pyx_t_20 == -1)) __PYX_ERR(0, 329, __pyx_L1_error) + __pyx_t_20 = PyObject_Length(__pyx_t_19); if (unlikely(__pyx_t_20 == -1)) __PYX_ERR(0, 331, __pyx_L1_error) __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; (__pyx_v_codelens[__pyx_v_effective_words]) = ((int)__pyx_t_20); - /* "gensim/models/fasttext_inner.pyx":330 + /* "gensim/models/fasttext_inner.pyx":332 * if hs: * codelens[effective_words] = len(word.code) * codes[effective_words] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< * points[effective_words] = np.PyArray_DATA(word.point) * */ - __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 330, __pyx_L1_error) + __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 332, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_19); - if (!(likely(((__pyx_t_19) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_19, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 330, __pyx_L1_error) + if (!(likely(((__pyx_t_19) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_19, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 332, __pyx_L1_error) (__pyx_v_codes[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_19))); __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - /* "gensim/models/fasttext_inner.pyx":331 + /* "gensim/models/fasttext_inner.pyx":333 * codelens[effective_words] = len(word.code) * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< * * effective_words += 1 */ - __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 331, __pyx_L1_error) + __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 333, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_19); - if (!(likely(((__pyx_t_19) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_19, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 331, __pyx_L1_error) + if (!(likely(((__pyx_t_19) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_19, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 333, __pyx_L1_error) (__pyx_v_points[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_19))); __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - /* "gensim/models/fasttext_inner.pyx":328 + /* "gensim/models/fasttext_inner.pyx":330 * subword_arrays[effective_words] = word_subwords * * if hs: # <<<<<<<<<<<<<< @@ -4472,7 +4472,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":333 + /* "gensim/models/fasttext_inner.pyx":335 * points[effective_words] = np.PyArray_DATA(word.point) * * effective_words += 1 # <<<<<<<<<<<<<< @@ -4481,7 +4481,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_effective_words = (__pyx_v_effective_words + 1); - /* "gensim/models/fasttext_inner.pyx":334 + /* "gensim/models/fasttext_inner.pyx":336 * * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< @@ -4491,7 +4491,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = ((__pyx_v_effective_words == 0x2710) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":335 + /* "gensim/models/fasttext_inner.pyx":337 * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: * break # <<<<<<<<<<<<<< @@ -4500,7 +4500,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ goto __pyx_L12_break; - /* "gensim/models/fasttext_inner.pyx":334 + /* "gensim/models/fasttext_inner.pyx":336 * * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< @@ -4521,7 +4521,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_L12_break:; __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/fasttext_inner.pyx":340 + /* "gensim/models/fasttext_inner.pyx":342 * # across sentence boundaries. * # indices of sentence number X are between tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 347, __pyx_L1_error) + __pyx_t_10 = Py_TYPE(__pyx_t_19)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 349, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; for (;;) { @@ -4662,17 +4662,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON if (likely(PyList_CheckExact(__pyx_t_19))) { if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_19)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyList_GET_ITEM(__pyx_t_19, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 347, __pyx_L1_error) + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_19, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 349, __pyx_L1_error) #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_19, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 347, __pyx_L1_error) + __pyx_t_3 = PySequence_ITEM(__pyx_t_19, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 349, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); #endif } else { if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_19)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_19, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 347, __pyx_L1_error) + __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_19, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 349, __pyx_L1_error) #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_19, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 347, __pyx_L1_error) + __pyx_t_3 = PySequence_ITEM(__pyx_t_19, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 349, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); #endif } @@ -4682,7 +4682,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 347, __pyx_L1_error) + else __PYX_ERR(0, 349, __pyx_L1_error) } break; } @@ -4693,17 +4693,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_v_i = __pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "gensim/models/fasttext_inner.pyx":348 + /* "gensim/models/fasttext_inner.pyx":350 * # precompute "reduced window" offsets in a single randint() call * for i, item in enumerate(model.random.randint(0, window, effective_words)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * * with nogil: */ - __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 348, __pyx_L1_error) + __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 350, __pyx_L1_error) (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_15; - /* "gensim/models/fasttext_inner.pyx":347 + /* "gensim/models/fasttext_inner.pyx":349 * * # precompute "reduced window" offsets in a single randint() call * for i, item in enumerate(model.random.randint(0, window, effective_words)): # <<<<<<<<<<<<<< @@ -4713,7 +4713,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON } __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - /* "gensim/models/fasttext_inner.pyx":350 + /* "gensim/models/fasttext_inner.pyx":352 * reduced_windows[i] = item * * with nogil: # <<<<<<<<<<<<<< @@ -4727,7 +4727,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON #endif /*try:*/ { - /* "gensim/models/fasttext_inner.pyx":351 + /* "gensim/models/fasttext_inner.pyx":353 * * with nogil: * for sent_idx in range(effective_sentences): # <<<<<<<<<<<<<< @@ -4738,7 +4738,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON for (__pyx_t_22 = 0; __pyx_t_22 < __pyx_t_2; __pyx_t_22+=1) { __pyx_v_sent_idx = __pyx_t_22; - /* "gensim/models/fasttext_inner.pyx":352 + /* "gensim/models/fasttext_inner.pyx":354 * with nogil: * for sent_idx in range(effective_sentences): * idx_start = sentence_idx[sent_idx] # <<<<<<<<<<<<<< @@ -4747,7 +4747,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_idx_start = (__pyx_v_sentence_idx[__pyx_v_sent_idx]); - /* "gensim/models/fasttext_inner.pyx":353 + /* "gensim/models/fasttext_inner.pyx":355 * for sent_idx in range(effective_sentences): * idx_start = sentence_idx[sent_idx] * idx_end = sentence_idx[sent_idx + 1] # <<<<<<<<<<<<<< @@ -4756,7 +4756,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_idx_end = (__pyx_v_sentence_idx[(__pyx_v_sent_idx + 1)]); - /* "gensim/models/fasttext_inner.pyx":354 + /* "gensim/models/fasttext_inner.pyx":356 * idx_start = sentence_idx[sent_idx] * idx_end = sentence_idx[sent_idx + 1] * for i in range(idx_start, idx_end): # <<<<<<<<<<<<<< @@ -4767,7 +4767,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON for (__pyx_t_24 = __pyx_v_idx_start; __pyx_t_24 < __pyx_t_23; __pyx_t_24+=1) { __pyx_v_i = __pyx_t_24; - /* "gensim/models/fasttext_inner.pyx":355 + /* "gensim/models/fasttext_inner.pyx":357 * idx_end = sentence_idx[sent_idx + 1] * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< @@ -4776,7 +4776,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/fasttext_inner.pyx":356 + /* "gensim/models/fasttext_inner.pyx":358 * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] * if j < idx_start: # <<<<<<<<<<<<<< @@ -4786,7 +4786,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = ((__pyx_v_j < __pyx_v_idx_start) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":357 + /* "gensim/models/fasttext_inner.pyx":359 * j = i - window + reduced_windows[i] * if j < idx_start: * j = idx_start # <<<<<<<<<<<<<< @@ -4795,7 +4795,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_j = __pyx_v_idx_start; - /* "gensim/models/fasttext_inner.pyx":356 + /* "gensim/models/fasttext_inner.pyx":358 * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] * if j < idx_start: # <<<<<<<<<<<<<< @@ -4804,7 +4804,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":358 + /* "gensim/models/fasttext_inner.pyx":360 * if j < idx_start: * j = idx_start * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -4813,7 +4813,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/fasttext_inner.pyx":359 + /* "gensim/models/fasttext_inner.pyx":361 * j = idx_start * k = i + window + 1 - reduced_windows[i] * if k > idx_end: # <<<<<<<<<<<<<< @@ -4823,7 +4823,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = ((__pyx_v_k > __pyx_v_idx_end) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":360 + /* "gensim/models/fasttext_inner.pyx":362 * k = i + window + 1 - reduced_windows[i] * if k > idx_end: * k = idx_end # <<<<<<<<<<<<<< @@ -4832,7 +4832,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_k = __pyx_v_idx_end; - /* "gensim/models/fasttext_inner.pyx":359 + /* "gensim/models/fasttext_inner.pyx":361 * j = idx_start * k = i + window + 1 - reduced_windows[i] * if k > idx_end: # <<<<<<<<<<<<<< @@ -4841,7 +4841,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":361 + /* "gensim/models/fasttext_inner.pyx":363 * if k > idx_end: * k = idx_end * for j in range(j, k): # <<<<<<<<<<<<<< @@ -4852,7 +4852,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON for (__pyx_t_26 = __pyx_v_j; __pyx_t_26 < __pyx_t_25; __pyx_t_26+=1) { __pyx_v_j = __pyx_t_26; - /* "gensim/models/fasttext_inner.pyx":362 + /* "gensim/models/fasttext_inner.pyx":364 * k = idx_end * for j in range(j, k): * if j == i: # <<<<<<<<<<<<<< @@ -4862,7 +4862,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = ((__pyx_v_j == __pyx_v_i) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":363 + /* "gensim/models/fasttext_inner.pyx":365 * for j in range(j, k): * if j == i: * continue # <<<<<<<<<<<<<< @@ -4871,7 +4871,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ goto __pyx_L33_continue; - /* "gensim/models/fasttext_inner.pyx":362 + /* "gensim/models/fasttext_inner.pyx":364 * k = idx_end * for j in range(j, k): * if j == i: # <<<<<<<<<<<<<< @@ -4880,7 +4880,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":364 + /* "gensim/models/fasttext_inner.pyx":366 * if j == i: * continue * if hs: # <<<<<<<<<<<<<< @@ -4890,7 +4890,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":365 + /* "gensim/models/fasttext_inner.pyx":367 * continue * if hs: * fast_sentence_sg_hs( # <<<<<<<<<<<<<< @@ -4899,7 +4899,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs((__pyx_v_points[__pyx_v_j]), (__pyx_v_codes[__pyx_v_j]), (__pyx_v_codelens[__pyx_v_j]), __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1, __pyx_v_size, (__pyx_v_subwords_idx[__pyx_v_i]), (__pyx_v_subwords_idx_len[__pyx_v_i]), __pyx_v__alpha, __pyx_v_work, __pyx_v_l1, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); - /* "gensim/models/fasttext_inner.pyx":364 + /* "gensim/models/fasttext_inner.pyx":366 * if j == i: * continue * if hs: # <<<<<<<<<<<<<< @@ -4908,7 +4908,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":369 + /* "gensim/models/fasttext_inner.pyx":371 * subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, * word_locks_ngrams) * if negative: # <<<<<<<<<<<<<< @@ -4918,7 +4918,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_v_negative != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":370 + /* "gensim/models/fasttext_inner.pyx":372 * word_locks_ngrams) * if negative: * next_random = fast_sentence_sg_neg( # <<<<<<<<<<<<<< @@ -4927,7 +4927,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_next_random = __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_neg(__pyx_v_negative, __pyx_v_cum_table, __pyx_v_cum_table_len, __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_j]), (__pyx_v_subwords_idx[__pyx_v_i]), (__pyx_v_subwords_idx_len[__pyx_v_i]), __pyx_v__alpha, __pyx_v_work, __pyx_v_l1, __pyx_v_next_random, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); - /* "gensim/models/fasttext_inner.pyx":369 + /* "gensim/models/fasttext_inner.pyx":371 * subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, * word_locks_ngrams) * if negative: # <<<<<<<<<<<<<< @@ -4941,7 +4941,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON } } - /* "gensim/models/fasttext_inner.pyx":350 + /* "gensim/models/fasttext_inner.pyx":352 * reduced_windows[i] = item * * with nogil: # <<<<<<<<<<<<<< @@ -4959,7 +4959,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON } } - /* "gensim/models/fasttext_inner.pyx":375 + /* "gensim/models/fasttext_inner.pyx":377 * next_random, word_locks_vocab, word_locks_ngrams) * * return effective_words # <<<<<<<<<<<<<< @@ -4967,7 +4967,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_19 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 375, __pyx_L1_error) + __pyx_t_19 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 377, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_19); __pyx_r = __pyx_t_19; __pyx_t_19 = 0; @@ -5009,7 +5009,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON return __pyx_r; } -/* "gensim/models/fasttext_inner.pyx":378 +/* "gensim/models/fasttext_inner.pyx":380 * * * def train_batch_cbow(model, sentences, alpha, _work, _neu1): # <<<<<<<<<<<<<< @@ -5052,26 +5052,26 @@ static PyObject *__pyx_pw_6gensim_6models_14fasttext_inner_3train_batch_cbow(PyO case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentences)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 1); __PYX_ERR(0, 378, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 1); __PYX_ERR(0, 380, __pyx_L3_error) } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 2); __PYX_ERR(0, 378, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 2); __PYX_ERR(0, 380, __pyx_L3_error) } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 3); __PYX_ERR(0, 378, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 3); __PYX_ERR(0, 380, __pyx_L3_error) } case 4: if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_neu1)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 4); __PYX_ERR(0, 378, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 4); __PYX_ERR(0, 380, __pyx_L3_error) } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_batch_cbow") < 0)) __PYX_ERR(0, 378, __pyx_L3_error) + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_batch_cbow") < 0)) __PYX_ERR(0, 380, __pyx_L3_error) } } else if (PyTuple_GET_SIZE(__pyx_args) != 5) { goto __pyx_L5_argtuple_error; @@ -5090,7 +5090,7 @@ static PyObject *__pyx_pw_6gensim_6models_14fasttext_inner_3train_batch_cbow(PyO } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 378, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 380, __pyx_L3_error) __pyx_L3_error:; __Pyx_AddTraceback("gensim.models.fasttext_inner.train_batch_cbow", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -5175,167 +5175,167 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT int __pyx_t_24; __Pyx_RefNannySetupContext("train_batch_cbow", 0); - /* "gensim/models/fasttext_inner.pyx":379 + /* "gensim/models/fasttext_inner.pyx":381 * * def train_batch_cbow(model, sentences, alpha, _work, _neu1): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative * cdef int sample = (model.vocabulary.sample != 0) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 379, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 381, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 379, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 381, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":380 + /* "gensim/models/fasttext_inner.pyx":382 * def train_batch_cbow(model, sentences, alpha, _work, _neu1): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< * cdef int sample = (model.vocabulary.sample != 0) * cdef int cbow_mean = model.cbow_mean */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 380, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 382, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 380, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 382, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":381 + /* "gensim/models/fasttext_inner.pyx":383 * cdef int hs = model.hs * cdef int negative = model.negative * cdef int sample = (model.vocabulary.sample != 0) # <<<<<<<<<<<<<< * cdef int cbow_mean = model.cbow_mean * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 381, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 383, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_sample); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 381, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_sample); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 383, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyObject_RichCompare(__pyx_t_3, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 381, __pyx_L1_error) + __pyx_t_1 = PyObject_RichCompare(__pyx_t_3, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 383, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 381, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 383, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_sample = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":382 + /* "gensim/models/fasttext_inner.pyx":384 * cdef int negative = model.negative * cdef int sample = (model.vocabulary.sample != 0) * cdef int cbow_mean = model.cbow_mean # <<<<<<<<<<<<<< * * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.vectors_vocab)) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 382, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 384, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 382, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 384, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_cbow_mean = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":384 + /* "gensim/models/fasttext_inner.pyx":386 * cdef int cbow_mean = model.cbow_mean * * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.vectors_vocab)) # <<<<<<<<<<<<<< * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.trainables.vectors_vocab_lockf)) * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.vectors_ngrams)) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 384, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 386, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vectors_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 384, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vectors_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 386, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 384, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 386, __pyx_L1_error) __pyx_v_syn0_vocab = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":385 + /* "gensim/models/fasttext_inner.pyx":387 * * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.vectors_vocab)) * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.trainables.vectors_vocab_lockf)) # <<<<<<<<<<<<<< * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.vectors_ngrams)) * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.trainables.vectors_ngrams_lockf)) */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 385, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 387, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_vectors_vocab_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 385, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_vectors_vocab_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 387, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 385, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 387, __pyx_L1_error) __pyx_v_word_locks_vocab = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":386 + /* "gensim/models/fasttext_inner.pyx":388 * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.vectors_vocab)) * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.trainables.vectors_vocab_lockf)) * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.vectors_ngrams)) # <<<<<<<<<<<<<< * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.trainables.vectors_ngrams_lockf)) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 386, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 388, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vectors_ngrams); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 386, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vectors_ngrams); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 388, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 386, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 388, __pyx_L1_error) __pyx_v_syn0_ngrams = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":387 + /* "gensim/models/fasttext_inner.pyx":389 * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.trainables.vectors_vocab_lockf)) * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.vectors_ngrams)) * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.trainables.vectors_ngrams_lockf)) # <<<<<<<<<<<<<< * * cdef REAL_t *work */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 387, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 389, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_vectors_ngrams_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 387, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_vectors_ngrams_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 389, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 387, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 389, __pyx_L1_error) __pyx_v_word_locks_ngrams = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":390 + /* "gensim/models/fasttext_inner.pyx":392 * * cdef REAL_t *work * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.wv.vector_size * */ - __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == ((npy_float32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 390, __pyx_L1_error) + __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == ((npy_float32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 392, __pyx_L1_error) __pyx_v__alpha = __pyx_t_4; - /* "gensim/models/fasttext_inner.pyx":391 + /* "gensim/models/fasttext_inner.pyx":393 * cdef REAL_t *work * cdef REAL_t _alpha = alpha * cdef int size = model.wv.vector_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_SENTENCE_LEN] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 391, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 393, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vector_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 391, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vector_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 393, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 391, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 393, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_size = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":397 + /* "gensim/models/fasttext_inner.pyx":399 * cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] * cdef int sentence_idx[MAX_SENTENCE_LEN + 1] * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef int i, j, k */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 397, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 399, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 397, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 399, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_window = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":400 + /* "gensim/models/fasttext_inner.pyx":402 * * cdef int i, j, k * cdef int effective_words = 0, effective_sentences = 0 # <<<<<<<<<<<<<< @@ -5345,19 +5345,19 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_v_effective_words = 0; __pyx_v_effective_sentences = 0; - /* "gensim/models/fasttext_inner.pyx":420 + /* "gensim/models/fasttext_inner.pyx":422 * # dummy dictionary to ensure that the memory locations that subwords_idx point to * # are referenced throughout so that it isn't put back to free memory pool by Python's memory manager * subword_arrays = {} # <<<<<<<<<<<<<< * * if hs: */ - __pyx_t_3 = PyDict_New(); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 420, __pyx_L1_error) + __pyx_t_3 = PyDict_New(); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 422, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_v_subword_arrays = ((PyObject*)__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":422 + /* "gensim/models/fasttext_inner.pyx":424 * subword_arrays = {} * * if hs: # <<<<<<<<<<<<<< @@ -5367,23 +5367,23 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":423 + /* "gensim/models/fasttext_inner.pyx":425 * * if hs: * syn1 = (np.PyArray_DATA(model.trainables.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 423, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 425, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 423, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 425, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 423, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 425, __pyx_L1_error) __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":422 + /* "gensim/models/fasttext_inner.pyx":424 * subword_arrays = {} * * if hs: # <<<<<<<<<<<<<< @@ -5392,7 +5392,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":425 + /* "gensim/models/fasttext_inner.pyx":427 * syn1 = (np.PyArray_DATA(model.trainables.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -5402,55 +5402,55 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_v_negative != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":426 + /* "gensim/models/fasttext_inner.pyx":428 * * if negative: * syn1neg = (np.PyArray_DATA(model.trainables.syn1neg)) # <<<<<<<<<<<<<< * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 426, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 428, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 426, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 428, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 426, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 428, __pyx_L1_error) __pyx_v_syn1neg = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":427 + /* "gensim/models/fasttext_inner.pyx":429 * if negative: * syn1neg = (np.PyArray_DATA(model.trainables.syn1neg)) * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) # <<<<<<<<<<<<<< * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 427, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 429, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 427, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 429, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 427, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 429, __pyx_L1_error) __pyx_v_cum_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":428 + /* "gensim/models/fasttext_inner.pyx":430 * syn1neg = (np.PyArray_DATA(model.trainables.syn1neg)) * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) # <<<<<<<<<<<<<< * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 428, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 430, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 428, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 430, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_6 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_6 == -1)) __PYX_ERR(0, 428, __pyx_L1_error) + __pyx_t_6 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_6 == -1)) __PYX_ERR(0, 430, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_cum_table_len = __pyx_t_6; - /* "gensim/models/fasttext_inner.pyx":425 + /* "gensim/models/fasttext_inner.pyx":427 * syn1 = (np.PyArray_DATA(model.trainables.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -5459,7 +5459,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":429 + /* "gensim/models/fasttext_inner.pyx":431 * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: # <<<<<<<<<<<<<< @@ -5477,41 +5477,41 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_L6_bool_binop_done:; if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":430 + /* "gensim/models/fasttext_inner.pyx":432 * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 430, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 432, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 430, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 432, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 430, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 432, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 430, __pyx_L1_error) + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 432, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 430, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 432, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 430, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 432, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 430, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 432, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 430, __pyx_L1_error) + __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 432, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 430, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 432, __pyx_L1_error) __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_next_random = __pyx_t_9; - /* "gensim/models/fasttext_inner.pyx":429 + /* "gensim/models/fasttext_inner.pyx":431 * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: # <<<<<<<<<<<<<< @@ -5520,42 +5520,42 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":433 + /* "gensim/models/fasttext_inner.pyx":435 * * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< * neu1 = np.PyArray_DATA(_neu1) * */ - if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 433, __pyx_L1_error) + if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 435, __pyx_L1_error) __pyx_v_work = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); - /* "gensim/models/fasttext_inner.pyx":434 + /* "gensim/models/fasttext_inner.pyx":436 * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) * neu1 = np.PyArray_DATA(_neu1) # <<<<<<<<<<<<<< * * # prepare C structures so we can go "full C" and release the Python GIL */ - if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 434, __pyx_L1_error) + if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 436, __pyx_L1_error) __pyx_v_neu1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__neu1))); - /* "gensim/models/fasttext_inner.pyx":437 + /* "gensim/models/fasttext_inner.pyx":439 * * # prepare C structures so we can go "full C" and release the Python GIL * vlookup = model.wv.vocab # <<<<<<<<<<<<<< * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: */ - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 437, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 439, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 437, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 439, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_vlookup = __pyx_t_3; __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":438 + /* "gensim/models/fasttext_inner.pyx":440 * # prepare C structures so we can go "full C" and release the Python GIL * vlookup = model.wv.vocab * sentence_idx[0] = 0 # indices of the first sentence always start at 0 # <<<<<<<<<<<<<< @@ -5564,7 +5564,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ (__pyx_v_sentence_idx[0]) = 0; - /* "gensim/models/fasttext_inner.pyx":439 + /* "gensim/models/fasttext_inner.pyx":441 * vlookup = model.wv.vocab * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: # <<<<<<<<<<<<<< @@ -5575,26 +5575,26 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_3 = __pyx_v_sentences; __Pyx_INCREF(__pyx_t_3); __pyx_t_6 = 0; __pyx_t_10 = NULL; } else { - __pyx_t_6 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_sentences); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 439, __pyx_L1_error) + __pyx_t_6 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_sentences); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 441, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_10 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 439, __pyx_L1_error) + __pyx_t_10 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 441, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_10)) { if (likely(PyList_CheckExact(__pyx_t_3))) { if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_8 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 439, __pyx_L1_error) + __pyx_t_8 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 441, __pyx_L1_error) #else - __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 439, __pyx_L1_error) + __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 441, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); #endif } else { if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 439, __pyx_L1_error) + __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 441, __pyx_L1_error) #else - __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 439, __pyx_L1_error) + __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 441, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); #endif } @@ -5604,7 +5604,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 439, __pyx_L1_error) + else __PYX_ERR(0, 441, __pyx_L1_error) } break; } @@ -5613,18 +5613,18 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __Pyx_XDECREF_SET(__pyx_v_sent, __pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/fasttext_inner.pyx":440 + /* "gensim/models/fasttext_inner.pyx":442 * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: * if not sent: # <<<<<<<<<<<<<< * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: */ - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_v_sent); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 440, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_v_sent); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 442, __pyx_L1_error) __pyx_t_7 = ((!__pyx_t_5) != 0); if (__pyx_t_7) { - /* "gensim/models/fasttext_inner.pyx":441 + /* "gensim/models/fasttext_inner.pyx":443 * for sent in sentences: * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged # <<<<<<<<<<<<<< @@ -5633,7 +5633,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ goto __pyx_L8_continue; - /* "gensim/models/fasttext_inner.pyx":440 + /* "gensim/models/fasttext_inner.pyx":442 * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: * if not sent: # <<<<<<<<<<<<<< @@ -5642,7 +5642,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":442 + /* "gensim/models/fasttext_inner.pyx":444 * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: # <<<<<<<<<<<<<< @@ -5653,26 +5653,26 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_8 = __pyx_v_sent; __Pyx_INCREF(__pyx_t_8); __pyx_t_11 = 0; __pyx_t_12 = NULL; } else { - __pyx_t_11 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_v_sent); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 442, __pyx_L1_error) + __pyx_t_11 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_v_sent); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 444, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - __pyx_t_12 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 442, __pyx_L1_error) + __pyx_t_12 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 444, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_12)) { if (likely(PyList_CheckExact(__pyx_t_8))) { if (__pyx_t_11 >= PyList_GET_SIZE(__pyx_t_8)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 442, __pyx_L1_error) + __pyx_t_1 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 444, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 442, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 444, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } else { if (__pyx_t_11 >= PyTuple_GET_SIZE(__pyx_t_8)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 442, __pyx_L1_error) + __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 444, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 442, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 444, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } @@ -5682,7 +5682,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 442, __pyx_L1_error) + else __PYX_ERR(0, 444, __pyx_L1_error) } break; } @@ -5691,16 +5691,16 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __Pyx_XDECREF_SET(__pyx_v_token, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":443 + /* "gensim/models/fasttext_inner.pyx":445 * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: * word = vlookup[token] if token in vlookup else None # <<<<<<<<<<<<<< * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window */ - __pyx_t_7 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 443, __pyx_L1_error) + __pyx_t_7 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 445, __pyx_L1_error) if ((__pyx_t_7 != 0)) { - __pyx_t_13 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 443, __pyx_L1_error) + __pyx_t_13 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 445, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __pyx_t_1 = __pyx_t_13; __pyx_t_13 = 0; @@ -5711,7 +5711,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":444 + /* "gensim/models/fasttext_inner.pyx":446 * for token in sent: * word = vlookup[token] if token in vlookup else None * if word is None: # <<<<<<<<<<<<<< @@ -5722,7 +5722,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_t_7 != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":445 + /* "gensim/models/fasttext_inner.pyx":447 * word = vlookup[token] if token in vlookup else None * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window # <<<<<<<<<<<<<< @@ -5731,7 +5731,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ goto __pyx_L11_continue; - /* "gensim/models/fasttext_inner.pyx":444 + /* "gensim/models/fasttext_inner.pyx":446 * for token in sent: * word = vlookup[token] if token in vlookup else None * if word is None: # <<<<<<<<<<<<<< @@ -5740,7 +5740,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":446 + /* "gensim/models/fasttext_inner.pyx":448 * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< @@ -5753,20 +5753,20 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = __pyx_t_7; goto __pyx_L15_bool_binop_done; } - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 446, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 448, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_13 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 446, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 448, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_14 = PyObject_RichCompare(__pyx_t_1, __pyx_t_13, Py_LT); __Pyx_XGOTREF(__pyx_t_14); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 446, __pyx_L1_error) + __pyx_t_14 = PyObject_RichCompare(__pyx_t_1, __pyx_t_13, Py_LT); __Pyx_XGOTREF(__pyx_t_14); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 448, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_14); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 446, __pyx_L1_error) + __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_14); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 448, __pyx_L1_error) __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; __pyx_t_5 = __pyx_t_7; __pyx_L15_bool_binop_done:; if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":447 + /* "gensim/models/fasttext_inner.pyx":449 * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): * continue # <<<<<<<<<<<<<< @@ -5775,7 +5775,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ goto __pyx_L11_continue; - /* "gensim/models/fasttext_inner.pyx":446 + /* "gensim/models/fasttext_inner.pyx":448 * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< @@ -5784,46 +5784,46 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":448 + /* "gensim/models/fasttext_inner.pyx":450 * if sample and word.sample_int < random_int32(&next_random): * continue * indexes[effective_words] = word.index # <<<<<<<<<<<<<< * - * subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] + * subwords = [ */ - __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 448, __pyx_L1_error) + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_t_14); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 448, __pyx_L1_error) + __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_t_14); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; (__pyx_v_indexes[__pyx_v_effective_words]) = __pyx_t_15; - /* "gensim/models/fasttext_inner.pyx":450 + /* "gensim/models/fasttext_inner.pyx":452 * indexes[effective_words] = word.index * - * subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] # <<<<<<<<<<<<<< - * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] - * word_subwords = np.array(subwords, dtype=np.uint32) + * subwords = [ # <<<<<<<<<<<<<< + * model.wv.hash2index[ft_hash(subword) % model.bucket] + * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n) */ - __pyx_t_14 = PyList_New(0); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 450, __pyx_L1_error) + __pyx_t_14 = PyList_New(0); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 452, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - /* "gensim/models/fasttext_inner.pyx":451 - * - * subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] - * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] # <<<<<<<<<<<<<< + /* "gensim/models/fasttext_inner.pyx":454 + * subwords = [ + * model.wv.hash2index[ft_hash(subword) % model.bucket] + * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n) # <<<<<<<<<<<<<< + * ] * word_subwords = np.array(subwords, dtype=np.uint32) - * subwords_idx_len[effective_words] = len(subwords) */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_compute_ngrams); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 451, __pyx_L1_error) + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_compute_ngrams); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 451, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_min_n); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 451, __pyx_L1_error) + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_min_n); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 451, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - __pyx_t_18 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_max_n); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 451, __pyx_L1_error) + __pyx_t_18 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_max_n); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_18); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; __pyx_t_16 = NULL; @@ -5841,7 +5841,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT #if CYTHON_FAST_PYCALL if (PyFunction_Check(__pyx_t_1)) { PyObject *__pyx_temp[4] = {__pyx_t_16, __pyx_v_token, __pyx_t_17, __pyx_t_18}; - __pyx_t_13 = __Pyx_PyFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_2, 3+__pyx_t_2); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 451, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_2, 3+__pyx_t_2); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_16); __pyx_t_16 = 0; __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; @@ -5851,7 +5851,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT #if CYTHON_FAST_PYCCALL if (__Pyx_PyFastCFunction_Check(__pyx_t_1)) { PyObject *__pyx_temp[4] = {__pyx_t_16, __pyx_v_token, __pyx_t_17, __pyx_t_18}; - __pyx_t_13 = __Pyx_PyCFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_2, 3+__pyx_t_2); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 451, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyCFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_2, 3+__pyx_t_2); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_16); __pyx_t_16 = 0; __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; @@ -5859,7 +5859,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT } else #endif { - __pyx_t_19 = PyTuple_New(3+__pyx_t_2); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 451, __pyx_L1_error) + __pyx_t_19 = PyTuple_New(3+__pyx_t_2); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_19); if (__pyx_t_16) { __Pyx_GIVEREF(__pyx_t_16); PyTuple_SET_ITEM(__pyx_t_19, 0, __pyx_t_16); __pyx_t_16 = NULL; @@ -5873,7 +5873,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT PyTuple_SET_ITEM(__pyx_t_19, 2+__pyx_t_2, __pyx_t_18); __pyx_t_17 = 0; __pyx_t_18 = 0; - __pyx_t_13 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_19, NULL); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 451, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_19, NULL); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; } @@ -5882,9 +5882,9 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_1 = __pyx_t_13; __Pyx_INCREF(__pyx_t_1); __pyx_t_20 = 0; __pyx_t_21 = NULL; } else { - __pyx_t_20 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_t_13); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 451, __pyx_L1_error) + __pyx_t_20 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_t_13); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_21 = Py_TYPE(__pyx_t_1)->tp_iternext; if (unlikely(!__pyx_t_21)) __PYX_ERR(0, 451, __pyx_L1_error) + __pyx_t_21 = Py_TYPE(__pyx_t_1)->tp_iternext; if (unlikely(!__pyx_t_21)) __PYX_ERR(0, 454, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; for (;;) { @@ -5892,17 +5892,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT if (likely(PyList_CheckExact(__pyx_t_1))) { if (__pyx_t_20 >= PyList_GET_SIZE(__pyx_t_1)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_13 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_20); __Pyx_INCREF(__pyx_t_13); __pyx_t_20++; if (unlikely(0 < 0)) __PYX_ERR(0, 451, __pyx_L1_error) + __pyx_t_13 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_20); __Pyx_INCREF(__pyx_t_13); __pyx_t_20++; if (unlikely(0 < 0)) __PYX_ERR(0, 454, __pyx_L1_error) #else - __pyx_t_13 = PySequence_ITEM(__pyx_t_1, __pyx_t_20); __pyx_t_20++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 451, __pyx_L1_error) + __pyx_t_13 = PySequence_ITEM(__pyx_t_1, __pyx_t_20); __pyx_t_20++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); #endif } else { if (__pyx_t_20 >= PyTuple_GET_SIZE(__pyx_t_1)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_13 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_20); __Pyx_INCREF(__pyx_t_13); __pyx_t_20++; if (unlikely(0 < 0)) __PYX_ERR(0, 451, __pyx_L1_error) + __pyx_t_13 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_20); __Pyx_INCREF(__pyx_t_13); __pyx_t_20++; if (unlikely(0 < 0)) __PYX_ERR(0, 454, __pyx_L1_error) #else - __pyx_t_13 = PySequence_ITEM(__pyx_t_1, __pyx_t_20); __pyx_t_20++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 451, __pyx_L1_error) + __pyx_t_13 = PySequence_ITEM(__pyx_t_1, __pyx_t_20); __pyx_t_20++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); #endif } @@ -5912,7 +5912,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 451, __pyx_L1_error) + else __PYX_ERR(0, 454, __pyx_L1_error) } break; } @@ -5921,19 +5921,19 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __Pyx_XDECREF_SET(__pyx_v_subword, __pyx_t_13); __pyx_t_13 = 0; - /* "gensim/models/fasttext_inner.pyx":450 - * indexes[effective_words] = word.index + /* "gensim/models/fasttext_inner.pyx":453 * - * subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] # <<<<<<<<<<<<<< - * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] - * word_subwords = np.array(subwords, dtype=np.uint32) + * subwords = [ + * model.wv.hash2index[ft_hash(subword) % model.bucket] # <<<<<<<<<<<<<< + * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n) + * ] */ - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 450, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 453, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_hash2index); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 450, __pyx_L1_error) + __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_hash2index); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 453, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_19); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_18 = __Pyx_GetModuleGlobalName(__pyx_n_s_ft_hash); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 450, __pyx_L1_error) + __pyx_t_18 = __Pyx_GetModuleGlobalName(__pyx_n_s_ft_hash); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 453, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_18); __pyx_t_17 = NULL; if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_18))) { @@ -5946,13 +5946,13 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT } } if (!__pyx_t_17) { - __pyx_t_13 = __Pyx_PyObject_CallOneArg(__pyx_t_18, __pyx_v_subword); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 450, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_CallOneArg(__pyx_t_18, __pyx_v_subword); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 453, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); } else { #if CYTHON_FAST_PYCALL if (PyFunction_Check(__pyx_t_18)) { PyObject *__pyx_temp[2] = {__pyx_t_17, __pyx_v_subword}; - __pyx_t_13 = __Pyx_PyFunction_FastCall(__pyx_t_18, __pyx_temp+1-1, 1+1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 450, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyFunction_FastCall(__pyx_t_18, __pyx_temp+1-1, 1+1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 453, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_17); __pyx_t_17 = 0; __Pyx_GOTREF(__pyx_t_13); } else @@ -5960,76 +5960,76 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT #if CYTHON_FAST_PYCCALL if (__Pyx_PyFastCFunction_Check(__pyx_t_18)) { PyObject *__pyx_temp[2] = {__pyx_t_17, __pyx_v_subword}; - __pyx_t_13 = __Pyx_PyCFunction_FastCall(__pyx_t_18, __pyx_temp+1-1, 1+1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 450, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyCFunction_FastCall(__pyx_t_18, __pyx_temp+1-1, 1+1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 453, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_17); __pyx_t_17 = 0; __Pyx_GOTREF(__pyx_t_13); } else #endif { - __pyx_t_16 = PyTuple_New(1+1); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 450, __pyx_L1_error) + __pyx_t_16 = PyTuple_New(1+1); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 453, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); __Pyx_GIVEREF(__pyx_t_17); PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_17); __pyx_t_17 = NULL; __Pyx_INCREF(__pyx_v_subword); __Pyx_GIVEREF(__pyx_v_subword); PyTuple_SET_ITEM(__pyx_t_16, 0+1, __pyx_v_subword); - __pyx_t_13 = __Pyx_PyObject_Call(__pyx_t_18, __pyx_t_16, NULL); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 450, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_Call(__pyx_t_18, __pyx_t_16, NULL); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 453, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; } } __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; - __pyx_t_18 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_bucket); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 450, __pyx_L1_error) + __pyx_t_18 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_bucket); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 453, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_18); - __pyx_t_16 = PyNumber_Remainder(__pyx_t_13, __pyx_t_18); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 450, __pyx_L1_error) + __pyx_t_16 = PyNumber_Remainder(__pyx_t_13, __pyx_t_18); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 453, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; - __pyx_t_18 = PyObject_GetItem(__pyx_t_19, __pyx_t_16); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 450, __pyx_L1_error) + __pyx_t_18 = PyObject_GetItem(__pyx_t_19, __pyx_t_16); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 453, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_18); __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - if (unlikely(__Pyx_ListComp_Append(__pyx_t_14, (PyObject*)__pyx_t_18))) __PYX_ERR(0, 450, __pyx_L1_error) + if (unlikely(__Pyx_ListComp_Append(__pyx_t_14, (PyObject*)__pyx_t_18))) __PYX_ERR(0, 452, __pyx_L1_error) __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; - /* "gensim/models/fasttext_inner.pyx":451 - * - * subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] - * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] # <<<<<<<<<<<<<< + /* "gensim/models/fasttext_inner.pyx":454 + * subwords = [ + * model.wv.hash2index[ft_hash(subword) % model.bucket] + * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n) # <<<<<<<<<<<<<< + * ] * word_subwords = np.array(subwords, dtype=np.uint32) - * subwords_idx_len[effective_words] = len(subwords) */ } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_XDECREF_SET(__pyx_v_subwords, ((PyObject*)__pyx_t_14)); __pyx_t_14 = 0; - /* "gensim/models/fasttext_inner.pyx":452 - * subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] - * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] + /* "gensim/models/fasttext_inner.pyx":456 + * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n) + * ] * word_subwords = np.array(subwords, dtype=np.uint32) # <<<<<<<<<<<<<< * subwords_idx_len[effective_words] = len(subwords) * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) */ - __pyx_t_14 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_14 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 456, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_array); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_array); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 456, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - __pyx_t_14 = PyTuple_New(1); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_14 = PyTuple_New(1); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 456, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); __Pyx_INCREF(__pyx_v_subwords); __Pyx_GIVEREF(__pyx_v_subwords); PyTuple_SET_ITEM(__pyx_t_14, 0, __pyx_v_subwords); - __pyx_t_18 = PyDict_New(); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_18 = PyDict_New(); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 456, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_18); - __pyx_t_16 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_16 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 456, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_uint32); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_uint32); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 456, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_19); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - if (PyDict_SetItem(__pyx_t_18, __pyx_n_s_dtype, __pyx_t_19) < 0) __PYX_ERR(0, 452, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_18, __pyx_n_s_dtype, __pyx_t_19) < 0) __PYX_ERR(0, 456, __pyx_L1_error) __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - __pyx_t_19 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_14, __pyx_t_18); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_19 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_14, __pyx_t_18); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 456, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_19); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; @@ -6037,39 +6037,39 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __Pyx_XDECREF_SET(__pyx_v_word_subwords, __pyx_t_19); __pyx_t_19 = 0; - /* "gensim/models/fasttext_inner.pyx":453 - * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] + /* "gensim/models/fasttext_inner.pyx":457 + * ] * word_subwords = np.array(subwords, dtype=np.uint32) * subwords_idx_len[effective_words] = len(subwords) # <<<<<<<<<<<<<< * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) * # ensures reference count of word_subwords doesn't reach 0 */ - __pyx_t_20 = PyList_GET_SIZE(__pyx_v_subwords); if (unlikely(__pyx_t_20 == -1)) __PYX_ERR(0, 453, __pyx_L1_error) + __pyx_t_20 = PyList_GET_SIZE(__pyx_v_subwords); if (unlikely(__pyx_t_20 == -1)) __PYX_ERR(0, 457, __pyx_L1_error) (__pyx_v_subwords_idx_len[__pyx_v_effective_words]) = ((int)__pyx_t_20); - /* "gensim/models/fasttext_inner.pyx":454 + /* "gensim/models/fasttext_inner.pyx":458 * word_subwords = np.array(subwords, dtype=np.uint32) * subwords_idx_len[effective_words] = len(subwords) * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) # <<<<<<<<<<<<<< * # ensures reference count of word_subwords doesn't reach 0 * subword_arrays[effective_words] = word_subwords */ - if (!(likely(((__pyx_v_word_subwords) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_subwords, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 454, __pyx_L1_error) + if (!(likely(((__pyx_v_word_subwords) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_subwords, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 458, __pyx_L1_error) (__pyx_v_subwords_idx[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_subwords))); - /* "gensim/models/fasttext_inner.pyx":456 + /* "gensim/models/fasttext_inner.pyx":460 * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) * # ensures reference count of word_subwords doesn't reach 0 * subword_arrays[effective_words] = word_subwords # <<<<<<<<<<<<<< * * if hs: */ - __pyx_t_19 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 456, __pyx_L1_error) + __pyx_t_19 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 460, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_19); - if (unlikely(PyDict_SetItem(__pyx_v_subword_arrays, __pyx_t_19, __pyx_v_word_subwords) < 0)) __PYX_ERR(0, 456, __pyx_L1_error) + if (unlikely(PyDict_SetItem(__pyx_v_subword_arrays, __pyx_t_19, __pyx_v_word_subwords) < 0)) __PYX_ERR(0, 460, __pyx_L1_error) __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - /* "gensim/models/fasttext_inner.pyx":458 + /* "gensim/models/fasttext_inner.pyx":462 * subword_arrays[effective_words] = word_subwords * * if hs: # <<<<<<<<<<<<<< @@ -6079,46 +6079,46 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":459 + /* "gensim/models/fasttext_inner.pyx":463 * * if hs: * codelens[effective_words] = len(word.code) # <<<<<<<<<<<<<< * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) */ - __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 459, __pyx_L1_error) + __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 463, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_19); - __pyx_t_20 = PyObject_Length(__pyx_t_19); if (unlikely(__pyx_t_20 == -1)) __PYX_ERR(0, 459, __pyx_L1_error) + __pyx_t_20 = PyObject_Length(__pyx_t_19); if (unlikely(__pyx_t_20 == -1)) __PYX_ERR(0, 463, __pyx_L1_error) __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; (__pyx_v_codelens[__pyx_v_effective_words]) = ((int)__pyx_t_20); - /* "gensim/models/fasttext_inner.pyx":460 + /* "gensim/models/fasttext_inner.pyx":464 * if hs: * codelens[effective_words] = len(word.code) * codes[effective_words] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< * points[effective_words] = np.PyArray_DATA(word.point) * effective_words += 1 */ - __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 460, __pyx_L1_error) + __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 464, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_19); - if (!(likely(((__pyx_t_19) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_19, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 460, __pyx_L1_error) + if (!(likely(((__pyx_t_19) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_19, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 464, __pyx_L1_error) (__pyx_v_codes[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_19))); __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - /* "gensim/models/fasttext_inner.pyx":461 + /* "gensim/models/fasttext_inner.pyx":465 * codelens[effective_words] = len(word.code) * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: */ - __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 461, __pyx_L1_error) + __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 465, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_19); - if (!(likely(((__pyx_t_19) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_19, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 461, __pyx_L1_error) + if (!(likely(((__pyx_t_19) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_19, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 465, __pyx_L1_error) (__pyx_v_points[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_19))); __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - /* "gensim/models/fasttext_inner.pyx":458 + /* "gensim/models/fasttext_inner.pyx":462 * subword_arrays[effective_words] = word_subwords * * if hs: # <<<<<<<<<<<<<< @@ -6127,7 +6127,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":462 + /* "gensim/models/fasttext_inner.pyx":466 * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) * effective_words += 1 # <<<<<<<<<<<<<< @@ -6136,7 +6136,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_effective_words = (__pyx_v_effective_words + 1); - /* "gensim/models/fasttext_inner.pyx":463 + /* "gensim/models/fasttext_inner.pyx":467 * points[effective_words] = np.PyArray_DATA(word.point) * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< @@ -6146,7 +6146,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = ((__pyx_v_effective_words == 0x2710) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":464 + /* "gensim/models/fasttext_inner.pyx":468 * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: * break # <<<<<<<<<<<<<< @@ -6155,7 +6155,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ goto __pyx_L12_break; - /* "gensim/models/fasttext_inner.pyx":463 + /* "gensim/models/fasttext_inner.pyx":467 * points[effective_words] = np.PyArray_DATA(word.point) * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< @@ -6164,7 +6164,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":442 + /* "gensim/models/fasttext_inner.pyx":444 * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: # <<<<<<<<<<<<<< @@ -6176,7 +6176,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_L12_break:; __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/fasttext_inner.pyx":469 + /* "gensim/models/fasttext_inner.pyx":473 * # across sentence boundaries. * # indices of sentence number X are between tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 476, __pyx_L1_error) + __pyx_t_10 = Py_TYPE(__pyx_t_19)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 480, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; for (;;) { @@ -6317,17 +6317,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT if (likely(PyList_CheckExact(__pyx_t_19))) { if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_19)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyList_GET_ITEM(__pyx_t_19, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 476, __pyx_L1_error) + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_19, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 480, __pyx_L1_error) #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_19, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 476, __pyx_L1_error) + __pyx_t_3 = PySequence_ITEM(__pyx_t_19, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 480, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); #endif } else { if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_19)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_19, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 476, __pyx_L1_error) + __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_19, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 480, __pyx_L1_error) #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_19, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 476, __pyx_L1_error) + __pyx_t_3 = PySequence_ITEM(__pyx_t_19, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 480, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); #endif } @@ -6337,7 +6337,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 476, __pyx_L1_error) + else __PYX_ERR(0, 480, __pyx_L1_error) } break; } @@ -6348,17 +6348,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_v_i = __pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "gensim/models/fasttext_inner.pyx":477 + /* "gensim/models/fasttext_inner.pyx":481 * # precompute "reduced window" offsets in a single randint() call * for i, item in enumerate(model.random.randint(0, window, effective_words)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * * # release GIL & train on all sentences */ - __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 477, __pyx_L1_error) + __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 481, __pyx_L1_error) (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_15; - /* "gensim/models/fasttext_inner.pyx":476 + /* "gensim/models/fasttext_inner.pyx":480 * * # precompute "reduced window" offsets in a single randint() call * for i, item in enumerate(model.random.randint(0, window, effective_words)): # <<<<<<<<<<<<<< @@ -6368,7 +6368,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT } __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - /* "gensim/models/fasttext_inner.pyx":480 + /* "gensim/models/fasttext_inner.pyx":484 * * # release GIL & train on all sentences * with nogil: # <<<<<<<<<<<<<< @@ -6382,7 +6382,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT #endif /*try:*/ { - /* "gensim/models/fasttext_inner.pyx":481 + /* "gensim/models/fasttext_inner.pyx":485 * # release GIL & train on all sentences * with nogil: * for sent_idx in range(effective_sentences): # <<<<<<<<<<<<<< @@ -6393,7 +6393,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT for (__pyx_t_22 = 0; __pyx_t_22 < __pyx_t_2; __pyx_t_22+=1) { __pyx_v_sent_idx = __pyx_t_22; - /* "gensim/models/fasttext_inner.pyx":482 + /* "gensim/models/fasttext_inner.pyx":486 * with nogil: * for sent_idx in range(effective_sentences): * idx_start = sentence_idx[sent_idx] # <<<<<<<<<<<<<< @@ -6402,7 +6402,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_idx_start = (__pyx_v_sentence_idx[__pyx_v_sent_idx]); - /* "gensim/models/fasttext_inner.pyx":483 + /* "gensim/models/fasttext_inner.pyx":487 * for sent_idx in range(effective_sentences): * idx_start = sentence_idx[sent_idx] * idx_end = sentence_idx[sent_idx + 1] # <<<<<<<<<<<<<< @@ -6411,7 +6411,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_idx_end = (__pyx_v_sentence_idx[(__pyx_v_sent_idx + 1)]); - /* "gensim/models/fasttext_inner.pyx":484 + /* "gensim/models/fasttext_inner.pyx":488 * idx_start = sentence_idx[sent_idx] * idx_end = sentence_idx[sent_idx + 1] * for i in range(idx_start, idx_end): # <<<<<<<<<<<<<< @@ -6422,7 +6422,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT for (__pyx_t_24 = __pyx_v_idx_start; __pyx_t_24 < __pyx_t_23; __pyx_t_24+=1) { __pyx_v_i = __pyx_t_24; - /* "gensim/models/fasttext_inner.pyx":485 + /* "gensim/models/fasttext_inner.pyx":489 * idx_end = sentence_idx[sent_idx + 1] * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< @@ -6431,7 +6431,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/fasttext_inner.pyx":486 + /* "gensim/models/fasttext_inner.pyx":490 * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] * if j < idx_start: # <<<<<<<<<<<<<< @@ -6441,7 +6441,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = ((__pyx_v_j < __pyx_v_idx_start) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":487 + /* "gensim/models/fasttext_inner.pyx":491 * j = i - window + reduced_windows[i] * if j < idx_start: * j = idx_start # <<<<<<<<<<<<<< @@ -6450,7 +6450,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_j = __pyx_v_idx_start; - /* "gensim/models/fasttext_inner.pyx":486 + /* "gensim/models/fasttext_inner.pyx":490 * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] * if j < idx_start: # <<<<<<<<<<<<<< @@ -6459,7 +6459,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":488 + /* "gensim/models/fasttext_inner.pyx":492 * if j < idx_start: * j = idx_start * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -6468,7 +6468,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/fasttext_inner.pyx":489 + /* "gensim/models/fasttext_inner.pyx":493 * j = idx_start * k = i + window + 1 - reduced_windows[i] * if k > idx_end: # <<<<<<<<<<<<<< @@ -6478,7 +6478,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = ((__pyx_v_k > __pyx_v_idx_end) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":490 + /* "gensim/models/fasttext_inner.pyx":494 * k = i + window + 1 - reduced_windows[i] * if k > idx_end: * k = idx_end # <<<<<<<<<<<<<< @@ -6487,7 +6487,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_k = __pyx_v_idx_end; - /* "gensim/models/fasttext_inner.pyx":489 + /* "gensim/models/fasttext_inner.pyx":493 * j = idx_start * k = i + window + 1 - reduced_windows[i] * if k > idx_end: # <<<<<<<<<<<<<< @@ -6496,7 +6496,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":492 + /* "gensim/models/fasttext_inner.pyx":496 * k = idx_end * * if hs: # <<<<<<<<<<<<<< @@ -6506,7 +6506,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":493 + /* "gensim/models/fasttext_inner.pyx":497 * * if hs: * fast_sentence_cbow_hs( # <<<<<<<<<<<<<< @@ -6515,7 +6515,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), __pyx_v_codelens, __pyx_v_neu1, __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1, __pyx_v_size, __pyx_v_indexes, __pyx_v_subwords_idx, __pyx_v_subwords_idx_len, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); - /* "gensim/models/fasttext_inner.pyx":492 + /* "gensim/models/fasttext_inner.pyx":496 * k = idx_end * * if hs: # <<<<<<<<<<<<<< @@ -6524,7 +6524,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":497 + /* "gensim/models/fasttext_inner.pyx":501 * subwords_idx,subwords_idx_len,_alpha, work, i, j, k, cbow_mean, word_locks_vocab, * word_locks_ngrams) * if negative: # <<<<<<<<<<<<<< @@ -6534,7 +6534,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_v_negative != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":498 + /* "gensim/models/fasttext_inner.pyx":502 * word_locks_ngrams) * if negative: * next_random = fast_sentence_cbow_neg( # <<<<<<<<<<<<<< @@ -6543,7 +6543,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_next_random = __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_neg(__pyx_v_negative, __pyx_v_cum_table, __pyx_v_cum_table_len, __pyx_v_codelens, __pyx_v_neu1, __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1neg, __pyx_v_size, __pyx_v_indexes, __pyx_v_subwords_idx, __pyx_v_subwords_idx_len, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean, __pyx_v_next_random, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); - /* "gensim/models/fasttext_inner.pyx":497 + /* "gensim/models/fasttext_inner.pyx":501 * subwords_idx,subwords_idx_len,_alpha, work, i, j, k, cbow_mean, word_locks_vocab, * word_locks_ngrams) * if negative: # <<<<<<<<<<<<<< @@ -6555,7 +6555,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT } } - /* "gensim/models/fasttext_inner.pyx":480 + /* "gensim/models/fasttext_inner.pyx":484 * * # release GIL & train on all sentences * with nogil: # <<<<<<<<<<<<<< @@ -6573,7 +6573,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT } } - /* "gensim/models/fasttext_inner.pyx":503 + /* "gensim/models/fasttext_inner.pyx":507 * cbow_mean, next_random, word_locks_vocab, word_locks_ngrams) * * return effective_words # <<<<<<<<<<<<<< @@ -6581,13 +6581,13 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_19 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 503, __pyx_L1_error) + __pyx_t_19 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 507, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_19); __pyx_r = __pyx_t_19; __pyx_t_19 = 0; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":378 + /* "gensim/models/fasttext_inner.pyx":380 * * * def train_batch_cbow(model, sentences, alpha, _work, _neu1): # <<<<<<<<<<<<<< @@ -6623,7 +6623,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT return __pyx_r; } -/* "gensim/models/fasttext_inner.pyx":506 +/* "gensim/models/fasttext_inner.pyx":510 * * * def init(): # <<<<<<<<<<<<<< @@ -6662,7 +6662,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P int __pyx_t_4; __Pyx_RefNannySetupContext("init", 0); - /* "gensim/models/fasttext_inner.pyx":515 + /* "gensim/models/fasttext_inner.pyx":519 * * cdef int i * cdef float *x = [10.0] # <<<<<<<<<<<<<< @@ -6672,7 +6672,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_t_1[0] = ((float)10.0); __pyx_v_x = __pyx_t_1; - /* "gensim/models/fasttext_inner.pyx":516 + /* "gensim/models/fasttext_inner.pyx":520 * cdef int i * cdef float *x = [10.0] * cdef float *y = [0.01] # <<<<<<<<<<<<<< @@ -6682,7 +6682,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_t_2[0] = ((float)0.01); __pyx_v_y = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":517 + /* "gensim/models/fasttext_inner.pyx":521 * cdef float *x = [10.0] * cdef float *y = [0.01] * cdef float expected = 0.1 # <<<<<<<<<<<<<< @@ -6691,7 +6691,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_expected = ((float)0.1); - /* "gensim/models/fasttext_inner.pyx":518 + /* "gensim/models/fasttext_inner.pyx":522 * cdef float *y = [0.01] * cdef float expected = 0.1 * cdef int size = 1 # <<<<<<<<<<<<<< @@ -6700,7 +6700,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_size = 1; - /* "gensim/models/fasttext_inner.pyx":523 + /* "gensim/models/fasttext_inner.pyx":527 * * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): # <<<<<<<<<<<<<< @@ -6710,7 +6710,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P for (__pyx_t_3 = 0; __pyx_t_3 < 0x3E8; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3; - /* "gensim/models/fasttext_inner.pyx":524 + /* "gensim/models/fasttext_inner.pyx":528 * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) # <<<<<<<<<<<<<< @@ -6719,7 +6719,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)exp(((((__pyx_v_i / ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0x3E8)) * 2.0) - 1.0) * 6.0))); - /* "gensim/models/fasttext_inner.pyx":525 + /* "gensim/models/fasttext_inner.pyx":529 * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) # <<<<<<<<<<<<<< @@ -6728,7 +6728,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) / ((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) + 1.0))); - /* "gensim/models/fasttext_inner.pyx":526 + /* "gensim/models/fasttext_inner.pyx":530 * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) * LOG_TABLE[i] = log( EXP_TABLE[i] ) # <<<<<<<<<<<<<< @@ -6738,7 +6738,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P (__pyx_v_6gensim_6models_14fasttext_inner_LOG_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)log((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]))); } - /* "gensim/models/fasttext_inner.pyx":529 + /* "gensim/models/fasttext_inner.pyx":533 * * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) # <<<<<<<<<<<<<< @@ -6747,7 +6747,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_d_res = __pyx_v_6gensim_6models_14word2vec_inner_dsdot((&__pyx_v_size), __pyx_v_x, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_y, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":530 + /* "gensim/models/fasttext_inner.pyx":534 * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res # <<<<<<<<<<<<<< @@ -6756,7 +6756,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_p_res = ((float *)(&__pyx_v_d_res)); - /* "gensim/models/fasttext_inner.pyx":531 + /* "gensim/models/fasttext_inner.pyx":535 * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res * if (abs(d_res - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -6766,7 +6766,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_t_4 = ((fabs((__pyx_v_d_res - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_4) { - /* "gensim/models/fasttext_inner.pyx":532 + /* "gensim/models/fasttext_inner.pyx":536 * p_res = &d_res * if (abs(d_res - expected) < 0.0001): * our_dot = our_dot_double # <<<<<<<<<<<<<< @@ -6775,7 +6775,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_double; - /* "gensim/models/fasttext_inner.pyx":533 + /* "gensim/models/fasttext_inner.pyx":537 * if (abs(d_res - expected) < 0.0001): * our_dot = our_dot_double * our_saxpy = saxpy # <<<<<<<<<<<<<< @@ -6784,7 +6784,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_v_6gensim_6models_14word2vec_inner_saxpy; - /* "gensim/models/fasttext_inner.pyx":534 + /* "gensim/models/fasttext_inner.pyx":538 * our_dot = our_dot_double * our_saxpy = saxpy * return 0 # double # <<<<<<<<<<<<<< @@ -6796,7 +6796,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_r = __pyx_int_0; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":531 + /* "gensim/models/fasttext_inner.pyx":535 * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res * if (abs(d_res - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -6805,7 +6805,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ } - /* "gensim/models/fasttext_inner.pyx":535 + /* "gensim/models/fasttext_inner.pyx":539 * our_saxpy = saxpy * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -6815,7 +6815,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_t_4 = ((fabsf(((__pyx_v_p_res[0]) - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_4) { - /* "gensim/models/fasttext_inner.pyx":536 + /* "gensim/models/fasttext_inner.pyx":540 * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): * our_dot = our_dot_float # <<<<<<<<<<<<<< @@ -6824,7 +6824,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_float; - /* "gensim/models/fasttext_inner.pyx":537 + /* "gensim/models/fasttext_inner.pyx":541 * elif (abs(p_res[0] - expected) < 0.0001): * our_dot = our_dot_float * our_saxpy = saxpy # <<<<<<<<<<<<<< @@ -6833,7 +6833,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_v_6gensim_6models_14word2vec_inner_saxpy; - /* "gensim/models/fasttext_inner.pyx":538 + /* "gensim/models/fasttext_inner.pyx":542 * our_dot = our_dot_float * our_saxpy = saxpy * return 1 # float # <<<<<<<<<<<<<< @@ -6845,7 +6845,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_r = __pyx_int_1; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":535 + /* "gensim/models/fasttext_inner.pyx":539 * our_saxpy = saxpy * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -6854,7 +6854,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ } - /* "gensim/models/fasttext_inner.pyx":542 + /* "gensim/models/fasttext_inner.pyx":546 * # neither => use cython loops, no BLAS * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here * our_dot = our_dot_noblas # <<<<<<<<<<<<<< @@ -6864,7 +6864,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P /*else*/ { __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_noblas; - /* "gensim/models/fasttext_inner.pyx":543 + /* "gensim/models/fasttext_inner.pyx":547 * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas # <<<<<<<<<<<<<< @@ -6873,7 +6873,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_f_6gensim_6models_14word2vec_inner_our_saxpy_noblas; - /* "gensim/models/fasttext_inner.pyx":544 + /* "gensim/models/fasttext_inner.pyx":548 * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas * return 2 # <<<<<<<<<<<<<< @@ -6886,7 +6886,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P goto __pyx_L0; } - /* "gensim/models/fasttext_inner.pyx":506 + /* "gensim/models/fasttext_inner.pyx":510 * * * def init(): # <<<<<<<<<<<<<< @@ -9470,7 +9470,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_ft_hash, __pyx_k_ft_hash, sizeof(__pyx_k_ft_hash), 0, 0, 1, 1}, {&__pyx_n_s_gensim_models_fasttext_inner, __pyx_k_gensim_models_fasttext_inner, sizeof(__pyx_k_gensim_models_fasttext_inner), 0, 0, 1, 1}, {&__pyx_n_s_hash2index, __pyx_k_hash2index, sizeof(__pyx_k_hash2index), 0, 0, 1, 1}, - {&__pyx_kp_s_home_jojo_projekte_gensim_gensi, __pyx_k_home_jojo_projekte_gensim_gensi, sizeof(__pyx_k_home_jojo_projekte_gensim_gensi), 0, 0, 1, 0}, + {&__pyx_kp_s_home_jbaiter_projekte_gensim_ge, __pyx_k_home_jbaiter_projekte_gensim_ge, sizeof(__pyx_k_home_jbaiter_projekte_gensim_ge), 0, 0, 1, 0}, {&__pyx_n_s_hs, __pyx_k_hs, sizeof(__pyx_k_hs), 0, 0, 1, 1}, {&__pyx_n_s_i, __pyx_k_i, sizeof(__pyx_k_i), 0, 0, 1, 1}, {&__pyx_n_s_idx_end, __pyx_k_idx_end, sizeof(__pyx_k_idx_end), 0, 0, 1, 1}, @@ -9553,7 +9553,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { static int __Pyx_InitCachedBuiltins(void) { __pyx_builtin_ImportError = __Pyx_GetBuiltinName(__pyx_n_s_ImportError); if (!__pyx_builtin_ImportError) __PYX_ERR(0, 18, __pyx_L1_error) __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(0, 62, __pyx_L1_error) - __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) __PYX_ERR(0, 347, __pyx_L1_error) + __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) __PYX_ERR(0, 349, __pyx_L1_error) __pyx_builtin_ValueError = __Pyx_GetBuiltinName(__pyx_n_s_ValueError); if (!__pyx_builtin_ValueError) __PYX_ERR(1, 218, __pyx_L1_error) __pyx_builtin_RuntimeError = __Pyx_GetBuiltinName(__pyx_n_s_RuntimeError); if (!__pyx_builtin_RuntimeError) __PYX_ERR(1, 799, __pyx_L1_error) return 0; @@ -9579,17 +9579,17 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__2); __Pyx_GIVEREF(__pyx_tuple__2); - /* "gensim/models/fasttext_inner.pyx":430 + /* "gensim/models/fasttext_inner.pyx":432 * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) __PYX_ERR(0, 430, __pyx_L1_error) + __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) __PYX_ERR(0, 432, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__3); __Pyx_GIVEREF(__pyx_tuple__3); - __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) __PYX_ERR(0, 430, __pyx_L1_error) + __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) __PYX_ERR(0, 432, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__4); __Pyx_GIVEREF(__pyx_tuple__4); @@ -9700,31 +9700,31 @@ static int __Pyx_InitCachedConstants(void) { __pyx_tuple__15 = PyTuple_Pack(47, __pyx_n_s_model, __pyx_n_s_sentences, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_l1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_syn0_vocab, __pyx_n_s_word_locks_vocab, __pyx_n_s_syn0_ngrams, __pyx_n_s_word_locks_ngrams, __pyx_n_s_work_2, __pyx_n_s_l1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_idx, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_effective_words, __pyx_n_s_effective_sentences, __pyx_n_s_sent_idx, __pyx_n_s_idx_start, __pyx_n_s_idx_end, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_subwords_idx_len, __pyx_n_s_subwords_idx, __pyx_n_s_subword_arrays, __pyx_n_s_vlookup, __pyx_n_s_sent, __pyx_n_s_token, __pyx_n_s_word, __pyx_n_s_subwords, __pyx_n_s_word_subwords, __pyx_n_s_item, __pyx_n_s_subword); if (unlikely(!__pyx_tuple__15)) __PYX_ERR(0, 247, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__15); __Pyx_GIVEREF(__pyx_tuple__15); - __pyx_codeobj__16 = (PyObject*)__Pyx_PyCode_New(5, 0, 47, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__15, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_jojo_projekte_gensim_gensi, __pyx_n_s_train_batch_sg, 247, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__16)) __PYX_ERR(0, 247, __pyx_L1_error) + __pyx_codeobj__16 = (PyObject*)__Pyx_PyCode_New(5, 0, 47, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__15, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_jbaiter_projekte_gensim_ge, __pyx_n_s_train_batch_sg, 247, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__16)) __PYX_ERR(0, 247, __pyx_L1_error) - /* "gensim/models/fasttext_inner.pyx":378 + /* "gensim/models/fasttext_inner.pyx":380 * * * def train_batch_cbow(model, sentences, alpha, _work, _neu1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_tuple__17 = PyTuple_Pack(48, __pyx_n_s_model, __pyx_n_s_sentences, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_cbow_mean, __pyx_n_s_syn0_vocab, __pyx_n_s_word_locks_vocab, __pyx_n_s_syn0_ngrams, __pyx_n_s_word_locks_ngrams, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_idx, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_effective_words, __pyx_n_s_effective_sentences, __pyx_n_s_sent_idx, __pyx_n_s_idx_start, __pyx_n_s_idx_end, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_subwords_idx_len, __pyx_n_s_subwords_idx, __pyx_n_s_subword_arrays, __pyx_n_s_neu1_2, __pyx_n_s_vlookup, __pyx_n_s_sent, __pyx_n_s_token, __pyx_n_s_word, __pyx_n_s_subwords, __pyx_n_s_word_subwords, __pyx_n_s_item, __pyx_n_s_subword); if (unlikely(!__pyx_tuple__17)) __PYX_ERR(0, 378, __pyx_L1_error) + __pyx_tuple__17 = PyTuple_Pack(48, __pyx_n_s_model, __pyx_n_s_sentences, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_cbow_mean, __pyx_n_s_syn0_vocab, __pyx_n_s_word_locks_vocab, __pyx_n_s_syn0_ngrams, __pyx_n_s_word_locks_ngrams, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_idx, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_effective_words, __pyx_n_s_effective_sentences, __pyx_n_s_sent_idx, __pyx_n_s_idx_start, __pyx_n_s_idx_end, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_subwords_idx_len, __pyx_n_s_subwords_idx, __pyx_n_s_subword_arrays, __pyx_n_s_neu1_2, __pyx_n_s_vlookup, __pyx_n_s_sent, __pyx_n_s_token, __pyx_n_s_word, __pyx_n_s_subwords, __pyx_n_s_word_subwords, __pyx_n_s_item, __pyx_n_s_subword); if (unlikely(!__pyx_tuple__17)) __PYX_ERR(0, 380, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__17); __Pyx_GIVEREF(__pyx_tuple__17); - __pyx_codeobj__18 = (PyObject*)__Pyx_PyCode_New(5, 0, 48, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__17, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_jojo_projekte_gensim_gensi, __pyx_n_s_train_batch_cbow, 378, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__18)) __PYX_ERR(0, 378, __pyx_L1_error) + __pyx_codeobj__18 = (PyObject*)__Pyx_PyCode_New(5, 0, 48, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__17, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_jbaiter_projekte_gensim_ge, __pyx_n_s_train_batch_cbow, 380, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__18)) __PYX_ERR(0, 380, __pyx_L1_error) - /* "gensim/models/fasttext_inner.pyx":506 + /* "gensim/models/fasttext_inner.pyx":510 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_tuple__19 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__19)) __PYX_ERR(0, 506, __pyx_L1_error) + __pyx_tuple__19 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__19)) __PYX_ERR(0, 510, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__19); __Pyx_GIVEREF(__pyx_tuple__19); - __pyx_codeobj__20 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__19, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_jojo_projekte_gensim_gensi, __pyx_n_s_init, 506, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__20)) __PYX_ERR(0, 506, __pyx_L1_error) + __pyx_codeobj__20 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__19, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_jbaiter_projekte_gensim_ge, __pyx_n_s_init, 510, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__20)) __PYX_ERR(0, 510, __pyx_L1_error) __Pyx_RefNannyFinishContext(); return 0; __pyx_L1_error:; @@ -10078,38 +10078,38 @@ PyMODINIT_FUNC PyInit_fasttext_inner(void) if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_batch_sg, __pyx_t_9) < 0) __PYX_ERR(0, 247, __pyx_L1_error) __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "gensim/models/fasttext_inner.pyx":378 + /* "gensim/models/fasttext_inner.pyx":380 * * * def train_batch_cbow(model, sentences, alpha, _work, _neu1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_3train_batch_cbow, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 378, __pyx_L1_error) + __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_3train_batch_cbow, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 380, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_9); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_batch_cbow, __pyx_t_9) < 0) __PYX_ERR(0, 378, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_batch_cbow, __pyx_t_9) < 0) __PYX_ERR(0, 380, __pyx_L1_error) __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "gensim/models/fasttext_inner.pyx":506 + /* "gensim/models/fasttext_inner.pyx":510 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_5init, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 506, __pyx_L1_error) + __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_5init, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 510, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_9); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_9) < 0) __PYX_ERR(0, 506, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_9) < 0) __PYX_ERR(0, 510, __pyx_L1_error) __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "gensim/models/fasttext_inner.pyx":546 + /* "gensim/models/fasttext_inner.pyx":550 * return 2 * * FAST_VERSION = init() # initialize the module # <<<<<<<<<<<<<< * MAX_WORDS_IN_BATCH = MAX_SENTENCE_LEN * */ - __pyx_t_3 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 546, __pyx_L1_error) + __pyx_t_3 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 550, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_7 = NULL; if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_3))) { @@ -10122,23 +10122,23 @@ PyMODINIT_FUNC PyInit_fasttext_inner(void) } } if (__pyx_t_7) { - __pyx_t_9 = __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_7); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 546, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_7); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 550, __pyx_L1_error) __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; } else { - __pyx_t_9 = __Pyx_PyObject_CallNoArg(__pyx_t_3); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 546, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyObject_CallNoArg(__pyx_t_3); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 550, __pyx_L1_error) } __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_9) < 0) __PYX_ERR(0, 546, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_9) < 0) __PYX_ERR(0, 550, __pyx_L1_error) __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "gensim/models/fasttext_inner.pyx":547 + /* "gensim/models/fasttext_inner.pyx":551 * * FAST_VERSION = init() # initialize the module * MAX_WORDS_IN_BATCH = MAX_SENTENCE_LEN # <<<<<<<<<<<<<< * */ - if (PyDict_SetItem(__pyx_d, __pyx_n_s_MAX_WORDS_IN_BATCH, __pyx_int_10000) < 0) __PYX_ERR(0, 547, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_MAX_WORDS_IN_BATCH, __pyx_int_10000) < 0) __PYX_ERR(0, 551, __pyx_L1_error) /* "gensim/models/fasttext_inner.pyx":1 * #!/usr/bin/env cython # <<<<<<<<<<<<<< diff --git a/gensim/models/fasttext_inner.pyx b/gensim/models/fasttext_inner.pyx index 388ccb1200..5d2e92bc85 100644 --- a/gensim/models/fasttext_inner.pyx +++ b/gensim/models/fasttext_inner.pyx @@ -317,8 +317,10 @@ def train_batch_sg(model, sentences, alpha, _work, _l1): continue indexes[effective_words] = word.index - subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] - for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] + subwords = [ + model.wv.hash2index[ft_hash(subword) % model.bucket] + for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n) + ] word_subwords = np.array([word.index] + subwords, dtype=np.uint32) subwords_idx_len[effective_words] = (len(subwords) + 1) subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) @@ -447,8 +449,10 @@ def train_batch_cbow(model, sentences, alpha, _work, _neu1): continue indexes[effective_words] = word.index - subwords = [model.wv.hash2index[ft_hash(subword) % model.bucket] - for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n)] + subwords = [ + model.wv.hash2index[ft_hash(subword) % model.bucket] + for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n) + ] word_subwords = np.array(subwords, dtype=np.uint32) subwords_idx_len[effective_words] = len(subwords) subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py index 0aac2453c2..97061e97d6 100644 --- a/gensim/models/keyedvectors.py +++ b/gensim/models/keyedvectors.py @@ -1569,8 +1569,7 @@ def __contains__(self, word): return True else: char_ngrams = _compute_ngrams(word, self.min_n, self.max_n) - return any(_ft_hash(ng) % self.bucket in self.hash2index - for ng in char_ngrams) + return any(_ft_hash(ng) % self.bucket in self.hash2index for ng in char_ngrams) def save(self, *args, **kwargs): """Saves the keyedvectors. This saved model can be loaded again using diff --git a/gensim/models/utils_any2vec_fast.c b/gensim/models/utils_any2vec_fast.c index e46231bb45..b0a0e4708b 100644 --- a/gensim/models/utils_any2vec_fast.c +++ b/gensim/models/utils_any2vec_fast.c @@ -719,14 +719,14 @@ static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject /* GetBuiltinName.proto */ static PyObject *__Pyx_GetBuiltinName(PyObject *name); -/* ArgTypeTest.proto */ -static CYTHON_INLINE int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, - const char *name, int exact); - /* unicode_iter.proto */ static CYTHON_INLINE int __Pyx_init_unicode_iteration( PyObject* ustring, Py_ssize_t *length, void** data, int *kind); +/* ArgTypeTest.proto */ +static CYTHON_INLINE int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, + const char *name, int exact); + /* PyObjectFormatSimple.proto */ #if CYTHON_COMPILING_IN_PYPY #define __Pyx_PyObject_FormatSimple(s, f) (\ @@ -846,6 +846,7 @@ static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /* Module declarations from 'gensim.models.utils_any2vec_fast' */ +static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_ft_hash(PyObject *, int __pyx_skip_dispatch); /*proto*/ static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_compute_ngrams(PyObject *, unsigned int, unsigned int, int __pyx_skip_dispatch); /*proto*/ #define __Pyx_MODULE_NAME "gensim.models.utils_any2vec_fast" int __pyx_module_is_main_gensim__models__utils_any2vec_fast = 0; @@ -853,8 +854,6 @@ int __pyx_module_is_main_gensim__models__utils_any2vec_fast = 0; /* Implementation of 'gensim.models.utils_any2vec_fast' */ static PyObject *__pyx_builtin_range; static const char __pyx_k_[] = "<"; -static const char __pyx_k_c[] = "c"; -static const char __pyx_k_h[] = "h"; static const char __pyx_k__2[] = ">"; static const char __pyx_k_main[] = "__main__"; static const char __pyx_k_test[] = "__test__"; @@ -862,59 +861,29 @@ static const char __pyx_k_word[] = "word"; static const char __pyx_k_max_n[] = "max_n"; static const char __pyx_k_min_n[] = "min_n"; static const char __pyx_k_range[] = "range"; -static const char __pyx_k_string[] = "string"; -static const char __pyx_k_ft_hash[] = "ft_hash"; -static const char __pyx_k_home_jojo_projekte_gensim_gensi[] = "/home/jojo/projekte/gensim/gensim/models/utils_any2vec_fast.pyx"; -static const char __pyx_k_gensim_models_utils_any2vec_fast[] = "gensim.models.utils_any2vec_fast"; static PyObject *__pyx_kp_u_; static PyObject *__pyx_kp_u__2; -static PyObject *__pyx_n_s_c; -static PyObject *__pyx_n_s_ft_hash; -static PyObject *__pyx_n_s_gensim_models_utils_any2vec_fast; -static PyObject *__pyx_n_s_h; -static PyObject *__pyx_kp_s_home_jojo_projekte_gensim_gensi; static PyObject *__pyx_n_s_main; static PyObject *__pyx_n_s_max_n; static PyObject *__pyx_n_s_min_n; static PyObject *__pyx_n_s_range; -static PyObject *__pyx_n_s_string; static PyObject *__pyx_n_s_test; static PyObject *__pyx_n_s_word; static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_ft_hash(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_string); /* proto */ static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_2compute_ngrams(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_word, unsigned int __pyx_v_min_n, unsigned int __pyx_v_max_n); /* proto */ static PyObject *__pyx_int_0; static PyObject *__pyx_int_1; -static PyObject *__pyx_tuple__3; -static PyObject *__pyx_codeobj__4; /* "gensim/models/utils_any2vec_fast.pyx":7 * # coding: utf-8 * - * def ft_hash(unicode string): # <<<<<<<<<<<<<< + * cpdef ft_hash(unicode string): # <<<<<<<<<<<<<< * cdef unsigned int h = 2166136261 * for c in string: */ -/* Python wrapper */ static PyObject *__pyx_pw_6gensim_6models_18utils_any2vec_fast_1ft_hash(PyObject *__pyx_self, PyObject *__pyx_v_string); /*proto*/ -static PyMethodDef __pyx_mdef_6gensim_6models_18utils_any2vec_fast_1ft_hash = {"ft_hash", (PyCFunction)__pyx_pw_6gensim_6models_18utils_any2vec_fast_1ft_hash, METH_O, 0}; -static PyObject *__pyx_pw_6gensim_6models_18utils_any2vec_fast_1ft_hash(PyObject *__pyx_self, PyObject *__pyx_v_string) { - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("ft_hash (wrapper)", 0); - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_string), (&PyUnicode_Type), 1, "string", 1))) __PYX_ERR(0, 7, __pyx_L1_error) - __pyx_r = __pyx_pf_6gensim_6models_18utils_any2vec_fast_ft_hash(__pyx_self, ((PyObject*)__pyx_v_string)); - - /* function exit code */ - goto __pyx_L0; - __pyx_L1_error:; - __pyx_r = NULL; - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_ft_hash(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_string) { +static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_ft_hash(PyObject *__pyx_v_string, CYTHON_UNUSED int __pyx_skip_dispatch) { unsigned int __pyx_v_h; Py_UCS4 __pyx_v_c; PyObject *__pyx_r = NULL; @@ -931,7 +900,7 @@ static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_ft_hash(CYTHON_UN /* "gensim/models/utils_any2vec_fast.pyx":8 * - * def ft_hash(unicode string): + * cpdef ft_hash(unicode string): * cdef unsigned int h = 2166136261 # <<<<<<<<<<<<<< * for c in string: * h ^= ord(c) @@ -939,7 +908,7 @@ static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_ft_hash(CYTHON_UN __pyx_v_h = 0x811C9DC5; /* "gensim/models/utils_any2vec_fast.pyx":9 - * def ft_hash(unicode string): + * cpdef ft_hash(unicode string): * cdef unsigned int h = 2166136261 * for c in string: # <<<<<<<<<<<<<< * h ^= ord(c) @@ -993,7 +962,7 @@ static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_ft_hash(CYTHON_UN /* "gensim/models/utils_any2vec_fast.pyx":7 * # coding: utf-8 * - * def ft_hash(unicode string): # <<<<<<<<<<<<<< + * cpdef ft_hash(unicode string): # <<<<<<<<<<<<<< * cdef unsigned int h = 2166136261 * for c in string: */ @@ -1003,6 +972,47 @@ static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_ft_hash(CYTHON_UN __Pyx_XDECREF(__pyx_t_1); __Pyx_XDECREF(__pyx_t_8); __Pyx_AddTraceback("gensim.models.utils_any2vec_fast.ft_hash", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_6gensim_6models_18utils_any2vec_fast_1ft_hash(PyObject *__pyx_self, PyObject *__pyx_v_string); /*proto*/ +static PyObject *__pyx_pw_6gensim_6models_18utils_any2vec_fast_1ft_hash(PyObject *__pyx_self, PyObject *__pyx_v_string) { + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("ft_hash (wrapper)", 0); + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_string), (&PyUnicode_Type), 1, "string", 1))) __PYX_ERR(0, 7, __pyx_L1_error) + __pyx_r = __pyx_pf_6gensim_6models_18utils_any2vec_fast_ft_hash(__pyx_self, ((PyObject*)__pyx_v_string)); + + /* function exit code */ + goto __pyx_L0; + __pyx_L1_error:; + __pyx_r = NULL; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_ft_hash(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_string) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + __Pyx_RefNannySetupContext("ft_hash", 0); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __pyx_f_6gensim_6models_18utils_any2vec_fast_ft_hash(__pyx_v_string, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 7, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("gensim.models.utils_any2vec_fast.ft_hash", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; __Pyx_XGIVEREF(__pyx_r); @@ -1388,6 +1398,7 @@ static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_2compute_ngrams(C } static PyMethodDef __pyx_methods[] = { + {"ft_hash", (PyCFunction)__pyx_pw_6gensim_6models_18utils_any2vec_fast_1ft_hash, METH_O, 0}, {"compute_ngrams", (PyCFunction)__pyx_pw_6gensim_6models_18utils_any2vec_fast_3compute_ngrams, METH_VARARGS|METH_KEYWORDS, 0}, {0, 0, 0, 0} }; @@ -1413,16 +1424,10 @@ static struct PyModuleDef __pyx_moduledef = { static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_kp_u_, __pyx_k_, sizeof(__pyx_k_), 0, 1, 0, 0}, {&__pyx_kp_u__2, __pyx_k__2, sizeof(__pyx_k__2), 0, 1, 0, 0}, - {&__pyx_n_s_c, __pyx_k_c, sizeof(__pyx_k_c), 0, 0, 1, 1}, - {&__pyx_n_s_ft_hash, __pyx_k_ft_hash, sizeof(__pyx_k_ft_hash), 0, 0, 1, 1}, - {&__pyx_n_s_gensim_models_utils_any2vec_fast, __pyx_k_gensim_models_utils_any2vec_fast, sizeof(__pyx_k_gensim_models_utils_any2vec_fast), 0, 0, 1, 1}, - {&__pyx_n_s_h, __pyx_k_h, sizeof(__pyx_k_h), 0, 0, 1, 1}, - {&__pyx_kp_s_home_jojo_projekte_gensim_gensi, __pyx_k_home_jojo_projekte_gensim_gensi, sizeof(__pyx_k_home_jojo_projekte_gensim_gensi), 0, 0, 1, 0}, {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1}, {&__pyx_n_s_max_n, __pyx_k_max_n, sizeof(__pyx_k_max_n), 0, 0, 1, 1}, {&__pyx_n_s_min_n, __pyx_k_min_n, sizeof(__pyx_k_min_n), 0, 0, 1, 1}, {&__pyx_n_s_range, __pyx_k_range, sizeof(__pyx_k_range), 0, 0, 1, 1}, - {&__pyx_n_s_string, __pyx_k_string, sizeof(__pyx_k_string), 0, 0, 1, 1}, {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, {&__pyx_n_s_word, __pyx_k_word, sizeof(__pyx_k_word), 0, 0, 1, 1}, {0, 0, 0, 0, 0, 0, 0} @@ -1437,23 +1442,8 @@ static int __Pyx_InitCachedBuiltins(void) { static int __Pyx_InitCachedConstants(void) { __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); - - /* "gensim/models/utils_any2vec_fast.pyx":7 - * # coding: utf-8 - * - * def ft_hash(unicode string): # <<<<<<<<<<<<<< - * cdef unsigned int h = 2166136261 - * for c in string: - */ - __pyx_tuple__3 = PyTuple_Pack(3, __pyx_n_s_string, __pyx_n_s_h, __pyx_n_s_c); if (unlikely(!__pyx_tuple__3)) __PYX_ERR(0, 7, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__3); - __Pyx_GIVEREF(__pyx_tuple__3); - __pyx_codeobj__4 = (PyObject*)__Pyx_PyCode_New(1, 0, 3, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__3, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_jojo_projekte_gensim_gensi, __pyx_n_s_ft_hash, 7, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__4)) __PYX_ERR(0, 7, __pyx_L1_error) __Pyx_RefNannyFinishContext(); return 0; - __pyx_L1_error:; - __Pyx_RefNannyFinishContext(); - return -1; } static int __Pyx_InitGlobals(void) { @@ -1557,18 +1547,6 @@ PyMODINIT_FUNC PyInit_utils_any2vec_fast(void) if (__Pyx_patch_abc() < 0) __PYX_ERR(0, 1, __pyx_L1_error) #endif - /* "gensim/models/utils_any2vec_fast.pyx":7 - * # coding: utf-8 - * - * def ft_hash(unicode string): # <<<<<<<<<<<<<< - * cdef unsigned int h = 2166136261 - * for c in string: - */ - __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_18utils_any2vec_fast_1ft_hash, NULL, __pyx_n_s_gensim_models_utils_any2vec_fast); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 7, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_ft_hash, __pyx_t_1) < 0) __PYX_ERR(0, 7, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/utils_any2vec_fast.pyx":1 * #!/usr/bin/env cython # <<<<<<<<<<<<<< * # cython: boundscheck=False @@ -1633,6 +1611,22 @@ static PyObject *__Pyx_GetBuiltinName(PyObject *name) { return result; } +/* unicode_iter */ +static CYTHON_INLINE int __Pyx_init_unicode_iteration( + PyObject* ustring, Py_ssize_t *length, void** data, int *kind) { +#if CYTHON_PEP393_ENABLED + if (unlikely(__Pyx_PyUnicode_READY(ustring) < 0)) return -1; + *kind = PyUnicode_KIND(ustring); + *length = PyUnicode_GET_LENGTH(ustring); + *data = PyUnicode_DATA(ustring); +#else + *kind = 0; + *length = PyUnicode_GET_SIZE(ustring); + *data = (void*)PyUnicode_AS_UNICODE(ustring); +#endif + return 0; +} + /* ArgTypeTest */ static void __Pyx_RaiseArgumentTypeInvalid(const char* name, PyObject *obj, PyTypeObject *type) { PyErr_Format(PyExc_TypeError, @@ -1660,22 +1654,6 @@ static CYTHON_INLINE int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, in return 0; } -/* unicode_iter */ -static CYTHON_INLINE int __Pyx_init_unicode_iteration( - PyObject* ustring, Py_ssize_t *length, void** data, int *kind) { -#if CYTHON_PEP393_ENABLED - if (unlikely(__Pyx_PyUnicode_READY(ustring) < 0)) return -1; - *kind = PyUnicode_KIND(ustring); - *length = PyUnicode_GET_LENGTH(ustring); - *data = PyUnicode_DATA(ustring); -#else - *kind = 0; - *length = PyUnicode_GET_SIZE(ustring); - *data = (void*)PyUnicode_AS_UNICODE(ustring); -#endif - return 0; -} - /* JoinPyUnicode */ static PyObject* __Pyx_PyUnicode_Join(PyObject* value_tuple, Py_ssize_t value_count, Py_ssize_t result_ulength, CYTHON_UNUSED Py_UCS4 max_char) { diff --git a/gensim/models/utils_any2vec_fast.pyx b/gensim/models/utils_any2vec_fast.pyx index 05745c0526..81727af0bf 100644 --- a/gensim/models/utils_any2vec_fast.pyx +++ b/gensim/models/utils_any2vec_fast.pyx @@ -4,7 +4,7 @@ # cython: cdivision=True # coding: utf-8 -def ft_hash(unicode string): +cpdef ft_hash(unicode string): cdef unsigned int h = 2166136261 for c in string: h ^= ord(c) From 764071b1bd321c99419d222462c9dd47bf8a2097 Mon Sep 17 00:00:00 2001 From: Johannes Baiter Date: Fri, 23 Feb 2018 15:09:42 +0100 Subject: [PATCH 08/20] Rename utils_any2vec_fast to _utils_any2vec --- ...{utils_any2vec_fast.c => _utils_any2vec.c} | 126 +++++++++--------- ...ls_any2vec_fast.pyx => _utils_any2vec.pyx} | 0 gensim/models/fasttext_inner.c | 12 +- gensim/models/fasttext_inner.pyx | 2 +- gensim/models/utils_any2vec.py | 2 +- setup.py | 4 +- 6 files changed, 73 insertions(+), 73 deletions(-) rename gensim/models/{utils_any2vec_fast.c => _utils_any2vec.c} (95%) rename gensim/models/{utils_any2vec_fast.pyx => _utils_any2vec.pyx} (100%) diff --git a/gensim/models/utils_any2vec_fast.c b/gensim/models/_utils_any2vec.c similarity index 95% rename from gensim/models/utils_any2vec_fast.c rename to gensim/models/_utils_any2vec.c index b0a0e4708b..48ade88f1c 100644 --- a/gensim/models/utils_any2vec_fast.c +++ b/gensim/models/_utils_any2vec.c @@ -3,7 +3,7 @@ /* BEGIN: Cython Metadata { "distutils": {}, - "module_name": "gensim.models.utils_any2vec_fast" + "module_name": "gensim.models._utils_any2vec" } END: Cython Metadata */ @@ -434,8 +434,8 @@ static CYTHON_INLINE float __PYX_NAN() { #endif #endif -#define __PYX_HAVE__gensim__models__utils_any2vec_fast -#define __PYX_HAVE_API__gensim__models__utils_any2vec_fast +#define __PYX_HAVE__gensim__models___utils_any2vec +#define __PYX_HAVE_API__gensim__models___utils_any2vec #ifdef _OPENMP #include #endif /* _OPENMP */ @@ -631,7 +631,7 @@ static const char *__pyx_filename; static const char *__pyx_f[] = { - "gensim/models/utils_any2vec_fast.pyx", + "gensim/models/_utils_any2vec.pyx", }; /*--- Type declarations ---*/ @@ -845,13 +845,13 @@ static int __Pyx_check_binary_version(void); static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); -/* Module declarations from 'gensim.models.utils_any2vec_fast' */ -static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_ft_hash(PyObject *, int __pyx_skip_dispatch); /*proto*/ -static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_compute_ngrams(PyObject *, unsigned int, unsigned int, int __pyx_skip_dispatch); /*proto*/ -#define __Pyx_MODULE_NAME "gensim.models.utils_any2vec_fast" -int __pyx_module_is_main_gensim__models__utils_any2vec_fast = 0; +/* Module declarations from 'gensim.models._utils_any2vec' */ +static PyObject *__pyx_f_6gensim_6models_14_utils_any2vec_ft_hash(PyObject *, int __pyx_skip_dispatch); /*proto*/ +static PyObject *__pyx_f_6gensim_6models_14_utils_any2vec_compute_ngrams(PyObject *, unsigned int, unsigned int, int __pyx_skip_dispatch); /*proto*/ +#define __Pyx_MODULE_NAME "gensim.models._utils_any2vec" +int __pyx_module_is_main_gensim__models___utils_any2vec = 0; -/* Implementation of 'gensim.models.utils_any2vec_fast' */ +/* Implementation of 'gensim.models._utils_any2vec' */ static PyObject *__pyx_builtin_range; static const char __pyx_k_[] = "<"; static const char __pyx_k__2[] = ">"; @@ -869,12 +869,12 @@ static PyObject *__pyx_n_s_min_n; static PyObject *__pyx_n_s_range; static PyObject *__pyx_n_s_test; static PyObject *__pyx_n_s_word; -static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_ft_hash(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_string); /* proto */ -static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_2compute_ngrams(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_word, unsigned int __pyx_v_min_n, unsigned int __pyx_v_max_n); /* proto */ +static PyObject *__pyx_pf_6gensim_6models_14_utils_any2vec_ft_hash(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_string); /* proto */ +static PyObject *__pyx_pf_6gensim_6models_14_utils_any2vec_2compute_ngrams(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_word, unsigned int __pyx_v_min_n, unsigned int __pyx_v_max_n); /* proto */ static PyObject *__pyx_int_0; static PyObject *__pyx_int_1; -/* "gensim/models/utils_any2vec_fast.pyx":7 +/* "gensim/models/_utils_any2vec.pyx":7 * # coding: utf-8 * * cpdef ft_hash(unicode string): # <<<<<<<<<<<<<< @@ -882,8 +882,8 @@ static PyObject *__pyx_int_1; * for c in string: */ -static PyObject *__pyx_pw_6gensim_6models_18utils_any2vec_fast_1ft_hash(PyObject *__pyx_self, PyObject *__pyx_v_string); /*proto*/ -static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_ft_hash(PyObject *__pyx_v_string, CYTHON_UNUSED int __pyx_skip_dispatch) { +static PyObject *__pyx_pw_6gensim_6models_14_utils_any2vec_1ft_hash(PyObject *__pyx_self, PyObject *__pyx_v_string); /*proto*/ +static PyObject *__pyx_f_6gensim_6models_14_utils_any2vec_ft_hash(PyObject *__pyx_v_string, CYTHON_UNUSED int __pyx_skip_dispatch) { unsigned int __pyx_v_h; Py_UCS4 __pyx_v_c; PyObject *__pyx_r = NULL; @@ -898,7 +898,7 @@ static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_ft_hash(PyObject * PyObject *__pyx_t_8 = NULL; __Pyx_RefNannySetupContext("ft_hash", 0); - /* "gensim/models/utils_any2vec_fast.pyx":8 + /* "gensim/models/_utils_any2vec.pyx":8 * * cpdef ft_hash(unicode string): * cdef unsigned int h = 2166136261 # <<<<<<<<<<<<<< @@ -907,7 +907,7 @@ static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_ft_hash(PyObject * */ __pyx_v_h = 0x811C9DC5; - /* "gensim/models/utils_any2vec_fast.pyx":9 + /* "gensim/models/_utils_any2vec.pyx":9 * cpdef ft_hash(unicode string): * cdef unsigned int h = 2166136261 * for c in string: # <<<<<<<<<<<<<< @@ -925,7 +925,7 @@ static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_ft_hash(PyObject * __pyx_t_2 = __pyx_t_7; __pyx_v_c = __Pyx_PyUnicode_READ(__pyx_t_5, __pyx_t_4, __pyx_t_2); - /* "gensim/models/utils_any2vec_fast.pyx":10 + /* "gensim/models/_utils_any2vec.pyx":10 * cdef unsigned int h = 2166136261 * for c in string: * h ^= ord(c) # <<<<<<<<<<<<<< @@ -934,7 +934,7 @@ static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_ft_hash(PyObject * */ __pyx_v_h = (__pyx_v_h ^ ((long)__pyx_v_c)); - /* "gensim/models/utils_any2vec_fast.pyx":11 + /* "gensim/models/_utils_any2vec.pyx":11 * for c in string: * h ^= ord(c) * h *= 16777619 # <<<<<<<<<<<<<< @@ -945,7 +945,7 @@ static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_ft_hash(PyObject * } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/utils_any2vec_fast.pyx":12 + /* "gensim/models/_utils_any2vec.pyx":12 * h ^= ord(c) * h *= 16777619 * return h # <<<<<<<<<<<<<< @@ -959,7 +959,7 @@ static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_ft_hash(PyObject * __pyx_t_8 = 0; goto __pyx_L0; - /* "gensim/models/utils_any2vec_fast.pyx":7 + /* "gensim/models/_utils_any2vec.pyx":7 * # coding: utf-8 * * cpdef ft_hash(unicode string): # <<<<<<<<<<<<<< @@ -971,7 +971,7 @@ static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_ft_hash(PyObject * __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); __Pyx_XDECREF(__pyx_t_8); - __Pyx_AddTraceback("gensim.models.utils_any2vec_fast.ft_hash", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_AddTraceback("gensim.models._utils_any2vec.ft_hash", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = 0; __pyx_L0:; __Pyx_XGIVEREF(__pyx_r); @@ -980,13 +980,13 @@ static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_ft_hash(PyObject * } /* Python wrapper */ -static PyObject *__pyx_pw_6gensim_6models_18utils_any2vec_fast_1ft_hash(PyObject *__pyx_self, PyObject *__pyx_v_string); /*proto*/ -static PyObject *__pyx_pw_6gensim_6models_18utils_any2vec_fast_1ft_hash(PyObject *__pyx_self, PyObject *__pyx_v_string) { +static PyObject *__pyx_pw_6gensim_6models_14_utils_any2vec_1ft_hash(PyObject *__pyx_self, PyObject *__pyx_v_string); /*proto*/ +static PyObject *__pyx_pw_6gensim_6models_14_utils_any2vec_1ft_hash(PyObject *__pyx_self, PyObject *__pyx_v_string) { PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("ft_hash (wrapper)", 0); if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_string), (&PyUnicode_Type), 1, "string", 1))) __PYX_ERR(0, 7, __pyx_L1_error) - __pyx_r = __pyx_pf_6gensim_6models_18utils_any2vec_fast_ft_hash(__pyx_self, ((PyObject*)__pyx_v_string)); + __pyx_r = __pyx_pf_6gensim_6models_14_utils_any2vec_ft_hash(__pyx_self, ((PyObject*)__pyx_v_string)); /* function exit code */ goto __pyx_L0; @@ -997,13 +997,13 @@ static PyObject *__pyx_pw_6gensim_6models_18utils_any2vec_fast_1ft_hash(PyObject return __pyx_r; } -static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_ft_hash(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_string) { +static PyObject *__pyx_pf_6gensim_6models_14_utils_any2vec_ft_hash(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_string) { PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; __Pyx_RefNannySetupContext("ft_hash", 0); __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __pyx_f_6gensim_6models_18utils_any2vec_fast_ft_hash(__pyx_v_string, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 7, __pyx_L1_error) + __pyx_t_1 = __pyx_f_6gensim_6models_14_utils_any2vec_ft_hash(__pyx_v_string, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 7, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; @@ -1012,7 +1012,7 @@ static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_ft_hash(CYTHON_UN /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("gensim.models.utils_any2vec_fast.ft_hash", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_AddTraceback("gensim.models._utils_any2vec.ft_hash", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; __Pyx_XGIVEREF(__pyx_r); @@ -1020,7 +1020,7 @@ static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_ft_hash(CYTHON_UN return __pyx_r; } -/* "gensim/models/utils_any2vec_fast.pyx":15 +/* "gensim/models/_utils_any2vec.pyx":15 * * * cpdef compute_ngrams(word, unsigned int min_n, unsigned int max_n): # <<<<<<<<<<<<<< @@ -1028,8 +1028,8 @@ static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_ft_hash(CYTHON_UN * ngrams = [] */ -static PyObject *__pyx_pw_6gensim_6models_18utils_any2vec_fast_3compute_ngrams(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_compute_ngrams(PyObject *__pyx_v_word, unsigned int __pyx_v_min_n, unsigned int __pyx_v_max_n, CYTHON_UNUSED int __pyx_skip_dispatch) { +static PyObject *__pyx_pw_6gensim_6models_14_utils_any2vec_3compute_ngrams(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static PyObject *__pyx_f_6gensim_6models_14_utils_any2vec_compute_ngrams(PyObject *__pyx_v_word, unsigned int __pyx_v_min_n, unsigned int __pyx_v_max_n, CYTHON_UNUSED int __pyx_skip_dispatch) { PyObject *__pyx_v_extended_word = 0; PyObject *__pyx_v_ngrams = NULL; PyObject *__pyx_v_ngram_length = NULL; @@ -1050,7 +1050,7 @@ static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_compute_ngrams(PyO int __pyx_t_12; __Pyx_RefNannySetupContext("compute_ngrams", 0); - /* "gensim/models/utils_any2vec_fast.pyx":16 + /* "gensim/models/_utils_any2vec.pyx":16 * * cpdef compute_ngrams(word, unsigned int min_n, unsigned int max_n): * cdef unicode extended_word = f'<{word}>' # <<<<<<<<<<<<<< @@ -1082,7 +1082,7 @@ static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_compute_ngrams(PyO __pyx_v_extended_word = ((PyObject*)__pyx_t_4); __pyx_t_4 = 0; - /* "gensim/models/utils_any2vec_fast.pyx":17 + /* "gensim/models/_utils_any2vec.pyx":17 * cpdef compute_ngrams(word, unsigned int min_n, unsigned int max_n): * cdef unicode extended_word = f'<{word}>' * ngrams = [] # <<<<<<<<<<<<<< @@ -1094,7 +1094,7 @@ static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_compute_ngrams(PyO __pyx_v_ngrams = ((PyObject*)__pyx_t_4); __pyx_t_4 = 0; - /* "gensim/models/utils_any2vec_fast.pyx":18 + /* "gensim/models/_utils_any2vec.pyx":18 * cdef unicode extended_word = f'<{word}>' * ngrams = [] * for ngram_length in range(min_n, min(len(extended_word), max_n) + 1): # <<<<<<<<<<<<<< @@ -1166,7 +1166,7 @@ static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_compute_ngrams(PyO __Pyx_XDECREF_SET(__pyx_v_ngram_length, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/utils_any2vec_fast.pyx":19 + /* "gensim/models/_utils_any2vec.pyx":19 * ngrams = [] * for ngram_length in range(min_n, min(len(extended_word), max_n) + 1): * for i in range(0, len(extended_word) - ngram_length + 1): # <<<<<<<<<<<<<< @@ -1236,7 +1236,7 @@ static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_compute_ngrams(PyO __Pyx_XDECREF_SET(__pyx_v_i, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/utils_any2vec_fast.pyx":20 + /* "gensim/models/_utils_any2vec.pyx":20 * for ngram_length in range(min_n, min(len(extended_word), max_n) + 1): * for i in range(0, len(extended_word) - ngram_length + 1): * ngrams.append(extended_word[i:i + ngram_length]) # <<<<<<<<<<<<<< @@ -1252,7 +1252,7 @@ static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_compute_ngrams(PyO __pyx_t_12 = __Pyx_PyList_Append(__pyx_v_ngrams, __pyx_t_1); if (unlikely(__pyx_t_12 == -1)) __PYX_ERR(0, 20, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/utils_any2vec_fast.pyx":19 + /* "gensim/models/_utils_any2vec.pyx":19 * ngrams = [] * for ngram_length in range(min_n, min(len(extended_word), max_n) + 1): * for i in range(0, len(extended_word) - ngram_length + 1): # <<<<<<<<<<<<<< @@ -1262,7 +1262,7 @@ static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_compute_ngrams(PyO } __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "gensim/models/utils_any2vec_fast.pyx":18 + /* "gensim/models/_utils_any2vec.pyx":18 * cdef unicode extended_word = f'<{word}>' * ngrams = [] * for ngram_length in range(min_n, min(len(extended_word), max_n) + 1): # <<<<<<<<<<<<<< @@ -1272,7 +1272,7 @@ static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_compute_ngrams(PyO } __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - /* "gensim/models/utils_any2vec_fast.pyx":21 + /* "gensim/models/_utils_any2vec.pyx":21 * for i in range(0, len(extended_word) - ngram_length + 1): * ngrams.append(extended_word[i:i + ngram_length]) * return ngrams # <<<<<<<<<<<<<< @@ -1282,7 +1282,7 @@ static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_compute_ngrams(PyO __pyx_r = __pyx_v_ngrams; goto __pyx_L0; - /* "gensim/models/utils_any2vec_fast.pyx":15 + /* "gensim/models/_utils_any2vec.pyx":15 * * * cpdef compute_ngrams(word, unsigned int min_n, unsigned int max_n): # <<<<<<<<<<<<<< @@ -1295,7 +1295,7 @@ static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_compute_ngrams(PyO __Pyx_XDECREF(__pyx_t_1); __Pyx_XDECREF(__pyx_t_4); __Pyx_XDECREF(__pyx_t_7); - __Pyx_AddTraceback("gensim.models.utils_any2vec_fast.compute_ngrams", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_AddTraceback("gensim.models._utils_any2vec.compute_ngrams", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = 0; __pyx_L0:; __Pyx_XDECREF(__pyx_v_extended_word); @@ -1308,8 +1308,8 @@ static PyObject *__pyx_f_6gensim_6models_18utils_any2vec_fast_compute_ngrams(PyO } /* Python wrapper */ -static PyObject *__pyx_pw_6gensim_6models_18utils_any2vec_fast_3compute_ngrams(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static PyObject *__pyx_pw_6gensim_6models_18utils_any2vec_fast_3compute_ngrams(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { +static PyObject *__pyx_pw_6gensim_6models_14_utils_any2vec_3compute_ngrams(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static PyObject *__pyx_pw_6gensim_6models_14_utils_any2vec_3compute_ngrams(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_word = 0; unsigned int __pyx_v_min_n; unsigned int __pyx_v_max_n; @@ -1363,24 +1363,24 @@ static PyObject *__pyx_pw_6gensim_6models_18utils_any2vec_fast_3compute_ngrams(P __pyx_L5_argtuple_error:; __Pyx_RaiseArgtupleInvalid("compute_ngrams", 1, 3, 3, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 15, __pyx_L3_error) __pyx_L3_error:; - __Pyx_AddTraceback("gensim.models.utils_any2vec_fast.compute_ngrams", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_AddTraceback("gensim.models._utils_any2vec.compute_ngrams", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_6gensim_6models_18utils_any2vec_fast_2compute_ngrams(__pyx_self, __pyx_v_word, __pyx_v_min_n, __pyx_v_max_n); + __pyx_r = __pyx_pf_6gensim_6models_14_utils_any2vec_2compute_ngrams(__pyx_self, __pyx_v_word, __pyx_v_min_n, __pyx_v_max_n); /* function exit code */ __Pyx_RefNannyFinishContext(); return __pyx_r; } -static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_2compute_ngrams(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_word, unsigned int __pyx_v_min_n, unsigned int __pyx_v_max_n) { +static PyObject *__pyx_pf_6gensim_6models_14_utils_any2vec_2compute_ngrams(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_word, unsigned int __pyx_v_min_n, unsigned int __pyx_v_max_n) { PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; __Pyx_RefNannySetupContext("compute_ngrams", 0); __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __pyx_f_6gensim_6models_18utils_any2vec_fast_compute_ngrams(__pyx_v_word, __pyx_v_min_n, __pyx_v_max_n, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 15, __pyx_L1_error) + __pyx_t_1 = __pyx_f_6gensim_6models_14_utils_any2vec_compute_ngrams(__pyx_v_word, __pyx_v_min_n, __pyx_v_max_n, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 15, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; @@ -1389,7 +1389,7 @@ static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_2compute_ngrams(C /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("gensim.models.utils_any2vec_fast.compute_ngrams", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_AddTraceback("gensim.models._utils_any2vec.compute_ngrams", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; __Pyx_XGIVEREF(__pyx_r); @@ -1398,8 +1398,8 @@ static PyObject *__pyx_pf_6gensim_6models_18utils_any2vec_fast_2compute_ngrams(C } static PyMethodDef __pyx_methods[] = { - {"ft_hash", (PyCFunction)__pyx_pw_6gensim_6models_18utils_any2vec_fast_1ft_hash, METH_O, 0}, - {"compute_ngrams", (PyCFunction)__pyx_pw_6gensim_6models_18utils_any2vec_fast_3compute_ngrams, METH_VARARGS|METH_KEYWORDS, 0}, + {"ft_hash", (PyCFunction)__pyx_pw_6gensim_6models_14_utils_any2vec_1ft_hash, METH_O, 0}, + {"compute_ngrams", (PyCFunction)__pyx_pw_6gensim_6models_14_utils_any2vec_3compute_ngrams, METH_VARARGS|METH_KEYWORDS, 0}, {0, 0, 0, 0} }; @@ -1410,7 +1410,7 @@ static struct PyModuleDef __pyx_moduledef = { #else PyModuleDef_HEAD_INIT, #endif - "utils_any2vec_fast", + "_utils_any2vec", 0, /* m_doc */ -1, /* m_size */ __pyx_methods /* m_methods */, @@ -1456,11 +1456,11 @@ static int __Pyx_InitGlobals(void) { } #if PY_MAJOR_VERSION < 3 -PyMODINIT_FUNC initutils_any2vec_fast(void); /*proto*/ -PyMODINIT_FUNC initutils_any2vec_fast(void) +PyMODINIT_FUNC init_utils_any2vec(void); /*proto*/ +PyMODINIT_FUNC init_utils_any2vec(void) #else -PyMODINIT_FUNC PyInit_utils_any2vec_fast(void); /*proto*/ -PyMODINIT_FUNC PyInit_utils_any2vec_fast(void) +PyMODINIT_FUNC PyInit__utils_any2vec(void); /*proto*/ +PyMODINIT_FUNC PyInit__utils_any2vec(void) #endif { PyObject *__pyx_t_1 = NULL; @@ -1474,7 +1474,7 @@ PyMODINIT_FUNC PyInit_utils_any2vec_fast(void) Py_FatalError("failed to import 'refnanny' module"); } #endif - __Pyx_RefNannySetupContext("PyMODINIT_FUNC PyInit_utils_any2vec_fast(void)", 0); + __Pyx_RefNannySetupContext("PyMODINIT_FUNC PyInit__utils_any2vec(void)", 0); if (__Pyx_check_binary_version() < 0) __PYX_ERR(0, 1, __pyx_L1_error) __pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) __PYX_ERR(0, 1, __pyx_L1_error) __pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) __PYX_ERR(0, 1, __pyx_L1_error) @@ -1503,7 +1503,7 @@ PyMODINIT_FUNC PyInit_utils_any2vec_fast(void) #endif /*--- Module creation code ---*/ #if PY_MAJOR_VERSION < 3 - __pyx_m = Py_InitModule4("utils_any2vec_fast", __pyx_methods, 0, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m); + __pyx_m = Py_InitModule4("_utils_any2vec", __pyx_methods, 0, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m); #else __pyx_m = PyModule_Create(&__pyx_moduledef); #endif @@ -1520,14 +1520,14 @@ PyMODINIT_FUNC PyInit_utils_any2vec_fast(void) #if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) if (__Pyx_init_sys_getdefaultencoding_params() < 0) __PYX_ERR(0, 1, __pyx_L1_error) #endif - if (__pyx_module_is_main_gensim__models__utils_any2vec_fast) { + if (__pyx_module_is_main_gensim__models___utils_any2vec) { if (PyObject_SetAttrString(__pyx_m, "__name__", __pyx_n_s_main) < 0) __PYX_ERR(0, 1, __pyx_L1_error) } #if PY_MAJOR_VERSION >= 3 { PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) __PYX_ERR(0, 1, __pyx_L1_error) - if (!PyDict_GetItemString(modules, "gensim.models.utils_any2vec_fast")) { - if (unlikely(PyDict_SetItemString(modules, "gensim.models.utils_any2vec_fast", __pyx_m) < 0)) __PYX_ERR(0, 1, __pyx_L1_error) + if (!PyDict_GetItemString(modules, "gensim.models._utils_any2vec")) { + if (unlikely(PyDict_SetItemString(modules, "gensim.models._utils_any2vec", __pyx_m) < 0)) __PYX_ERR(0, 1, __pyx_L1_error) } } #endif @@ -1547,7 +1547,7 @@ PyMODINIT_FUNC PyInit_utils_any2vec_fast(void) if (__Pyx_patch_abc() < 0) __PYX_ERR(0, 1, __pyx_L1_error) #endif - /* "gensim/models/utils_any2vec_fast.pyx":1 + /* "gensim/models/_utils_any2vec.pyx":1 * #!/usr/bin/env cython # <<<<<<<<<<<<<< * # cython: boundscheck=False * # cython: wraparound=False @@ -1564,11 +1564,11 @@ PyMODINIT_FUNC PyInit_utils_any2vec_fast(void) __Pyx_XDECREF(__pyx_t_1); if (__pyx_m) { if (__pyx_d) { - __Pyx_AddTraceback("init gensim.models.utils_any2vec_fast", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_AddTraceback("init gensim.models._utils_any2vec", __pyx_clineno, __pyx_lineno, __pyx_filename); } Py_DECREF(__pyx_m); __pyx_m = 0; } else if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ImportError, "init gensim.models.utils_any2vec_fast"); + PyErr_SetString(PyExc_ImportError, "init gensim.models._utils_any2vec"); } __pyx_L0:; __Pyx_RefNannyFinishContext(); diff --git a/gensim/models/utils_any2vec_fast.pyx b/gensim/models/_utils_any2vec.pyx similarity index 100% rename from gensim/models/utils_any2vec_fast.pyx rename to gensim/models/_utils_any2vec.pyx diff --git a/gensim/models/fasttext_inner.c b/gensim/models/fasttext_inner.c index 7fe2508926..45eef6a8d0 100644 --- a/gensim/models/fasttext_inner.c +++ b/gensim/models/fasttext_inner.c @@ -1593,6 +1593,7 @@ static const char __pyx_k_RuntimeError[] = "RuntimeError"; static const char __pyx_k_sentence_idx[] = "sentence_idx"; static const char __pyx_k_subwords_idx[] = "subwords_idx"; static const char __pyx_k_cum_table_len[] = "cum_table_len"; +static const char __pyx_k_utils_any2vec[] = "_utils_any2vec"; static const char __pyx_k_vectors_vocab[] = "vectors_vocab"; static const char __pyx_k_word_subwords[] = "word_subwords"; static const char __pyx_k_compute_ngrams[] = "compute_ngrams"; @@ -1607,7 +1608,6 @@ static const char __pyx_k_word_locks_vocab[] = "word_locks_vocab"; static const char __pyx_k_scipy_linalg_blas[] = "scipy.linalg.blas"; static const char __pyx_k_word_locks_ngrams[] = "word_locks_ngrams"; static const char __pyx_k_MAX_WORDS_IN_BATCH[] = "MAX_WORDS_IN_BATCH"; -static const char __pyx_k_utils_any2vec_fast[] = "utils_any2vec_fast"; static const char __pyx_k_effective_sentences[] = "effective_sentences"; static const char __pyx_k_vectors_vocab_lockf[] = "vectors_vocab_lockf"; static const char __pyx_k_vectors_ngrams_lockf[] = "vectors_ngrams_lockf"; @@ -1712,7 +1712,7 @@ static PyObject *__pyx_n_s_train_batch_sg; static PyObject *__pyx_n_s_trainables; static PyObject *__pyx_n_s_uint32; static PyObject *__pyx_kp_u_unknown_dtype_code_in_numpy_pxd; -static PyObject *__pyx_n_s_utils_any2vec_fast; +static PyObject *__pyx_n_s_utils_any2vec; static PyObject *__pyx_n_s_vector_size; static PyObject *__pyx_n_s_vectors_ngrams; static PyObject *__pyx_n_s_vectors_ngrams_lockf; @@ -9529,7 +9529,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_trainables, __pyx_k_trainables, sizeof(__pyx_k_trainables), 0, 0, 1, 1}, {&__pyx_n_s_uint32, __pyx_k_uint32, sizeof(__pyx_k_uint32), 0, 0, 1, 1}, {&__pyx_kp_u_unknown_dtype_code_in_numpy_pxd, __pyx_k_unknown_dtype_code_in_numpy_pxd, sizeof(__pyx_k_unknown_dtype_code_in_numpy_pxd), 0, 1, 0, 0}, - {&__pyx_n_s_utils_any2vec_fast, __pyx_k_utils_any2vec_fast, sizeof(__pyx_k_utils_any2vec_fast), 0, 0, 1, 1}, + {&__pyx_n_s_utils_any2vec, __pyx_k_utils_any2vec, sizeof(__pyx_k_utils_any2vec), 0, 0, 1, 1}, {&__pyx_n_s_vector_size, __pyx_k_vector_size, sizeof(__pyx_k_vector_size), 0, 0, 1, 1}, {&__pyx_n_s_vectors_ngrams, __pyx_k_vectors_ngrams, sizeof(__pyx_k_vectors_ngrams), 0, 0, 1, 1}, {&__pyx_n_s_vectors_ngrams_lockf, __pyx_k_vectors_ngrams_lockf, sizeof(__pyx_k_vectors_ngrams_lockf), 0, 0, 1, 1}, @@ -9963,7 +9963,7 @@ PyMODINIT_FUNC PyInit_fasttext_inner(void) * # in scipy > 0.15, fblas function has been removed * import scipy.linalg.blas as fblas # <<<<<<<<<<<<<< * - * from utils_any2vec_fast import compute_ngrams, ft_hash + * from _utils_any2vec import compute_ngrams, ft_hash */ __pyx_t_10 = PyList_New(1); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 20, __pyx_L4_except_error) __Pyx_GOTREF(__pyx_t_10); @@ -10008,7 +10008,7 @@ PyMODINIT_FUNC PyInit_fasttext_inner(void) /* "gensim/models/fasttext_inner.pyx":22 * import scipy.linalg.blas as fblas * - * from utils_any2vec_fast import compute_ngrams, ft_hash # <<<<<<<<<<<<<< + * from _utils_any2vec import compute_ngrams, ft_hash # <<<<<<<<<<<<<< * from word2vec_inner cimport bisect_left, random_int32, \ * scopy, saxpy, sdot, dsdot, snrm2, sscal, \ */ @@ -10020,7 +10020,7 @@ PyMODINIT_FUNC PyInit_fasttext_inner(void) __Pyx_INCREF(__pyx_n_s_ft_hash); __Pyx_GIVEREF(__pyx_n_s_ft_hash); PyList_SET_ITEM(__pyx_t_9, 1, __pyx_n_s_ft_hash); - __pyx_t_3 = __Pyx_Import(__pyx_n_s_utils_any2vec_fast, __pyx_t_9, -1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 22, __pyx_L1_error) + __pyx_t_3 = __Pyx_Import(__pyx_n_s_utils_any2vec, __pyx_t_9, -1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 22, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; __pyx_t_9 = __Pyx_ImportFrom(__pyx_t_3, __pyx_n_s_compute_ngrams); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 22, __pyx_L1_error) diff --git a/gensim/models/fasttext_inner.pyx b/gensim/models/fasttext_inner.pyx index 5d2e92bc85..83255472b1 100644 --- a/gensim/models/fasttext_inner.pyx +++ b/gensim/models/fasttext_inner.pyx @@ -19,7 +19,7 @@ except ImportError: # in scipy > 0.15, fblas function has been removed import scipy.linalg.blas as fblas -from utils_any2vec_fast import compute_ngrams, ft_hash +from _utils_any2vec import compute_ngrams, ft_hash from word2vec_inner cimport bisect_left, random_int32, \ scopy, saxpy, sdot, dsdot, snrm2, sscal, \ REAL_t, EXP_TABLE, \ diff --git a/gensim/models/utils_any2vec.py b/gensim/models/utils_any2vec.py index 570b620c72..73bbdbde94 100644 --- a/gensim/models/utils_any2vec.py +++ b/gensim/models/utils_any2vec.py @@ -18,7 +18,7 @@ logger = logging.getLogger(__name__) try: - from gensim.models.utils_any2vec_fast import ( + from gensim.models._utils_any2vec import ( ft_hash as _ft_hash, compute_ngrams as _compute_ngrams) except ImportError: diff --git a/setup.py b/setup.py index e11d7d8392..04b1efda27 100644 --- a/setup.py +++ b/setup.py @@ -261,8 +261,8 @@ def finalize_options(self): Extension('gensim.models.fasttext_inner', sources=['./gensim/models/fasttext_inner.c'], include_dirs=[model_dir]), - Extension('gensim.models.utils_any2vec_fast', - sources=['./gensim/models/utils_any2vec_fast.c'], + Extension('gensim.models._utils_any2vec', + sources=['./gensim/models/_utils_any2vec.c'], include_dirs=[model_dir]) ], cmdclass=cmdclass, From c6f347e80f4162e6cdc3264464f54020da9af0ed Mon Sep 17 00:00:00 2001 From: Johannes Baiter Date: Mon, 26 Feb 2018 18:50:09 +0100 Subject: [PATCH 09/20] fasttext: Cache ngram buckets for words during training This removes the expensive calls to `compute_ngrams` and `ft_hash` during training and uses a simple lookup in an int -> int[] mapping instead, resulting in a dramatic increase in training performance. --- gensim/models/fasttext.py | 14 +- gensim/models/fasttext_inner.c | 2326 ++++++++++++------------------ gensim/models/fasttext_inner.pyx | 13 +- gensim/models/keyedvectors.py | 1 + 4 files changed, 954 insertions(+), 1400 deletions(-) diff --git a/gensim/models/fasttext.py b/gensim/models/fasttext.py index 908b0bfe03..375ca29b03 100644 --- a/gensim/models/fasttext.py +++ b/gensim/models/fasttext.py @@ -396,6 +396,7 @@ def _clear_post_train(self): self.wv.vectors_norm = None self.wv.vectors_vocab_norm = None self.wv.vectors_ngrams_norm = None + self.wv.buckets_word = None def estimate_memory(self, vocab_size=None, report=None): vocab_size = vocab_size or len(self.wv.vocab) @@ -774,10 +775,12 @@ def init_ngrams_weights(self, wv, update=False, vocabulary=None): self.vectors_ngrams_lockf = ones((self.bucket, wv.vector_size), dtype=REAL) wv.hash2index = {} + wv.buckets_word = {} ngram_indices = [] new_hash_count = 0 wv.num_ngram_vectors = 0 - for word in wv.vocab.keys(): + for word, vocab in wv.vocab.items(): + buckets = [] for ngram in _compute_ngrams(word, wv.min_n, wv.max_n): ngram_hash = _ft_hash(ngram) % self.bucket if ngram_hash not in wv.hash2index: @@ -785,6 +788,8 @@ def init_ngrams_weights(self, wv, update=False, vocabulary=None): ngram_indices.append(ngram_hash) wv.hash2index[ngram_hash] = new_hash_count new_hash_count = new_hash_count + 1 + buckets.append(wv.hash2index[ngram_hash]) + wv.buckets_word[vocab.index] = tuple(buckets) logger.info("Total number of ngrams is %d", wv.num_ngram_vectors) @@ -792,15 +797,20 @@ def init_ngrams_weights(self, wv, update=False, vocabulary=None): self.vectors_ngrams_lockf = self.vectors_ngrams_lockf.take(ngram_indices, axis=0) self.reset_ngrams_weights(wv) else: + if not wv.buckets_word: + wv.buckets_word = {} new_hash_count = 0 num_new_ngrams = 0 - for word in wv.vocab.keys(): + for word, vocab in wv.vocab.items(): + buckets = [] for ngram in _compute_ngrams(word, wv.min_n, wv.max_n): ngram_hash = _ft_hash(ngram) % self.bucket if ngram_hash not in wv.hash2index: wv.hash2index[ngram_hash] = new_hash_count + self.old_hash2index_len new_hash_count = new_hash_count + 1 num_new_ngrams += 1 + buckets.append(wv.hash2index[ngram_hash]) + wv.buckets_word[vocab.index] = tuple(buckets) wv.num_ngram_vectors += num_new_ngrams logger.info("Number of new ngrams is %d", num_new_ngrams) diff --git a/gensim/models/fasttext_inner.c b/gensim/models/fasttext_inner.c index 45eef6a8d0..6b3b433488 100644 --- a/gensim/models/fasttext_inner.c +++ b/gensim/models/fasttext_inner.c @@ -1133,31 +1133,6 @@ static CYTHON_INLINE PyObject *__Pyx_PyCFunction_FastCall(PyObject *func, PyObje #define __Pyx_PyCFunction_FastCall(func, args, nargs) (assert(0), NULL) #endif -/* ListCompAppend.proto */ -#if CYTHON_USE_PYLIST_INTERNALS && CYTHON_ASSUME_SAFE_MACROS -static CYTHON_INLINE int __Pyx_ListComp_Append(PyObject* list, PyObject* x) { - PyListObject* L = (PyListObject*) list; - Py_ssize_t len = Py_SIZE(list); - if (likely(L->allocated > len)) { - Py_INCREF(x); - PyList_SET_ITEM(list, len, x); - Py_SIZE(list) = len+1; - return 0; - } - return PyList_Append(list, x); -} -#else -#define __Pyx_ListComp_Append(L,x) PyList_Append(L,x) -#endif - -/* PyObjectCallMethO.proto */ -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg); -#endif - -/* PyObjectCallOneArg.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg); - /* PyThreadStateGet.proto */ #if CYTHON_FAST_THREAD_STATE #define __Pyx_PyThreadState_declare PyThreadState *__pyx_tstate; @@ -1248,6 +1223,14 @@ static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level); /* ImportFrom.proto */ static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name); +/* PyObjectCallMethO.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg); +#endif + +/* PyObjectCallOneArg.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg); + /* PyObjectCallNoArg.proto */ #if CYTHON_COMPILING_IN_CPYTHON static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func); @@ -1541,8 +1524,6 @@ static const char __pyx_k_d_res[] = "d_res"; static const char __pyx_k_dtype[] = "dtype"; static const char __pyx_k_fblas[] = "fblas"; static const char __pyx_k_index[] = "index"; -static const char __pyx_k_max_n[] = "max_n"; -static const char __pyx_k_min_n[] = "min_n"; static const char __pyx_k_model[] = "model"; static const char __pyx_k_numpy[] = "numpy"; static const char __pyx_k_p_res[] = "p_res"; @@ -1550,7 +1531,6 @@ static const char __pyx_k_point[] = "point"; static const char __pyx_k_range[] = "range"; static const char __pyx_k_token[] = "token"; static const char __pyx_k_vocab[] = "vocab"; -static const char __pyx_k_bucket[] = "bucket"; static const char __pyx_k_import[] = "__import__"; static const char __pyx_k_neu1_2[] = "neu1"; static const char __pyx_k_points[] = "points"; @@ -1561,11 +1541,9 @@ static const char __pyx_k_window[] = "window"; static const char __pyx_k_work_2[] = "work"; static const char __pyx_k_alpha_2[] = "_alpha"; static const char __pyx_k_float32[] = "float32"; -static const char __pyx_k_ft_hash[] = "ft_hash"; static const char __pyx_k_idx_end[] = "idx_end"; static const char __pyx_k_indexes[] = "indexes"; static const char __pyx_k_randint[] = "randint"; -static const char __pyx_k_subword[] = "subword"; static const char __pyx_k_syn1neg[] = "syn1neg"; static const char __pyx_k_vlookup[] = "vlookup"; static const char __pyx_k_codelens[] = "codelens"; @@ -1579,7 +1557,6 @@ static const char __pyx_k_enumerate[] = "enumerate"; static const char __pyx_k_idx_start[] = "idx_start"; static const char __pyx_k_sentences[] = "sentences"; static const char __pyx_k_ValueError[] = "ValueError"; -static const char __pyx_k_hash2index[] = "hash2index"; static const char __pyx_k_sample_int[] = "sample_int"; static const char __pyx_k_syn0_vocab[] = "syn0_vocab"; static const char __pyx_k_trainables[] = "trainables"; @@ -1590,13 +1567,12 @@ static const char __pyx_k_syn0_ngrams[] = "syn0_ngrams"; static const char __pyx_k_vector_size[] = "vector_size"; static const char __pyx_k_FAST_VERSION[] = "FAST_VERSION"; static const char __pyx_k_RuntimeError[] = "RuntimeError"; +static const char __pyx_k_buckets_word[] = "buckets_word"; static const char __pyx_k_sentence_idx[] = "sentence_idx"; static const char __pyx_k_subwords_idx[] = "subwords_idx"; static const char __pyx_k_cum_table_len[] = "cum_table_len"; -static const char __pyx_k_utils_any2vec[] = "_utils_any2vec"; static const char __pyx_k_vectors_vocab[] = "vectors_vocab"; static const char __pyx_k_word_subwords[] = "word_subwords"; -static const char __pyx_k_compute_ngrams[] = "compute_ngrams"; static const char __pyx_k_subword_arrays[] = "subword_arrays"; static const char __pyx_k_train_batch_sg[] = "train_batch_sg"; static const char __pyx_k_vectors_ngrams[] = "vectors_ngrams"; @@ -1634,12 +1610,11 @@ static PyObject *__pyx_n_s__14; static PyObject *__pyx_n_s_alpha; static PyObject *__pyx_n_s_alpha_2; static PyObject *__pyx_n_s_array; -static PyObject *__pyx_n_s_bucket; +static PyObject *__pyx_n_s_buckets_word; static PyObject *__pyx_n_s_cbow_mean; static PyObject *__pyx_n_s_code; static PyObject *__pyx_n_s_codelens; static PyObject *__pyx_n_s_codes; -static PyObject *__pyx_n_s_compute_ngrams; static PyObject *__pyx_n_s_cum_table; static PyObject *__pyx_n_s_cum_table_len; static PyObject *__pyx_n_s_d_res; @@ -1650,9 +1625,7 @@ static PyObject *__pyx_n_s_enumerate; static PyObject *__pyx_n_s_expected; static PyObject *__pyx_n_s_fblas; static PyObject *__pyx_n_s_float32; -static PyObject *__pyx_n_s_ft_hash; static PyObject *__pyx_n_s_gensim_models_fasttext_inner; -static PyObject *__pyx_n_s_hash2index; static PyObject *__pyx_kp_s_home_jbaiter_projekte_gensim_ge; static PyObject *__pyx_n_s_hs; static PyObject *__pyx_n_s_i; @@ -1668,8 +1641,6 @@ static PyObject *__pyx_n_s_k; static PyObject *__pyx_n_s_l1; static PyObject *__pyx_n_s_l1_2; static PyObject *__pyx_n_s_main; -static PyObject *__pyx_n_s_max_n; -static PyObject *__pyx_n_s_min_n; static PyObject *__pyx_n_s_model; static PyObject *__pyx_kp_u_ndarray_is_not_C_contiguous; static PyObject *__pyx_kp_u_ndarray_is_not_Fortran_contiguou; @@ -1696,7 +1667,6 @@ static PyObject *__pyx_n_s_sent_idx; static PyObject *__pyx_n_s_sentence_idx; static PyObject *__pyx_n_s_sentences; static PyObject *__pyx_n_s_size; -static PyObject *__pyx_n_s_subword; static PyObject *__pyx_n_s_subword_arrays; static PyObject *__pyx_n_s_subwords; static PyObject *__pyx_n_s_subwords_idx; @@ -1712,7 +1682,6 @@ static PyObject *__pyx_n_s_train_batch_sg; static PyObject *__pyx_n_s_trainables; static PyObject *__pyx_n_s_uint32; static PyObject *__pyx_kp_u_unknown_dtype_code_in_numpy_pxd; -static PyObject *__pyx_n_s_utils_any2vec; static PyObject *__pyx_n_s_vector_size; static PyObject *__pyx_n_s_vectors_ngrams; static PyObject *__pyx_n_s_vectors_ngrams_lockf; @@ -1761,7 +1730,7 @@ static PyObject *__pyx_codeobj__16; static PyObject *__pyx_codeobj__18; static PyObject *__pyx_codeobj__20; -/* "gensim/models/fasttext_inner.pyx":43 +/* "gensim/models/fasttext_inner.pyx":42 * cdef REAL_t ONEF = 1.0 * * cdef unsigned long long fast_sentence_sg_neg( # <<<<<<<<<<<<<< @@ -1788,7 +1757,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente int __pyx_t_4; int __pyx_t_5; - /* "gensim/models/fasttext_inner.pyx":51 + /* "gensim/models/fasttext_inner.pyx":50 * * cdef long long a * cdef np.uint32_t word2_index = subwords_index[0] # <<<<<<<<<<<<<< @@ -1797,7 +1766,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_word2_index = (__pyx_v_subwords_index[0]); - /* "gensim/models/fasttext_inner.pyx":52 + /* "gensim/models/fasttext_inner.pyx":51 * cdef long long a * cdef np.uint32_t word2_index = subwords_index[0] * cdef long long row1 = word2_index * size, row2 # <<<<<<<<<<<<<< @@ -1806,7 +1775,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_row1 = (__pyx_v_word2_index * __pyx_v_size); - /* "gensim/models/fasttext_inner.pyx":53 + /* "gensim/models/fasttext_inner.pyx":52 * cdef np.uint32_t word2_index = subwords_index[0] * cdef long long row1 = word2_index * size, row2 * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< @@ -1815,7 +1784,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_modulo = 281474976710655ULL; - /* "gensim/models/fasttext_inner.pyx":58 + /* "gensim/models/fasttext_inner.pyx":57 * cdef int d * * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -1824,7 +1793,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/fasttext_inner.pyx":59 + /* "gensim/models/fasttext_inner.pyx":58 * * memset(work, 0, size * cython.sizeof(REAL_t)) * memset(l1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -1833,7 +1802,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ memset(__pyx_v_l1, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/fasttext_inner.pyx":61 + /* "gensim/models/fasttext_inner.pyx":60 * memset(l1, 0, size * cython.sizeof(REAL_t)) * * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) # <<<<<<<<<<<<<< @@ -1842,7 +1811,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_6gensim_6models_14word2vec_inner_scopy((&__pyx_v_size), (&(__pyx_v_syn0_vocab[__pyx_v_row1])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":62 + /* "gensim/models/fasttext_inner.pyx":61 * * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) * for d in range(1, subwords_len): # <<<<<<<<<<<<<< @@ -1853,7 +1822,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente for (__pyx_t_2 = 1; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_d = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":63 + /* "gensim/models/fasttext_inner.pyx":62 * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) * for d in range(1, subwords_len): * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_index[d] * size], &ONE, l1, &ONE) # <<<<<<<<<<<<<< @@ -1863,7 +1832,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_14fasttext_inner_ONEF), (&(__pyx_v_syn0_ngrams[((__pyx_v_subwords_index[__pyx_v_d]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); } - /* "gensim/models/fasttext_inner.pyx":64 + /* "gensim/models/fasttext_inner.pyx":63 * for d in range(1, subwords_len): * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_index[d] * size], &ONE, l1, &ONE) * cdef REAL_t norm_factor = ONEF / subwords_len # <<<<<<<<<<<<<< @@ -1872,7 +1841,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_norm_factor = (__pyx_v_6gensim_6models_14fasttext_inner_ONEF / __pyx_v_subwords_len); - /* "gensim/models/fasttext_inner.pyx":65 + /* "gensim/models/fasttext_inner.pyx":64 * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_index[d] * size], &ONE, l1, &ONE) * cdef REAL_t norm_factor = ONEF / subwords_len * sscal(&size, &norm_factor, l1 , &ONE) # <<<<<<<<<<<<<< @@ -1881,7 +1850,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_norm_factor), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":67 + /* "gensim/models/fasttext_inner.pyx":66 * sscal(&size, &norm_factor, l1 , &ONE) * * for d in range(negative+1): # <<<<<<<<<<<<<< @@ -1892,7 +1861,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_3; __pyx_t_2+=1) { __pyx_v_d = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":68 + /* "gensim/models/fasttext_inner.pyx":67 * * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< @@ -1902,7 +1871,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_t_4 = ((__pyx_v_d == 0) != 0); if (__pyx_t_4) { - /* "gensim/models/fasttext_inner.pyx":69 + /* "gensim/models/fasttext_inner.pyx":68 * for d in range(negative+1): * if d == 0: * target_index = word_index # <<<<<<<<<<<<<< @@ -1911,7 +1880,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_target_index = __pyx_v_word_index; - /* "gensim/models/fasttext_inner.pyx":70 + /* "gensim/models/fasttext_inner.pyx":69 * if d == 0: * target_index = word_index * label = ONEF # <<<<<<<<<<<<<< @@ -1920,7 +1889,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_label = __pyx_v_6gensim_6models_14fasttext_inner_ONEF; - /* "gensim/models/fasttext_inner.pyx":68 + /* "gensim/models/fasttext_inner.pyx":67 * * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< @@ -1930,7 +1899,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente goto __pyx_L7; } - /* "gensim/models/fasttext_inner.pyx":72 + /* "gensim/models/fasttext_inner.pyx":71 * label = ONEF * else: * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) # <<<<<<<<<<<<<< @@ -1940,7 +1909,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente /*else*/ { __pyx_v_target_index = __pyx_f_6gensim_6models_14word2vec_inner_bisect_left(__pyx_v_cum_table, ((__pyx_v_next_random >> 16) % (__pyx_v_cum_table[(__pyx_v_cum_table_len - 1)])), 0, __pyx_v_cum_table_len); - /* "gensim/models/fasttext_inner.pyx":73 + /* "gensim/models/fasttext_inner.pyx":72 * else: * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< @@ -1949,7 +1918,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); - /* "gensim/models/fasttext_inner.pyx":74 + /* "gensim/models/fasttext_inner.pyx":73 * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: # <<<<<<<<<<<<<< @@ -1959,7 +1928,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_t_4 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); if (__pyx_t_4) { - /* "gensim/models/fasttext_inner.pyx":75 + /* "gensim/models/fasttext_inner.pyx":74 * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: * continue # <<<<<<<<<<<<<< @@ -1968,7 +1937,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ goto __pyx_L5_continue; - /* "gensim/models/fasttext_inner.pyx":74 + /* "gensim/models/fasttext_inner.pyx":73 * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: # <<<<<<<<<<<<<< @@ -1977,7 +1946,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ } - /* "gensim/models/fasttext_inner.pyx":76 + /* "gensim/models/fasttext_inner.pyx":75 * if target_index == word_index: * continue * label = 0.0 # <<<<<<<<<<<<<< @@ -1988,7 +1957,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente } __pyx_L7:; - /* "gensim/models/fasttext_inner.pyx":78 + /* "gensim/models/fasttext_inner.pyx":77 * label = 0.0 * * row2 = target_index * size # <<<<<<<<<<<<<< @@ -1997,7 +1966,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); - /* "gensim/models/fasttext_inner.pyx":79 + /* "gensim/models/fasttext_inner.pyx":78 * * row2 = target_index * size * f_dot = our_dot(&size, l1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -2006,7 +1975,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_f_dot = __pyx_v_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":80 + /* "gensim/models/fasttext_inner.pyx":79 * row2 = target_index * size * f_dot = our_dot(&size, l1, &ONE, &syn1neg[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2024,7 +1993,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_L10_bool_binop_done:; if (__pyx_t_4) { - /* "gensim/models/fasttext_inner.pyx":81 + /* "gensim/models/fasttext_inner.pyx":80 * f_dot = our_dot(&size, l1, &ONE, &syn1neg[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -2033,7 +2002,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ goto __pyx_L5_continue; - /* "gensim/models/fasttext_inner.pyx":80 + /* "gensim/models/fasttext_inner.pyx":79 * row2 = target_index * size * f_dot = our_dot(&size, l1, &ONE, &syn1neg[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2042,7 +2011,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ } - /* "gensim/models/fasttext_inner.pyx":82 + /* "gensim/models/fasttext_inner.pyx":81 * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -2051,7 +2020,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f_dot + 6.0) * 83.0))]); - /* "gensim/models/fasttext_inner.pyx":83 + /* "gensim/models/fasttext_inner.pyx":82 * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha # <<<<<<<<<<<<<< @@ -2060,7 +2029,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); - /* "gensim/models/fasttext_inner.pyx":84 + /* "gensim/models/fasttext_inner.pyx":83 * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< @@ -2069,7 +2038,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":85 + /* "gensim/models/fasttext_inner.pyx":84 * g = (label - f) * alpha * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, l1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -2080,7 +2049,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_L5_continue:; } - /* "gensim/models/fasttext_inner.pyx":86 + /* "gensim/models/fasttext_inner.pyx":85 * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, l1, &ONE, &syn1neg[row2], &ONE) * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) # <<<<<<<<<<<<<< @@ -2089,7 +2058,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_vocab[__pyx_v_word2_index])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_vocab[__pyx_v_row1])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":87 + /* "gensim/models/fasttext_inner.pyx":86 * our_saxpy(&size, &g, l1, &ONE, &syn1neg[row2], &ONE) * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) * for d in range(1, subwords_len): # <<<<<<<<<<<<<< @@ -2100,7 +2069,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente for (__pyx_t_2 = 1; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_d = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":88 + /* "gensim/models/fasttext_inner.pyx":87 * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) * for d in range(1, subwords_len): * our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) # <<<<<<<<<<<<<< @@ -2110,7 +2079,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_ngrams[(__pyx_v_subwords_index[__pyx_v_d])])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_ngrams[((__pyx_v_subwords_index[__pyx_v_d]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); } - /* "gensim/models/fasttext_inner.pyx":89 + /* "gensim/models/fasttext_inner.pyx":88 * for d in range(1, subwords_len): * our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) * return next_random # <<<<<<<<<<<<<< @@ -2120,7 +2089,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_r = __pyx_v_next_random; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":43 + /* "gensim/models/fasttext_inner.pyx":42 * cdef REAL_t ONEF = 1.0 * * cdef unsigned long long fast_sentence_sg_neg( # <<<<<<<<<<<<<< @@ -2133,7 +2102,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente return __pyx_r; } -/* "gensim/models/fasttext_inner.pyx":92 +/* "gensim/models/fasttext_inner.pyx":91 * * * cdef void fast_sentence_sg_hs( # <<<<<<<<<<<<<< @@ -2158,7 +2127,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t int __pyx_t_5; int __pyx_t_6; - /* "gensim/models/fasttext_inner.pyx":100 + /* "gensim/models/fasttext_inner.pyx":99 * * cdef long long a, b * cdef np.uint32_t word2_index = subwords_index[0] # <<<<<<<<<<<<<< @@ -2167,7 +2136,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_word2_index = (__pyx_v_subwords_index[0]); - /* "gensim/models/fasttext_inner.pyx":101 + /* "gensim/models/fasttext_inner.pyx":100 * cdef long long a, b * cdef np.uint32_t word2_index = subwords_index[0] * cdef long long row1 = word2_index * size, row2, sgn # <<<<<<<<<<<<<< @@ -2176,7 +2145,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_row1 = (__pyx_v_word2_index * __pyx_v_size); - /* "gensim/models/fasttext_inner.pyx":104 + /* "gensim/models/fasttext_inner.pyx":103 * cdef REAL_t f, g, f_dot, lprob * * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -2185,7 +2154,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/fasttext_inner.pyx":105 + /* "gensim/models/fasttext_inner.pyx":104 * * memset(work, 0, size * cython.sizeof(REAL_t)) * memset(l1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -2194,7 +2163,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ memset(__pyx_v_l1, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/fasttext_inner.pyx":107 + /* "gensim/models/fasttext_inner.pyx":106 * memset(l1, 0, size * cython.sizeof(REAL_t)) * * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) # <<<<<<<<<<<<<< @@ -2203,7 +2172,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_6gensim_6models_14word2vec_inner_scopy((&__pyx_v_size), (&(__pyx_v_syn0_vocab[__pyx_v_row1])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":108 + /* "gensim/models/fasttext_inner.pyx":107 * * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) * for d in range(1, subwords_len): # <<<<<<<<<<<<<< @@ -2214,7 +2183,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t for (__pyx_t_2 = 1; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_d = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":109 + /* "gensim/models/fasttext_inner.pyx":108 * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) * for d in range(1, subwords_len): * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_index[d] * size], &ONE, l1, &ONE) # <<<<<<<<<<<<<< @@ -2224,7 +2193,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_14fasttext_inner_ONEF), (&(__pyx_v_syn0_ngrams[((__pyx_v_subwords_index[__pyx_v_d]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); } - /* "gensim/models/fasttext_inner.pyx":110 + /* "gensim/models/fasttext_inner.pyx":109 * for d in range(1, subwords_len): * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_index[d] * size], &ONE, l1, &ONE) * cdef REAL_t norm_factor = ONEF / subwords_len # <<<<<<<<<<<<<< @@ -2233,7 +2202,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_norm_factor = (__pyx_v_6gensim_6models_14fasttext_inner_ONEF / __pyx_v_subwords_len); - /* "gensim/models/fasttext_inner.pyx":111 + /* "gensim/models/fasttext_inner.pyx":110 * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_index[d] * size], &ONE, l1, &ONE) * cdef REAL_t norm_factor = ONEF / subwords_len * sscal(&size, &norm_factor, l1 , &ONE) # <<<<<<<<<<<<<< @@ -2242,7 +2211,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_norm_factor), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":113 + /* "gensim/models/fasttext_inner.pyx":112 * sscal(&size, &norm_factor, l1 , &ONE) * * for b in range(codelen): # <<<<<<<<<<<<<< @@ -2253,7 +2222,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) { __pyx_v_b = __pyx_t_4; - /* "gensim/models/fasttext_inner.pyx":114 + /* "gensim/models/fasttext_inner.pyx":113 * * for b in range(codelen): * row2 = word_point[b] * size # <<<<<<<<<<<<<< @@ -2262,7 +2231,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); - /* "gensim/models/fasttext_inner.pyx":115 + /* "gensim/models/fasttext_inner.pyx":114 * for b in range(codelen): * row2 = word_point[b] * size * f_dot = our_dot(&size, l1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< @@ -2271,7 +2240,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_f_dot = __pyx_v_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":116 + /* "gensim/models/fasttext_inner.pyx":115 * row2 = word_point[b] * size * f_dot = our_dot(&size, l1, &ONE, &syn1[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2289,7 +2258,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t __pyx_L8_bool_binop_done:; if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":117 + /* "gensim/models/fasttext_inner.pyx":116 * f_dot = our_dot(&size, l1, &ONE, &syn1[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -2298,7 +2267,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ goto __pyx_L5_continue; - /* "gensim/models/fasttext_inner.pyx":116 + /* "gensim/models/fasttext_inner.pyx":115 * row2 = word_point[b] * size * f_dot = our_dot(&size, l1, &ONE, &syn1[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2307,7 +2276,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ } - /* "gensim/models/fasttext_inner.pyx":118 + /* "gensim/models/fasttext_inner.pyx":117 * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -2316,7 +2285,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f_dot + 6.0) * 83.0))]); - /* "gensim/models/fasttext_inner.pyx":119 + /* "gensim/models/fasttext_inner.pyx":118 * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< @@ -2325,7 +2294,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); - /* "gensim/models/fasttext_inner.pyx":121 + /* "gensim/models/fasttext_inner.pyx":120 * g = (1 - word_code[b] - f) * alpha * * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< @@ -2334,7 +2303,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":122 + /* "gensim/models/fasttext_inner.pyx":121 * * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, l1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< @@ -2345,7 +2314,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t __pyx_L5_continue:; } - /* "gensim/models/fasttext_inner.pyx":124 + /* "gensim/models/fasttext_inner.pyx":123 * our_saxpy(&size, &g, l1, &ONE, &syn1[row2], &ONE) * * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) # <<<<<<<<<<<<<< @@ -2354,7 +2323,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_vocab[__pyx_v_word2_index])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_vocab[__pyx_v_row1])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":125 + /* "gensim/models/fasttext_inner.pyx":124 * * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) * for d in range(1, subwords_len): # <<<<<<<<<<<<<< @@ -2365,7 +2334,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t for (__pyx_t_2 = 1; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_d = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":126 + /* "gensim/models/fasttext_inner.pyx":125 * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) * for d in range(1, subwords_len): * our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) # <<<<<<<<<<<<<< @@ -2375,7 +2344,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_ngrams[(__pyx_v_subwords_index[__pyx_v_d])])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_ngrams[((__pyx_v_subwords_index[__pyx_v_d]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); } - /* "gensim/models/fasttext_inner.pyx":92 + /* "gensim/models/fasttext_inner.pyx":91 * * * cdef void fast_sentence_sg_hs( # <<<<<<<<<<<<<< @@ -2386,7 +2355,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t /* function exit code */ } -/* "gensim/models/fasttext_inner.pyx":129 +/* "gensim/models/fasttext_inner.pyx":128 * * * cdef unsigned long long fast_sentence_cbow_neg( # <<<<<<<<<<<<<< @@ -2416,7 +2385,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente long __pyx_t_6; int __pyx_t_7; - /* "gensim/models/fasttext_inner.pyx":138 + /* "gensim/models/fasttext_inner.pyx":137 * cdef long long a * cdef long long row2 * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< @@ -2425,7 +2394,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_modulo = 281474976710655ULL; - /* "gensim/models/fasttext_inner.pyx":139 + /* "gensim/models/fasttext_inner.pyx":138 * cdef long long row2 * cdef unsigned long long modulo = 281474976710655ULL * cdef REAL_t f, g, count, inv_count = 1.0, label, log_e_f_dot, f_dot # <<<<<<<<<<<<<< @@ -2434,7 +2403,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_inv_count = 1.0; - /* "gensim/models/fasttext_inner.pyx":143 + /* "gensim/models/fasttext_inner.pyx":142 * cdef int d, m * * word_index = indexes[i] # <<<<<<<<<<<<<< @@ -2443,7 +2412,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_word_index = (__pyx_v_indexes[__pyx_v_i]); - /* "gensim/models/fasttext_inner.pyx":145 + /* "gensim/models/fasttext_inner.pyx":144 * word_index = indexes[i] * * memset(neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -2452,7 +2421,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ memset(__pyx_v_neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/fasttext_inner.pyx":146 + /* "gensim/models/fasttext_inner.pyx":145 * * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 # <<<<<<<<<<<<<< @@ -2461,7 +2430,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_count = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.0); - /* "gensim/models/fasttext_inner.pyx":147 + /* "gensim/models/fasttext_inner.pyx":146 * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 * for m in range(j, k): # <<<<<<<<<<<<<< @@ -2472,7 +2441,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_m = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":148 + /* "gensim/models/fasttext_inner.pyx":147 * count = 0.0 * for m in range(j, k): * if m == i: # <<<<<<<<<<<<<< @@ -2482,7 +2451,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_t_3 = ((__pyx_v_m == __pyx_v_i) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":149 + /* "gensim/models/fasttext_inner.pyx":148 * for m in range(j, k): * if m == i: * continue # <<<<<<<<<<<<<< @@ -2491,7 +2460,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ goto __pyx_L3_continue; - /* "gensim/models/fasttext_inner.pyx":148 + /* "gensim/models/fasttext_inner.pyx":147 * count = 0.0 * for m in range(j, k): * if m == i: # <<<<<<<<<<<<<< @@ -2500,7 +2469,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ } - /* "gensim/models/fasttext_inner.pyx":150 + /* "gensim/models/fasttext_inner.pyx":149 * if m == i: * continue * count += ONEF # <<<<<<<<<<<<<< @@ -2509,7 +2478,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_count = (__pyx_v_count + __pyx_v_6gensim_6models_14fasttext_inner_ONEF); - /* "gensim/models/fasttext_inner.pyx":151 + /* "gensim/models/fasttext_inner.pyx":150 * continue * count += ONEF * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< @@ -2518,7 +2487,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_14fasttext_inner_ONEF), (&(__pyx_v_syn0_vocab[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":152 + /* "gensim/models/fasttext_inner.pyx":151 * count += ONEF * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) * for d in range(subwords_idx_len[m]): # <<<<<<<<<<<<<< @@ -2529,7 +2498,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { __pyx_v_d = __pyx_t_5; - /* "gensim/models/fasttext_inner.pyx":153 + /* "gensim/models/fasttext_inner.pyx":152 * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) * for d in range(subwords_idx_len[m]): * count += ONEF # <<<<<<<<<<<<<< @@ -2538,7 +2507,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_count = (__pyx_v_count + __pyx_v_6gensim_6models_14fasttext_inner_ONEF); - /* "gensim/models/fasttext_inner.pyx":154 + /* "gensim/models/fasttext_inner.pyx":153 * for d in range(subwords_idx_len[m]): * count += ONEF * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< @@ -2550,7 +2519,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_L3_continue:; } - /* "gensim/models/fasttext_inner.pyx":156 + /* "gensim/models/fasttext_inner.pyx":155 * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) * * if count > (0.5): # <<<<<<<<<<<<<< @@ -2560,7 +2529,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_t_3 = ((__pyx_v_count > ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.5)) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":157 + /* "gensim/models/fasttext_inner.pyx":156 * * if count > (0.5): * inv_count = ONEF / count # <<<<<<<<<<<<<< @@ -2569,7 +2538,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_inv_count = (__pyx_v_6gensim_6models_14fasttext_inner_ONEF / __pyx_v_count); - /* "gensim/models/fasttext_inner.pyx":156 + /* "gensim/models/fasttext_inner.pyx":155 * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) * * if count > (0.5): # <<<<<<<<<<<<<< @@ -2578,7 +2547,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ } - /* "gensim/models/fasttext_inner.pyx":158 + /* "gensim/models/fasttext_inner.pyx":157 * if count > (0.5): * inv_count = ONEF / count * if cbow_mean: # <<<<<<<<<<<<<< @@ -2588,7 +2557,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_t_3 = (__pyx_v_cbow_mean != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":159 + /* "gensim/models/fasttext_inner.pyx":158 * inv_count = ONEF / count * if cbow_mean: * sscal(&size, &inv_count, neu1, &ONE) # <<<<<<<<<<<<<< @@ -2597,7 +2566,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":158 + /* "gensim/models/fasttext_inner.pyx":157 * if count > (0.5): * inv_count = ONEF / count * if cbow_mean: # <<<<<<<<<<<<<< @@ -2606,7 +2575,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ } - /* "gensim/models/fasttext_inner.pyx":161 + /* "gensim/models/fasttext_inner.pyx":160 * sscal(&size, &inv_count, neu1, &ONE) * * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -2615,7 +2584,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/fasttext_inner.pyx":163 + /* "gensim/models/fasttext_inner.pyx":162 * memset(work, 0, size * cython.sizeof(REAL_t)) * * for d in range(negative+1): # <<<<<<<<<<<<<< @@ -2626,7 +2595,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_6; __pyx_t_1+=1) { __pyx_v_d = __pyx_t_1; - /* "gensim/models/fasttext_inner.pyx":164 + /* "gensim/models/fasttext_inner.pyx":163 * * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< @@ -2636,7 +2605,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_t_3 = ((__pyx_v_d == 0) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":165 + /* "gensim/models/fasttext_inner.pyx":164 * for d in range(negative+1): * if d == 0: * target_index = word_index # <<<<<<<<<<<<<< @@ -2645,7 +2614,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_target_index = __pyx_v_word_index; - /* "gensim/models/fasttext_inner.pyx":166 + /* "gensim/models/fasttext_inner.pyx":165 * if d == 0: * target_index = word_index * label = ONEF # <<<<<<<<<<<<<< @@ -2654,7 +2623,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_label = __pyx_v_6gensim_6models_14fasttext_inner_ONEF; - /* "gensim/models/fasttext_inner.pyx":164 + /* "gensim/models/fasttext_inner.pyx":163 * * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< @@ -2664,7 +2633,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente goto __pyx_L12; } - /* "gensim/models/fasttext_inner.pyx":168 + /* "gensim/models/fasttext_inner.pyx":167 * label = ONEF * else: * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) # <<<<<<<<<<<<<< @@ -2674,7 +2643,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente /*else*/ { __pyx_v_target_index = __pyx_f_6gensim_6models_14word2vec_inner_bisect_left(__pyx_v_cum_table, ((__pyx_v_next_random >> 16) % (__pyx_v_cum_table[(__pyx_v_cum_table_len - 1)])), 0, __pyx_v_cum_table_len); - /* "gensim/models/fasttext_inner.pyx":169 + /* "gensim/models/fasttext_inner.pyx":168 * else: * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< @@ -2683,7 +2652,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); - /* "gensim/models/fasttext_inner.pyx":170 + /* "gensim/models/fasttext_inner.pyx":169 * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: # <<<<<<<<<<<<<< @@ -2693,7 +2662,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_t_3 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":171 + /* "gensim/models/fasttext_inner.pyx":170 * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: * continue # <<<<<<<<<<<<<< @@ -2702,7 +2671,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ goto __pyx_L10_continue; - /* "gensim/models/fasttext_inner.pyx":170 + /* "gensim/models/fasttext_inner.pyx":169 * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: # <<<<<<<<<<<<<< @@ -2711,7 +2680,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ } - /* "gensim/models/fasttext_inner.pyx":172 + /* "gensim/models/fasttext_inner.pyx":171 * if target_index == word_index: * continue * label = 0.0 # <<<<<<<<<<<<<< @@ -2722,7 +2691,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente } __pyx_L12:; - /* "gensim/models/fasttext_inner.pyx":174 + /* "gensim/models/fasttext_inner.pyx":173 * label = 0.0 * * row2 = target_index * size # <<<<<<<<<<<<<< @@ -2731,7 +2700,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); - /* "gensim/models/fasttext_inner.pyx":175 + /* "gensim/models/fasttext_inner.pyx":174 * * row2 = target_index * size * f_dot = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -2740,7 +2709,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_f_dot = __pyx_v_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":176 + /* "gensim/models/fasttext_inner.pyx":175 * row2 = target_index * size * f_dot = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2758,7 +2727,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_L15_bool_binop_done:; if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":177 + /* "gensim/models/fasttext_inner.pyx":176 * f_dot = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -2767,7 +2736,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ goto __pyx_L10_continue; - /* "gensim/models/fasttext_inner.pyx":176 + /* "gensim/models/fasttext_inner.pyx":175 * row2 = target_index * size * f_dot = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2776,7 +2745,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ } - /* "gensim/models/fasttext_inner.pyx":178 + /* "gensim/models/fasttext_inner.pyx":177 * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -2785,7 +2754,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f_dot + 6.0) * 83.0))]); - /* "gensim/models/fasttext_inner.pyx":179 + /* "gensim/models/fasttext_inner.pyx":178 * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha # <<<<<<<<<<<<<< @@ -2794,7 +2763,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); - /* "gensim/models/fasttext_inner.pyx":181 + /* "gensim/models/fasttext_inner.pyx":180 * g = (label - f) * alpha * * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< @@ -2803,7 +2772,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":182 + /* "gensim/models/fasttext_inner.pyx":181 * * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -2814,7 +2783,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_L10_continue:; } - /* "gensim/models/fasttext_inner.pyx":184 + /* "gensim/models/fasttext_inner.pyx":183 * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) * * if not cbow_mean: # divide error over summed window vectors # <<<<<<<<<<<<<< @@ -2824,7 +2793,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_t_3 = ((!(__pyx_v_cbow_mean != 0)) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":185 + /* "gensim/models/fasttext_inner.pyx":184 * * if not cbow_mean: # divide error over summed window vectors * sscal(&size, &inv_count, work, &ONE) # <<<<<<<<<<<<<< @@ -2833,7 +2802,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":184 + /* "gensim/models/fasttext_inner.pyx":183 * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) * * if not cbow_mean: # divide error over summed window vectors # <<<<<<<<<<<<<< @@ -2842,7 +2811,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ } - /* "gensim/models/fasttext_inner.pyx":187 + /* "gensim/models/fasttext_inner.pyx":186 * sscal(&size, &inv_count, work, &ONE) * * for m in range(j,k): # <<<<<<<<<<<<<< @@ -2853,7 +2822,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_m = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":188 + /* "gensim/models/fasttext_inner.pyx":187 * * for m in range(j,k): * if m == i: # <<<<<<<<<<<<<< @@ -2863,7 +2832,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_t_3 = ((__pyx_v_m == __pyx_v_i) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":189 + /* "gensim/models/fasttext_inner.pyx":188 * for m in range(j,k): * if m == i: * continue # <<<<<<<<<<<<<< @@ -2872,7 +2841,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ goto __pyx_L18_continue; - /* "gensim/models/fasttext_inner.pyx":188 + /* "gensim/models/fasttext_inner.pyx":187 * * for m in range(j,k): * if m == i: # <<<<<<<<<<<<<< @@ -2881,7 +2850,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ } - /* "gensim/models/fasttext_inner.pyx":190 + /* "gensim/models/fasttext_inner.pyx":189 * if m == i: * continue * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) # <<<<<<<<<<<<<< @@ -2890,7 +2859,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_vocab[(__pyx_v_indexes[__pyx_v_m])])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_vocab[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":191 + /* "gensim/models/fasttext_inner.pyx":190 * continue * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) * for d in range(subwords_idx_len[m]): # <<<<<<<<<<<<<< @@ -2901,7 +2870,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { __pyx_v_d = __pyx_t_5; - /* "gensim/models/fasttext_inner.pyx":192 + /* "gensim/models/fasttext_inner.pyx":191 * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) * for d in range(subwords_idx_len[m]): * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) # <<<<<<<<<<<<<< @@ -2913,7 +2882,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_L18_continue:; } - /* "gensim/models/fasttext_inner.pyx":194 + /* "gensim/models/fasttext_inner.pyx":193 * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) * * return next_random # <<<<<<<<<<<<<< @@ -2923,7 +2892,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_r = __pyx_v_next_random; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":129 + /* "gensim/models/fasttext_inner.pyx":128 * * * cdef unsigned long long fast_sentence_cbow_neg( # <<<<<<<<<<<<<< @@ -2936,7 +2905,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente return __pyx_r; } -/* "gensim/models/fasttext_inner.pyx":197 +/* "gensim/models/fasttext_inner.pyx":196 * * * cdef void fast_sentence_cbow_hs( # <<<<<<<<<<<<<< @@ -2962,7 +2931,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx PY_LONG_LONG __pyx_t_6; int __pyx_t_7; - /* "gensim/models/fasttext_inner.pyx":206 + /* "gensim/models/fasttext_inner.pyx":205 * cdef long long a, b * cdef long long row2, sgn * cdef REAL_t f, g, count, inv_count = 1.0, f_dot, lprob # <<<<<<<<<<<<<< @@ -2971,7 +2940,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_inv_count = 1.0; - /* "gensim/models/fasttext_inner.pyx":209 + /* "gensim/models/fasttext_inner.pyx":208 * cdef int m * * memset(neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -2980,7 +2949,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ memset(__pyx_v_neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/fasttext_inner.pyx":210 + /* "gensim/models/fasttext_inner.pyx":209 * * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 # <<<<<<<<<<<<<< @@ -2989,7 +2958,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_count = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.0); - /* "gensim/models/fasttext_inner.pyx":211 + /* "gensim/models/fasttext_inner.pyx":210 * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 * for m in range(j, k): # <<<<<<<<<<<<<< @@ -3000,7 +2969,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_m = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":212 + /* "gensim/models/fasttext_inner.pyx":211 * count = 0.0 * for m in range(j, k): * if m == i: # <<<<<<<<<<<<<< @@ -3010,7 +2979,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx __pyx_t_3 = ((__pyx_v_m == __pyx_v_i) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":213 + /* "gensim/models/fasttext_inner.pyx":212 * for m in range(j, k): * if m == i: * continue # <<<<<<<<<<<<<< @@ -3019,7 +2988,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ goto __pyx_L3_continue; - /* "gensim/models/fasttext_inner.pyx":212 + /* "gensim/models/fasttext_inner.pyx":211 * count = 0.0 * for m in range(j, k): * if m == i: # <<<<<<<<<<<<<< @@ -3028,7 +2997,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ } - /* "gensim/models/fasttext_inner.pyx":214 + /* "gensim/models/fasttext_inner.pyx":213 * if m == i: * continue * count += ONEF # <<<<<<<<<<<<<< @@ -3037,7 +3006,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_count = (__pyx_v_count + __pyx_v_6gensim_6models_14fasttext_inner_ONEF); - /* "gensim/models/fasttext_inner.pyx":215 + /* "gensim/models/fasttext_inner.pyx":214 * continue * count += ONEF * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< @@ -3046,7 +3015,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_14fasttext_inner_ONEF), (&(__pyx_v_syn0_vocab[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":216 + /* "gensim/models/fasttext_inner.pyx":215 * count += ONEF * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) * for d in range(subwords_idx_len[m]): # <<<<<<<<<<<<<< @@ -3057,7 +3026,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { __pyx_v_d = __pyx_t_5; - /* "gensim/models/fasttext_inner.pyx":217 + /* "gensim/models/fasttext_inner.pyx":216 * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) * for d in range(subwords_idx_len[m]): * count += ONEF # <<<<<<<<<<<<<< @@ -3066,7 +3035,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_count = (__pyx_v_count + __pyx_v_6gensim_6models_14fasttext_inner_ONEF); - /* "gensim/models/fasttext_inner.pyx":218 + /* "gensim/models/fasttext_inner.pyx":217 * for d in range(subwords_idx_len[m]): * count += ONEF * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< @@ -3078,7 +3047,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx __pyx_L3_continue:; } - /* "gensim/models/fasttext_inner.pyx":219 + /* "gensim/models/fasttext_inner.pyx":218 * count += ONEF * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) * if count > (0.5): # <<<<<<<<<<<<<< @@ -3088,7 +3057,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx __pyx_t_3 = ((__pyx_v_count > ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.5)) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":220 + /* "gensim/models/fasttext_inner.pyx":219 * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) * if count > (0.5): * inv_count = ONEF / count # <<<<<<<<<<<<<< @@ -3097,7 +3066,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_inv_count = (__pyx_v_6gensim_6models_14fasttext_inner_ONEF / __pyx_v_count); - /* "gensim/models/fasttext_inner.pyx":219 + /* "gensim/models/fasttext_inner.pyx":218 * count += ONEF * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) * if count > (0.5): # <<<<<<<<<<<<<< @@ -3106,7 +3075,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ } - /* "gensim/models/fasttext_inner.pyx":221 + /* "gensim/models/fasttext_inner.pyx":220 * if count > (0.5): * inv_count = ONEF / count * if cbow_mean: # <<<<<<<<<<<<<< @@ -3116,7 +3085,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx __pyx_t_3 = (__pyx_v_cbow_mean != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":222 + /* "gensim/models/fasttext_inner.pyx":221 * inv_count = ONEF / count * if cbow_mean: * sscal(&size, &inv_count, neu1, &ONE) # <<<<<<<<<<<<<< @@ -3125,7 +3094,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":221 + /* "gensim/models/fasttext_inner.pyx":220 * if count > (0.5): * inv_count = ONEF / count * if cbow_mean: # <<<<<<<<<<<<<< @@ -3134,7 +3103,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ } - /* "gensim/models/fasttext_inner.pyx":224 + /* "gensim/models/fasttext_inner.pyx":223 * sscal(&size, &inv_count, neu1, &ONE) * * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -3143,7 +3112,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/fasttext_inner.pyx":225 + /* "gensim/models/fasttext_inner.pyx":224 * * memset(work, 0, size * cython.sizeof(REAL_t)) * for b in range(codelens[i]): # <<<<<<<<<<<<<< @@ -3154,7 +3123,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx for (__pyx_t_6 = 0; __pyx_t_6 < __pyx_t_1; __pyx_t_6+=1) { __pyx_v_b = __pyx_t_6; - /* "gensim/models/fasttext_inner.pyx":226 + /* "gensim/models/fasttext_inner.pyx":225 * memset(work, 0, size * cython.sizeof(REAL_t)) * for b in range(codelens[i]): * row2 = word_point[b] * size # <<<<<<<<<<<<<< @@ -3163,7 +3132,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); - /* "gensim/models/fasttext_inner.pyx":227 + /* "gensim/models/fasttext_inner.pyx":226 * for b in range(codelens[i]): * row2 = word_point[b] * size * f_dot = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< @@ -3172,7 +3141,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_f_dot = __pyx_v_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":228 + /* "gensim/models/fasttext_inner.pyx":227 * row2 = word_point[b] * size * f_dot = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< @@ -3190,7 +3159,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx __pyx_L13_bool_binop_done:; if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":229 + /* "gensim/models/fasttext_inner.pyx":228 * f_dot = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -3199,7 +3168,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ goto __pyx_L10_continue; - /* "gensim/models/fasttext_inner.pyx":228 + /* "gensim/models/fasttext_inner.pyx":227 * row2 = word_point[b] * size * f_dot = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< @@ -3208,7 +3177,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ } - /* "gensim/models/fasttext_inner.pyx":230 + /* "gensim/models/fasttext_inner.pyx":229 * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -3217,7 +3186,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f_dot + 6.0) * 83.0))]); - /* "gensim/models/fasttext_inner.pyx":231 + /* "gensim/models/fasttext_inner.pyx":230 * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< @@ -3226,7 +3195,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); - /* "gensim/models/fasttext_inner.pyx":233 + /* "gensim/models/fasttext_inner.pyx":232 * g = (1 - word_code[b] - f) * alpha * * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< @@ -3235,7 +3204,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":234 + /* "gensim/models/fasttext_inner.pyx":233 * * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< @@ -3246,7 +3215,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx __pyx_L10_continue:; } - /* "gensim/models/fasttext_inner.pyx":236 + /* "gensim/models/fasttext_inner.pyx":235 * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) * * if not cbow_mean: # divide error over summed window vectors # <<<<<<<<<<<<<< @@ -3256,7 +3225,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx __pyx_t_3 = ((!(__pyx_v_cbow_mean != 0)) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":237 + /* "gensim/models/fasttext_inner.pyx":236 * * if not cbow_mean: # divide error over summed window vectors * sscal(&size, &inv_count, work, &ONE) # <<<<<<<<<<<<<< @@ -3265,7 +3234,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":236 + /* "gensim/models/fasttext_inner.pyx":235 * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) * * if not cbow_mean: # divide error over summed window vectors # <<<<<<<<<<<<<< @@ -3274,7 +3243,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ } - /* "gensim/models/fasttext_inner.pyx":239 + /* "gensim/models/fasttext_inner.pyx":238 * sscal(&size, &inv_count, work, &ONE) * * for m in range(j,k): # <<<<<<<<<<<<<< @@ -3285,7 +3254,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_m = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":240 + /* "gensim/models/fasttext_inner.pyx":239 * * for m in range(j,k): * if m == i: # <<<<<<<<<<<<<< @@ -3295,7 +3264,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx __pyx_t_3 = ((__pyx_v_m == __pyx_v_i) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":241 + /* "gensim/models/fasttext_inner.pyx":240 * for m in range(j,k): * if m == i: * continue # <<<<<<<<<<<<<< @@ -3304,7 +3273,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ goto __pyx_L16_continue; - /* "gensim/models/fasttext_inner.pyx":240 + /* "gensim/models/fasttext_inner.pyx":239 * * for m in range(j,k): * if m == i: # <<<<<<<<<<<<<< @@ -3313,7 +3282,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ } - /* "gensim/models/fasttext_inner.pyx":242 + /* "gensim/models/fasttext_inner.pyx":241 * if m == i: * continue * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) # <<<<<<<<<<<<<< @@ -3322,7 +3291,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_vocab[(__pyx_v_indexes[__pyx_v_m])])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_vocab[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":243 + /* "gensim/models/fasttext_inner.pyx":242 * continue * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) * for d in range(subwords_idx_len[m]): # <<<<<<<<<<<<<< @@ -3333,7 +3302,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { __pyx_v_d = __pyx_t_5; - /* "gensim/models/fasttext_inner.pyx":244 + /* "gensim/models/fasttext_inner.pyx":243 * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) * for d in range(subwords_idx_len[m]): * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) # <<<<<<<<<<<<<< @@ -3345,7 +3314,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx __pyx_L16_continue:; } - /* "gensim/models/fasttext_inner.pyx":197 + /* "gensim/models/fasttext_inner.pyx":196 * * * cdef void fast_sentence_cbow_hs( # <<<<<<<<<<<<<< @@ -3356,7 +3325,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx /* function exit code */ } -/* "gensim/models/fasttext_inner.pyx":247 +/* "gensim/models/fasttext_inner.pyx":246 * * * def train_batch_sg(model, sentences, alpha, _work, _l1): # <<<<<<<<<<<<<< @@ -3399,26 +3368,26 @@ static PyObject *__pyx_pw_6gensim_6models_14fasttext_inner_1train_batch_sg(PyObj case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentences)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 1); __PYX_ERR(0, 247, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 1); __PYX_ERR(0, 246, __pyx_L3_error) } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 2); __PYX_ERR(0, 247, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 2); __PYX_ERR(0, 246, __pyx_L3_error) } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 3); __PYX_ERR(0, 247, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 3); __PYX_ERR(0, 246, __pyx_L3_error) } case 4: if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_l1)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 4); __PYX_ERR(0, 247, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 4); __PYX_ERR(0, 246, __pyx_L3_error) } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_batch_sg") < 0)) __PYX_ERR(0, 247, __pyx_L3_error) + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_batch_sg") < 0)) __PYX_ERR(0, 246, __pyx_L3_error) } } else if (PyTuple_GET_SIZE(__pyx_args) != 5) { goto __pyx_L5_argtuple_error; @@ -3437,7 +3406,7 @@ static PyObject *__pyx_pw_6gensim_6models_14fasttext_inner_1train_batch_sg(PyObj } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 247, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 246, __pyx_L3_error) __pyx_L3_error:; __Pyx_AddTraceback("gensim.models.fasttext_inner.train_batch_sg", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -3492,7 +3461,6 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON PyObject *__pyx_v_subwords = NULL; PyObject *__pyx_v_word_subwords = NULL; PyObject *__pyx_v_item = NULL; - PyObject *__pyx_v_subword = NULL; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; @@ -3512,165 +3480,162 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5numpy_uint32_t __pyx_t_15; PyObject *__pyx_t_16 = NULL; PyObject *__pyx_t_17 = NULL; - PyObject *__pyx_t_18 = NULL; - PyObject *__pyx_t_19 = NULL; - Py_ssize_t __pyx_t_20; - PyObject *(*__pyx_t_21)(PyObject *); + Py_ssize_t __pyx_t_18; + int __pyx_t_19; + int __pyx_t_20; + int __pyx_t_21; int __pyx_t_22; int __pyx_t_23; - int __pyx_t_24; - int __pyx_t_25; - int __pyx_t_26; __Pyx_RefNannySetupContext("train_batch_sg", 0); - /* "gensim/models/fasttext_inner.pyx":248 + /* "gensim/models/fasttext_inner.pyx":247 * * def train_batch_sg(model, sentences, alpha, _work, _l1): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative * cdef int sample = (model.vocabulary.sample != 0) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 248, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 247, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 248, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 247, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":249 + /* "gensim/models/fasttext_inner.pyx":248 * def train_batch_sg(model, sentences, alpha, _work, _l1): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< * cdef int sample = (model.vocabulary.sample != 0) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 249, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 248, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 249, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 248, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":250 + /* "gensim/models/fasttext_inner.pyx":249 * cdef int hs = model.hs * cdef int negative = model.negative * cdef int sample = (model.vocabulary.sample != 0) # <<<<<<<<<<<<<< * * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.vectors_vocab)) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 250, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 249, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_sample); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 250, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_sample); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 249, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyObject_RichCompare(__pyx_t_3, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 250, __pyx_L1_error) + __pyx_t_1 = PyObject_RichCompare(__pyx_t_3, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 249, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 250, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 249, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_sample = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":252 + /* "gensim/models/fasttext_inner.pyx":251 * cdef int sample = (model.vocabulary.sample != 0) * * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.vectors_vocab)) # <<<<<<<<<<<<<< * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.trainables.vectors_vocab_lockf)) * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.vectors_ngrams)) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 252, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 251, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vectors_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 252, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vectors_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 251, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 252, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 251, __pyx_L1_error) __pyx_v_syn0_vocab = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":253 + /* "gensim/models/fasttext_inner.pyx":252 * * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.vectors_vocab)) * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.trainables.vectors_vocab_lockf)) # <<<<<<<<<<<<<< * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.vectors_ngrams)) * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.trainables.vectors_ngrams_lockf)) */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 253, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 252, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_vectors_vocab_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 253, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_vectors_vocab_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 252, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 253, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 252, __pyx_L1_error) __pyx_v_word_locks_vocab = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":254 + /* "gensim/models/fasttext_inner.pyx":253 * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.vectors_vocab)) * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.trainables.vectors_vocab_lockf)) * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.vectors_ngrams)) # <<<<<<<<<<<<<< * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.trainables.vectors_ngrams_lockf)) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 254, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 253, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vectors_ngrams); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 254, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vectors_ngrams); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 253, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 254, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 253, __pyx_L1_error) __pyx_v_syn0_ngrams = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":255 + /* "gensim/models/fasttext_inner.pyx":254 * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.trainables.vectors_vocab_lockf)) * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.vectors_ngrams)) * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.trainables.vectors_ngrams_lockf)) # <<<<<<<<<<<<<< * * cdef REAL_t *work */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 255, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 254, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_vectors_ngrams_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 255, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_vectors_ngrams_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 254, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 255, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 254, __pyx_L1_error) __pyx_v_word_locks_ngrams = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":260 + /* "gensim/models/fasttext_inner.pyx":259 * cdef REAL_t *l1 * * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.wv.vector_size * */ - __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == ((npy_float32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 260, __pyx_L1_error) + __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == ((npy_float32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 259, __pyx_L1_error) __pyx_v__alpha = __pyx_t_4; - /* "gensim/models/fasttext_inner.pyx":261 + /* "gensim/models/fasttext_inner.pyx":260 * * cdef REAL_t _alpha = alpha * cdef int size = model.wv.vector_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_SENTENCE_LEN] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 261, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 260, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vector_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 261, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vector_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 260, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 261, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 260, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_size = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":267 + /* "gensim/models/fasttext_inner.pyx":266 * cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] * cdef int sentence_idx[MAX_SENTENCE_LEN + 1] * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef int i, j, k */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 267, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 266, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 267, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 266, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_window = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":270 + /* "gensim/models/fasttext_inner.pyx":269 * * cdef int i, j, k * cdef int effective_words = 0, effective_sentences = 0 # <<<<<<<<<<<<<< @@ -3680,19 +3645,19 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_v_effective_words = 0; __pyx_v_effective_sentences = 0; - /* "gensim/models/fasttext_inner.pyx":290 + /* "gensim/models/fasttext_inner.pyx":289 * # dummy dictionary to ensure that the memory locations that subwords_idx point to * # are referenced throughout so that it isn't put back to free memory pool by Python's memory manager * subword_arrays = {} # <<<<<<<<<<<<<< * * if hs: */ - __pyx_t_3 = PyDict_New(); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 290, __pyx_L1_error) + __pyx_t_3 = PyDict_New(); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 289, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_v_subword_arrays = ((PyObject*)__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":292 + /* "gensim/models/fasttext_inner.pyx":291 * subword_arrays = {} * * if hs: # <<<<<<<<<<<<<< @@ -3702,23 +3667,23 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":293 + /* "gensim/models/fasttext_inner.pyx":292 * * if hs: * syn1 = (np.PyArray_DATA(model.trainables.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 293, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 292, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 293, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 292, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 293, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 292, __pyx_L1_error) __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":292 + /* "gensim/models/fasttext_inner.pyx":291 * subword_arrays = {} * * if hs: # <<<<<<<<<<<<<< @@ -3727,7 +3692,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":295 + /* "gensim/models/fasttext_inner.pyx":294 * syn1 = (np.PyArray_DATA(model.trainables.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -3737,55 +3702,55 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_v_negative != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":296 + /* "gensim/models/fasttext_inner.pyx":295 * * if negative: * syn1neg = (np.PyArray_DATA(model.trainables.syn1neg)) # <<<<<<<<<<<<<< * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 296, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 295, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 296, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 295, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 296, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 295, __pyx_L1_error) __pyx_v_syn1neg = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":297 + /* "gensim/models/fasttext_inner.pyx":296 * if negative: * syn1neg = (np.PyArray_DATA(model.trainables.syn1neg)) * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) # <<<<<<<<<<<<<< * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 297, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 296, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 297, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 296, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 297, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 296, __pyx_L1_error) __pyx_v_cum_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":298 + /* "gensim/models/fasttext_inner.pyx":297 * syn1neg = (np.PyArray_DATA(model.trainables.syn1neg)) * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) # <<<<<<<<<<<<<< * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 298, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 297, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 298, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 297, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_6 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_6 == -1)) __PYX_ERR(0, 298, __pyx_L1_error) + __pyx_t_6 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_6 == -1)) __PYX_ERR(0, 297, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_cum_table_len = __pyx_t_6; - /* "gensim/models/fasttext_inner.pyx":295 + /* "gensim/models/fasttext_inner.pyx":294 * syn1 = (np.PyArray_DATA(model.trainables.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -3794,7 +3759,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":299 + /* "gensim/models/fasttext_inner.pyx":298 * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: # <<<<<<<<<<<<<< @@ -3812,41 +3777,41 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_L6_bool_binop_done:; if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":300 + /* "gensim/models/fasttext_inner.pyx":299 * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 300, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 299, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 300, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 299, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 300, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 299, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 300, __pyx_L1_error) + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 299, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 300, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 299, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 300, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 299, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 300, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 299, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 300, __pyx_L1_error) + __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 299, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 300, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 299, __pyx_L1_error) __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_next_random = __pyx_t_9; - /* "gensim/models/fasttext_inner.pyx":299 + /* "gensim/models/fasttext_inner.pyx":298 * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: # <<<<<<<<<<<<<< @@ -3855,42 +3820,42 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":303 + /* "gensim/models/fasttext_inner.pyx":302 * * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< * l1 = np.PyArray_DATA(_l1) * */ - if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 303, __pyx_L1_error) + if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 302, __pyx_L1_error) __pyx_v_work = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); - /* "gensim/models/fasttext_inner.pyx":304 + /* "gensim/models/fasttext_inner.pyx":303 * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) * l1 = np.PyArray_DATA(_l1) # <<<<<<<<<<<<<< * * # prepare C structures so we can go "full C" and release the Python GIL */ - if (!(likely(((__pyx_v__l1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__l1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 304, __pyx_L1_error) + if (!(likely(((__pyx_v__l1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__l1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 303, __pyx_L1_error) __pyx_v_l1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__l1))); - /* "gensim/models/fasttext_inner.pyx":307 + /* "gensim/models/fasttext_inner.pyx":306 * * # prepare C structures so we can go "full C" and release the Python GIL * vlookup = model.wv.vocab # <<<<<<<<<<<<<< * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: */ - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 307, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 306, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 307, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 306, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_vlookup = __pyx_t_3; __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":308 + /* "gensim/models/fasttext_inner.pyx":307 * # prepare C structures so we can go "full C" and release the Python GIL * vlookup = model.wv.vocab * sentence_idx[0] = 0 # indices of the first sentence always start at 0 # <<<<<<<<<<<<<< @@ -3899,7 +3864,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ (__pyx_v_sentence_idx[0]) = 0; - /* "gensim/models/fasttext_inner.pyx":309 + /* "gensim/models/fasttext_inner.pyx":308 * vlookup = model.wv.vocab * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: # <<<<<<<<<<<<<< @@ -3910,26 +3875,26 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_3 = __pyx_v_sentences; __Pyx_INCREF(__pyx_t_3); __pyx_t_6 = 0; __pyx_t_10 = NULL; } else { - __pyx_t_6 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_sentences); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 309, __pyx_L1_error) + __pyx_t_6 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_sentences); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 308, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_10 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 309, __pyx_L1_error) + __pyx_t_10 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 308, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_10)) { if (likely(PyList_CheckExact(__pyx_t_3))) { if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_8 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 309, __pyx_L1_error) + __pyx_t_8 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 308, __pyx_L1_error) #else - __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 309, __pyx_L1_error) + __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 308, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); #endif } else { if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 309, __pyx_L1_error) + __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 308, __pyx_L1_error) #else - __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 309, __pyx_L1_error) + __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 308, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); #endif } @@ -3939,7 +3904,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 309, __pyx_L1_error) + else __PYX_ERR(0, 308, __pyx_L1_error) } break; } @@ -3948,18 +3913,18 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __Pyx_XDECREF_SET(__pyx_v_sent, __pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/fasttext_inner.pyx":310 + /* "gensim/models/fasttext_inner.pyx":309 * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: * if not sent: # <<<<<<<<<<<<<< * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: */ - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_v_sent); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 310, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_v_sent); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 309, __pyx_L1_error) __pyx_t_7 = ((!__pyx_t_5) != 0); if (__pyx_t_7) { - /* "gensim/models/fasttext_inner.pyx":311 + /* "gensim/models/fasttext_inner.pyx":310 * for sent in sentences: * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged # <<<<<<<<<<<<<< @@ -3968,7 +3933,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ goto __pyx_L8_continue; - /* "gensim/models/fasttext_inner.pyx":310 + /* "gensim/models/fasttext_inner.pyx":309 * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: * if not sent: # <<<<<<<<<<<<<< @@ -3977,7 +3942,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":312 + /* "gensim/models/fasttext_inner.pyx":311 * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: # <<<<<<<<<<<<<< @@ -3988,26 +3953,26 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_8 = __pyx_v_sent; __Pyx_INCREF(__pyx_t_8); __pyx_t_11 = 0; __pyx_t_12 = NULL; } else { - __pyx_t_11 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_v_sent); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 312, __pyx_L1_error) + __pyx_t_11 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_v_sent); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 311, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - __pyx_t_12 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 312, __pyx_L1_error) + __pyx_t_12 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 311, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_12)) { if (likely(PyList_CheckExact(__pyx_t_8))) { if (__pyx_t_11 >= PyList_GET_SIZE(__pyx_t_8)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 312, __pyx_L1_error) + __pyx_t_1 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 311, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 312, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 311, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } else { if (__pyx_t_11 >= PyTuple_GET_SIZE(__pyx_t_8)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 312, __pyx_L1_error) + __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 311, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 312, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 311, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } @@ -4017,7 +3982,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 312, __pyx_L1_error) + else __PYX_ERR(0, 311, __pyx_L1_error) } break; } @@ -4026,16 +3991,16 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __Pyx_XDECREF_SET(__pyx_v_token, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":313 + /* "gensim/models/fasttext_inner.pyx":312 * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: * word = vlookup[token] if token in vlookup else None # <<<<<<<<<<<<<< * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window */ - __pyx_t_7 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 313, __pyx_L1_error) + __pyx_t_7 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 312, __pyx_L1_error) if ((__pyx_t_7 != 0)) { - __pyx_t_13 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 313, __pyx_L1_error) + __pyx_t_13 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 312, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __pyx_t_1 = __pyx_t_13; __pyx_t_13 = 0; @@ -4046,7 +4011,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":314 + /* "gensim/models/fasttext_inner.pyx":313 * for token in sent: * word = vlookup[token] if token in vlookup else None * if word is None: # <<<<<<<<<<<<<< @@ -4057,7 +4022,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_t_7 != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":315 + /* "gensim/models/fasttext_inner.pyx":314 * word = vlookup[token] if token in vlookup else None * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window # <<<<<<<<<<<<<< @@ -4066,7 +4031,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ goto __pyx_L11_continue; - /* "gensim/models/fasttext_inner.pyx":314 + /* "gensim/models/fasttext_inner.pyx":313 * for token in sent: * word = vlookup[token] if token in vlookup else None * if word is None: # <<<<<<<<<<<<<< @@ -4075,7 +4040,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":316 + /* "gensim/models/fasttext_inner.pyx":315 * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< @@ -4088,20 +4053,20 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = __pyx_t_7; goto __pyx_L15_bool_binop_done; } - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 316, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 315, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_13 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 316, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 315, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_14 = PyObject_RichCompare(__pyx_t_1, __pyx_t_13, Py_LT); __Pyx_XGOTREF(__pyx_t_14); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 316, __pyx_L1_error) + __pyx_t_14 = PyObject_RichCompare(__pyx_t_1, __pyx_t_13, Py_LT); __Pyx_XGOTREF(__pyx_t_14); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 315, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_14); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 316, __pyx_L1_error) + __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_14); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 315, __pyx_L1_error) __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; __pyx_t_5 = __pyx_t_7; __pyx_L15_bool_binop_done:; if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":317 + /* "gensim/models/fasttext_inner.pyx":316 * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): * continue # <<<<<<<<<<<<<< @@ -4110,7 +4075,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ goto __pyx_L11_continue; - /* "gensim/models/fasttext_inner.pyx":316 + /* "gensim/models/fasttext_inner.pyx":315 * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< @@ -4119,302 +4084,117 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":318 + /* "gensim/models/fasttext_inner.pyx":317 * if sample and word.sample_int < random_int32(&next_random): * continue * indexes[effective_words] = word.index # <<<<<<<<<<<<<< * - * subwords = [ + * subwords = model.wv.buckets_word[word.index] */ - __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 318, __pyx_L1_error) + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 317, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_t_14); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 318, __pyx_L1_error) + __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_t_14); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 317, __pyx_L1_error) __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; (__pyx_v_indexes[__pyx_v_effective_words]) = __pyx_t_15; - /* "gensim/models/fasttext_inner.pyx":320 + /* "gensim/models/fasttext_inner.pyx":319 * indexes[effective_words] = word.index * - * subwords = [ # <<<<<<<<<<<<<< - * model.wv.hash2index[ft_hash(subword) % model.bucket] - * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n) + * subwords = model.wv.buckets_word[word.index] # <<<<<<<<<<<<<< + * word_subwords = np.array((word.index,) + subwords, dtype=np.uint32) + * subwords_idx_len[effective_words] = (len(subwords) + 1) */ - __pyx_t_14 = PyList_New(0); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 320, __pyx_L1_error) + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 319, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - - /* "gensim/models/fasttext_inner.pyx":322 - * subwords = [ - * model.wv.hash2index[ft_hash(subword) % model.bucket] - * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n) # <<<<<<<<<<<<<< - * ] - * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) - */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_compute_ngrams); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 322, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_buckets_word); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 319, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 319, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_14); + __pyx_t_1 = PyObject_GetItem(__pyx_t_13, __pyx_t_14); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 319, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 322, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_16); - __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_min_n); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 322, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_17); - __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 322, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_16); - __pyx_t_18 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_max_n); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 322, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_18); - __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - __pyx_t_16 = NULL; - __pyx_t_2 = 0; - if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_1))) { - __pyx_t_16 = PyMethod_GET_SELF(__pyx_t_1); - if (likely(__pyx_t_16)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1); - __Pyx_INCREF(__pyx_t_16); - __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_1, function); - __pyx_t_2 = 1; - } - } - #if CYTHON_FAST_PYCALL - if (PyFunction_Check(__pyx_t_1)) { - PyObject *__pyx_temp[4] = {__pyx_t_16, __pyx_v_token, __pyx_t_17, __pyx_t_18}; - __pyx_t_13 = __Pyx_PyFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_2, 3+__pyx_t_2); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 322, __pyx_L1_error) - __Pyx_XDECREF(__pyx_t_16); __pyx_t_16 = 0; - __Pyx_GOTREF(__pyx_t_13); - __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; - __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; - } else - #endif - #if CYTHON_FAST_PYCCALL - if (__Pyx_PyFastCFunction_Check(__pyx_t_1)) { - PyObject *__pyx_temp[4] = {__pyx_t_16, __pyx_v_token, __pyx_t_17, __pyx_t_18}; - __pyx_t_13 = __Pyx_PyCFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_2, 3+__pyx_t_2); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 322, __pyx_L1_error) - __Pyx_XDECREF(__pyx_t_16); __pyx_t_16 = 0; - __Pyx_GOTREF(__pyx_t_13); - __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; - __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; - } else - #endif - { - __pyx_t_19 = PyTuple_New(3+__pyx_t_2); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 322, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_19); - if (__pyx_t_16) { - __Pyx_GIVEREF(__pyx_t_16); PyTuple_SET_ITEM(__pyx_t_19, 0, __pyx_t_16); __pyx_t_16 = NULL; - } - __Pyx_INCREF(__pyx_v_token); - __Pyx_GIVEREF(__pyx_v_token); - PyTuple_SET_ITEM(__pyx_t_19, 0+__pyx_t_2, __pyx_v_token); - __Pyx_GIVEREF(__pyx_t_17); - PyTuple_SET_ITEM(__pyx_t_19, 1+__pyx_t_2, __pyx_t_17); - __Pyx_GIVEREF(__pyx_t_18); - PyTuple_SET_ITEM(__pyx_t_19, 2+__pyx_t_2, __pyx_t_18); - __pyx_t_17 = 0; - __pyx_t_18 = 0; - __pyx_t_13 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_19, NULL); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 322, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - } - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (likely(PyList_CheckExact(__pyx_t_13)) || PyTuple_CheckExact(__pyx_t_13)) { - __pyx_t_1 = __pyx_t_13; __Pyx_INCREF(__pyx_t_1); __pyx_t_20 = 0; - __pyx_t_21 = NULL; - } else { - __pyx_t_20 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_t_13); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 322, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_21 = Py_TYPE(__pyx_t_1)->tp_iternext; if (unlikely(!__pyx_t_21)) __PYX_ERR(0, 322, __pyx_L1_error) - } __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - for (;;) { - if (likely(!__pyx_t_21)) { - if (likely(PyList_CheckExact(__pyx_t_1))) { - if (__pyx_t_20 >= PyList_GET_SIZE(__pyx_t_1)) break; - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_13 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_20); __Pyx_INCREF(__pyx_t_13); __pyx_t_20++; if (unlikely(0 < 0)) __PYX_ERR(0, 322, __pyx_L1_error) - #else - __pyx_t_13 = PySequence_ITEM(__pyx_t_1, __pyx_t_20); __pyx_t_20++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 322, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - #endif - } else { - if (__pyx_t_20 >= PyTuple_GET_SIZE(__pyx_t_1)) break; - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_13 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_20); __Pyx_INCREF(__pyx_t_13); __pyx_t_20++; if (unlikely(0 < 0)) __PYX_ERR(0, 322, __pyx_L1_error) - #else - __pyx_t_13 = PySequence_ITEM(__pyx_t_1, __pyx_t_20); __pyx_t_20++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 322, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - #endif - } - } else { - __pyx_t_13 = __pyx_t_21(__pyx_t_1); - if (unlikely(!__pyx_t_13)) { - PyObject* exc_type = PyErr_Occurred(); - if (exc_type) { - if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 322, __pyx_L1_error) - } - break; - } - __Pyx_GOTREF(__pyx_t_13); - } - __Pyx_XDECREF_SET(__pyx_v_subword, __pyx_t_13); - __pyx_t_13 = 0; + __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; + __Pyx_XDECREF_SET(__pyx_v_subwords, __pyx_t_1); + __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":321 + /* "gensim/models/fasttext_inner.pyx":320 * - * subwords = [ - * model.wv.hash2index[ft_hash(subword) % model.bucket] # <<<<<<<<<<<<<< - * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n) - * ] - */ - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 321, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_hash2index); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 321, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_19); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_18 = __Pyx_GetModuleGlobalName(__pyx_n_s_ft_hash); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 321, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_18); - __pyx_t_17 = NULL; - if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_18))) { - __pyx_t_17 = PyMethod_GET_SELF(__pyx_t_18); - if (likely(__pyx_t_17)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_18); - __Pyx_INCREF(__pyx_t_17); - __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_18, function); - } - } - if (!__pyx_t_17) { - __pyx_t_13 = __Pyx_PyObject_CallOneArg(__pyx_t_18, __pyx_v_subword); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 321, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - } else { - #if CYTHON_FAST_PYCALL - if (PyFunction_Check(__pyx_t_18)) { - PyObject *__pyx_temp[2] = {__pyx_t_17, __pyx_v_subword}; - __pyx_t_13 = __Pyx_PyFunction_FastCall(__pyx_t_18, __pyx_temp+1-1, 1+1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 321, __pyx_L1_error) - __Pyx_XDECREF(__pyx_t_17); __pyx_t_17 = 0; - __Pyx_GOTREF(__pyx_t_13); - } else - #endif - #if CYTHON_FAST_PYCCALL - if (__Pyx_PyFastCFunction_Check(__pyx_t_18)) { - PyObject *__pyx_temp[2] = {__pyx_t_17, __pyx_v_subword}; - __pyx_t_13 = __Pyx_PyCFunction_FastCall(__pyx_t_18, __pyx_temp+1-1, 1+1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 321, __pyx_L1_error) - __Pyx_XDECREF(__pyx_t_17); __pyx_t_17 = 0; - __Pyx_GOTREF(__pyx_t_13); - } else - #endif - { - __pyx_t_16 = PyTuple_New(1+1); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 321, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_16); - __Pyx_GIVEREF(__pyx_t_17); PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_17); __pyx_t_17 = NULL; - __Pyx_INCREF(__pyx_v_subword); - __Pyx_GIVEREF(__pyx_v_subword); - PyTuple_SET_ITEM(__pyx_t_16, 0+1, __pyx_v_subword); - __pyx_t_13 = __Pyx_PyObject_Call(__pyx_t_18, __pyx_t_16, NULL); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 321, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - } - } - __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; - __pyx_t_18 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_bucket); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 321, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_18); - __pyx_t_16 = PyNumber_Remainder(__pyx_t_13, __pyx_t_18); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 321, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_16); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; - __pyx_t_18 = PyObject_GetItem(__pyx_t_19, __pyx_t_16); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 321, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_18); - __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - if (unlikely(__Pyx_ListComp_Append(__pyx_t_14, (PyObject*)__pyx_t_18))) __PYX_ERR(0, 320, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; - - /* "gensim/models/fasttext_inner.pyx":322 - * subwords = [ - * model.wv.hash2index[ft_hash(subword) % model.bucket] - * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n) # <<<<<<<<<<<<<< - * ] - * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) - */ - } - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_XDECREF_SET(__pyx_v_subwords, ((PyObject*)__pyx_t_14)); - __pyx_t_14 = 0; - - /* "gensim/models/fasttext_inner.pyx":324 - * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n) - * ] - * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) # <<<<<<<<<<<<<< + * subwords = model.wv.buckets_word[word.index] + * word_subwords = np.array((word.index,) + subwords, dtype=np.uint32) # <<<<<<<<<<<<<< * subwords_idx_len[effective_words] = (len(subwords) + 1) * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) */ - __pyx_t_14 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 324, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_14); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_array); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 324, __pyx_L1_error) + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 324, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_14); - __pyx_t_18 = PyList_New(1); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 324, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_18); - __Pyx_GIVEREF(__pyx_t_14); - PyList_SET_ITEM(__pyx_t_18, 0, __pyx_t_14); - __pyx_t_14 = 0; - __pyx_t_14 = PyNumber_Add(__pyx_t_18, __pyx_v_subwords); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 324, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_14); - __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; - __pyx_t_18 = PyTuple_New(1); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 324, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_18); - __Pyx_GIVEREF(__pyx_t_14); - PyTuple_SET_ITEM(__pyx_t_18, 0, __pyx_t_14); - __pyx_t_14 = 0; - __pyx_t_14 = PyDict_New(); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 324, __pyx_L1_error) + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_array); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_16 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 324, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 320, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_13 = PyTuple_New(1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 320, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __Pyx_GIVEREF(__pyx_t_1); + PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_1); + __pyx_t_1 = 0; + __pyx_t_1 = PyNumber_Add(__pyx_t_13, __pyx_v_subwords); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 320, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __pyx_t_13 = PyTuple_New(1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 320, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __Pyx_GIVEREF(__pyx_t_1); + PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_1); + __pyx_t_1 = 0; + __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 320, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_16 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_uint32); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 324, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_19); + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_uint32); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 320, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - if (PyDict_SetItem(__pyx_t_14, __pyx_n_s_dtype, __pyx_t_19) < 0) __PYX_ERR(0, 324, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - __pyx_t_19 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_18, __pyx_t_14); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 324, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_19); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; + if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_dtype, __pyx_t_17) < 0) __PYX_ERR(0, 320, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; + __pyx_t_17 = __Pyx_PyObject_Call(__pyx_t_14, __pyx_t_13, __pyx_t_1); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 320, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - __Pyx_XDECREF_SET(__pyx_v_word_subwords, __pyx_t_19); - __pyx_t_19 = 0; + __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_XDECREF_SET(__pyx_v_word_subwords, __pyx_t_17); + __pyx_t_17 = 0; - /* "gensim/models/fasttext_inner.pyx":325 - * ] - * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) + /* "gensim/models/fasttext_inner.pyx":321 + * subwords = model.wv.buckets_word[word.index] + * word_subwords = np.array((word.index,) + subwords, dtype=np.uint32) * subwords_idx_len[effective_words] = (len(subwords) + 1) # <<<<<<<<<<<<<< * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) * # ensures reference count of word_subwords doesn't reach 0 */ - __pyx_t_20 = PyList_GET_SIZE(__pyx_v_subwords); if (unlikely(__pyx_t_20 == -1)) __PYX_ERR(0, 325, __pyx_L1_error) - (__pyx_v_subwords_idx_len[__pyx_v_effective_words]) = ((int)(__pyx_t_20 + 1)); + __pyx_t_18 = PyObject_Length(__pyx_v_subwords); if (unlikely(__pyx_t_18 == -1)) __PYX_ERR(0, 321, __pyx_L1_error) + (__pyx_v_subwords_idx_len[__pyx_v_effective_words]) = ((int)(__pyx_t_18 + 1)); - /* "gensim/models/fasttext_inner.pyx":326 - * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) + /* "gensim/models/fasttext_inner.pyx":322 + * word_subwords = np.array((word.index,) + subwords, dtype=np.uint32) * subwords_idx_len[effective_words] = (len(subwords) + 1) * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) # <<<<<<<<<<<<<< * # ensures reference count of word_subwords doesn't reach 0 * subword_arrays[effective_words] = word_subwords */ - if (!(likely(((__pyx_v_word_subwords) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_subwords, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 326, __pyx_L1_error) + if (!(likely(((__pyx_v_word_subwords) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_subwords, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 322, __pyx_L1_error) (__pyx_v_subwords_idx[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_subwords))); - /* "gensim/models/fasttext_inner.pyx":328 + /* "gensim/models/fasttext_inner.pyx":324 * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) * # ensures reference count of word_subwords doesn't reach 0 * subword_arrays[effective_words] = word_subwords # <<<<<<<<<<<<<< * * if hs: */ - __pyx_t_19 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 328, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_19); - if (unlikely(PyDict_SetItem(__pyx_v_subword_arrays, __pyx_t_19, __pyx_v_word_subwords) < 0)) __PYX_ERR(0, 328, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; + __pyx_t_17 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 324, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + if (unlikely(PyDict_SetItem(__pyx_v_subword_arrays, __pyx_t_17, __pyx_v_word_subwords) < 0)) __PYX_ERR(0, 324, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; - /* "gensim/models/fasttext_inner.pyx":330 + /* "gensim/models/fasttext_inner.pyx":326 * subword_arrays[effective_words] = word_subwords * * if hs: # <<<<<<<<<<<<<< @@ -4424,46 +4204,46 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":331 + /* "gensim/models/fasttext_inner.pyx":327 * * if hs: * codelens[effective_words] = len(word.code) # <<<<<<<<<<<<<< * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) */ - __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 331, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_19); - __pyx_t_20 = PyObject_Length(__pyx_t_19); if (unlikely(__pyx_t_20 == -1)) __PYX_ERR(0, 331, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - (__pyx_v_codelens[__pyx_v_effective_words]) = ((int)__pyx_t_20); + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 327, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + __pyx_t_18 = PyObject_Length(__pyx_t_17); if (unlikely(__pyx_t_18 == -1)) __PYX_ERR(0, 327, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; + (__pyx_v_codelens[__pyx_v_effective_words]) = ((int)__pyx_t_18); - /* "gensim/models/fasttext_inner.pyx":332 + /* "gensim/models/fasttext_inner.pyx":328 * if hs: * codelens[effective_words] = len(word.code) * codes[effective_words] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< * points[effective_words] = np.PyArray_DATA(word.point) * */ - __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 332, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_19); - if (!(likely(((__pyx_t_19) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_19, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 332, __pyx_L1_error) - (__pyx_v_codes[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_19))); - __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 328, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + if (!(likely(((__pyx_t_17) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_17, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 328, __pyx_L1_error) + (__pyx_v_codes[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_17))); + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; - /* "gensim/models/fasttext_inner.pyx":333 + /* "gensim/models/fasttext_inner.pyx":329 * codelens[effective_words] = len(word.code) * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< * * effective_words += 1 */ - __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 333, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_19); - if (!(likely(((__pyx_t_19) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_19, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 333, __pyx_L1_error) - (__pyx_v_points[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_19))); - __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 329, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + if (!(likely(((__pyx_t_17) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_17, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 329, __pyx_L1_error) + (__pyx_v_points[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_17))); + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; - /* "gensim/models/fasttext_inner.pyx":330 + /* "gensim/models/fasttext_inner.pyx":326 * subword_arrays[effective_words] = word_subwords * * if hs: # <<<<<<<<<<<<<< @@ -4472,7 +4252,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":335 + /* "gensim/models/fasttext_inner.pyx":331 * points[effective_words] = np.PyArray_DATA(word.point) * * effective_words += 1 # <<<<<<<<<<<<<< @@ -4481,7 +4261,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_effective_words = (__pyx_v_effective_words + 1); - /* "gensim/models/fasttext_inner.pyx":336 + /* "gensim/models/fasttext_inner.pyx":332 * * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< @@ -4491,7 +4271,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = ((__pyx_v_effective_words == 0x2710) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":337 + /* "gensim/models/fasttext_inner.pyx":333 * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: * break # <<<<<<<<<<<<<< @@ -4500,7 +4280,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ goto __pyx_L12_break; - /* "gensim/models/fasttext_inner.pyx":336 + /* "gensim/models/fasttext_inner.pyx":332 * * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< @@ -4509,7 +4289,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":312 + /* "gensim/models/fasttext_inner.pyx":311 * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: # <<<<<<<<<<<<<< @@ -4521,7 +4301,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_L12_break:; __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/fasttext_inner.pyx":342 + /* "gensim/models/fasttext_inner.pyx":338 * # across sentence boundaries. * # indices of sentence number X are between tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 349, __pyx_L1_error) + __pyx_t_6 = -1; __pyx_t_17 = PyObject_GetIter(__pyx_t_3); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 345, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + __pyx_t_10 = Py_TYPE(__pyx_t_17)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 345, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; for (;;) { if (likely(!__pyx_t_10)) { - if (likely(PyList_CheckExact(__pyx_t_19))) { - if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_19)) break; + if (likely(PyList_CheckExact(__pyx_t_17))) { + if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_17)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyList_GET_ITEM(__pyx_t_19, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 349, __pyx_L1_error) + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_17, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 345, __pyx_L1_error) #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_19, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 349, __pyx_L1_error) + __pyx_t_3 = PySequence_ITEM(__pyx_t_17, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 345, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); #endif } else { - if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_19)) break; + if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_17)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_19, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 349, __pyx_L1_error) + __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_17, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 345, __pyx_L1_error) #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_19, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 349, __pyx_L1_error) + __pyx_t_3 = PySequence_ITEM(__pyx_t_17, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 345, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); #endif } } else { - __pyx_t_3 = __pyx_t_10(__pyx_t_19); + __pyx_t_3 = __pyx_t_10(__pyx_t_17); if (unlikely(!__pyx_t_3)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 349, __pyx_L1_error) + else __PYX_ERR(0, 345, __pyx_L1_error) } break; } @@ -4693,17 +4473,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_v_i = __pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "gensim/models/fasttext_inner.pyx":350 + /* "gensim/models/fasttext_inner.pyx":346 * # precompute "reduced window" offsets in a single randint() call * for i, item in enumerate(model.random.randint(0, window, effective_words)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * * with nogil: */ - __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 350, __pyx_L1_error) + __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 346, __pyx_L1_error) (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_15; - /* "gensim/models/fasttext_inner.pyx":349 + /* "gensim/models/fasttext_inner.pyx":345 * * # precompute "reduced window" offsets in a single randint() call * for i, item in enumerate(model.random.randint(0, window, effective_words)): # <<<<<<<<<<<<<< @@ -4711,9 +4491,9 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON * */ } - __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; - /* "gensim/models/fasttext_inner.pyx":352 + /* "gensim/models/fasttext_inner.pyx":348 * reduced_windows[i] = item * * with nogil: # <<<<<<<<<<<<<< @@ -4727,7 +4507,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON #endif /*try:*/ { - /* "gensim/models/fasttext_inner.pyx":353 + /* "gensim/models/fasttext_inner.pyx":349 * * with nogil: * for sent_idx in range(effective_sentences): # <<<<<<<<<<<<<< @@ -4735,10 +4515,10 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON * idx_end = sentence_idx[sent_idx + 1] */ __pyx_t_2 = __pyx_v_effective_sentences; - for (__pyx_t_22 = 0; __pyx_t_22 < __pyx_t_2; __pyx_t_22+=1) { - __pyx_v_sent_idx = __pyx_t_22; + for (__pyx_t_19 = 0; __pyx_t_19 < __pyx_t_2; __pyx_t_19+=1) { + __pyx_v_sent_idx = __pyx_t_19; - /* "gensim/models/fasttext_inner.pyx":354 + /* "gensim/models/fasttext_inner.pyx":350 * with nogil: * for sent_idx in range(effective_sentences): * idx_start = sentence_idx[sent_idx] # <<<<<<<<<<<<<< @@ -4747,7 +4527,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_idx_start = (__pyx_v_sentence_idx[__pyx_v_sent_idx]); - /* "gensim/models/fasttext_inner.pyx":355 + /* "gensim/models/fasttext_inner.pyx":351 * for sent_idx in range(effective_sentences): * idx_start = sentence_idx[sent_idx] * idx_end = sentence_idx[sent_idx + 1] # <<<<<<<<<<<<<< @@ -4756,18 +4536,18 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_idx_end = (__pyx_v_sentence_idx[(__pyx_v_sent_idx + 1)]); - /* "gensim/models/fasttext_inner.pyx":356 + /* "gensim/models/fasttext_inner.pyx":352 * idx_start = sentence_idx[sent_idx] * idx_end = sentence_idx[sent_idx + 1] * for i in range(idx_start, idx_end): # <<<<<<<<<<<<<< * j = i - window + reduced_windows[i] * if j < idx_start: */ - __pyx_t_23 = __pyx_v_idx_end; - for (__pyx_t_24 = __pyx_v_idx_start; __pyx_t_24 < __pyx_t_23; __pyx_t_24+=1) { - __pyx_v_i = __pyx_t_24; + __pyx_t_20 = __pyx_v_idx_end; + for (__pyx_t_21 = __pyx_v_idx_start; __pyx_t_21 < __pyx_t_20; __pyx_t_21+=1) { + __pyx_v_i = __pyx_t_21; - /* "gensim/models/fasttext_inner.pyx":357 + /* "gensim/models/fasttext_inner.pyx":353 * idx_end = sentence_idx[sent_idx + 1] * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< @@ -4776,7 +4556,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/fasttext_inner.pyx":358 + /* "gensim/models/fasttext_inner.pyx":354 * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] * if j < idx_start: # <<<<<<<<<<<<<< @@ -4786,7 +4566,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = ((__pyx_v_j < __pyx_v_idx_start) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":359 + /* "gensim/models/fasttext_inner.pyx":355 * j = i - window + reduced_windows[i] * if j < idx_start: * j = idx_start # <<<<<<<<<<<<<< @@ -4795,7 +4575,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_j = __pyx_v_idx_start; - /* "gensim/models/fasttext_inner.pyx":358 + /* "gensim/models/fasttext_inner.pyx":354 * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] * if j < idx_start: # <<<<<<<<<<<<<< @@ -4804,7 +4584,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":360 + /* "gensim/models/fasttext_inner.pyx":356 * if j < idx_start: * j = idx_start * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -4813,7 +4593,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/fasttext_inner.pyx":361 + /* "gensim/models/fasttext_inner.pyx":357 * j = idx_start * k = i + window + 1 - reduced_windows[i] * if k > idx_end: # <<<<<<<<<<<<<< @@ -4823,7 +4603,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = ((__pyx_v_k > __pyx_v_idx_end) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":362 + /* "gensim/models/fasttext_inner.pyx":358 * k = i + window + 1 - reduced_windows[i] * if k > idx_end: * k = idx_end # <<<<<<<<<<<<<< @@ -4832,7 +4612,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_k = __pyx_v_idx_end; - /* "gensim/models/fasttext_inner.pyx":361 + /* "gensim/models/fasttext_inner.pyx":357 * j = idx_start * k = i + window + 1 - reduced_windows[i] * if k > idx_end: # <<<<<<<<<<<<<< @@ -4841,18 +4621,18 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":363 + /* "gensim/models/fasttext_inner.pyx":359 * if k > idx_end: * k = idx_end * for j in range(j, k): # <<<<<<<<<<<<<< * if j == i: * continue */ - __pyx_t_25 = __pyx_v_k; - for (__pyx_t_26 = __pyx_v_j; __pyx_t_26 < __pyx_t_25; __pyx_t_26+=1) { - __pyx_v_j = __pyx_t_26; + __pyx_t_22 = __pyx_v_k; + for (__pyx_t_23 = __pyx_v_j; __pyx_t_23 < __pyx_t_22; __pyx_t_23+=1) { + __pyx_v_j = __pyx_t_23; - /* "gensim/models/fasttext_inner.pyx":364 + /* "gensim/models/fasttext_inner.pyx":360 * k = idx_end * for j in range(j, k): * if j == i: # <<<<<<<<<<<<<< @@ -4862,16 +4642,16 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = ((__pyx_v_j == __pyx_v_i) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":365 + /* "gensim/models/fasttext_inner.pyx":361 * for j in range(j, k): * if j == i: * continue # <<<<<<<<<<<<<< * if hs: * fast_sentence_sg_hs( */ - goto __pyx_L33_continue; + goto __pyx_L31_continue; - /* "gensim/models/fasttext_inner.pyx":364 + /* "gensim/models/fasttext_inner.pyx":360 * k = idx_end * for j in range(j, k): * if j == i: # <<<<<<<<<<<<<< @@ -4880,7 +4660,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":366 + /* "gensim/models/fasttext_inner.pyx":362 * if j == i: * continue * if hs: # <<<<<<<<<<<<<< @@ -4890,7 +4670,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":367 + /* "gensim/models/fasttext_inner.pyx":363 * continue * if hs: * fast_sentence_sg_hs( # <<<<<<<<<<<<<< @@ -4899,7 +4679,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs((__pyx_v_points[__pyx_v_j]), (__pyx_v_codes[__pyx_v_j]), (__pyx_v_codelens[__pyx_v_j]), __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1, __pyx_v_size, (__pyx_v_subwords_idx[__pyx_v_i]), (__pyx_v_subwords_idx_len[__pyx_v_i]), __pyx_v__alpha, __pyx_v_work, __pyx_v_l1, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); - /* "gensim/models/fasttext_inner.pyx":366 + /* "gensim/models/fasttext_inner.pyx":362 * if j == i: * continue * if hs: # <<<<<<<<<<<<<< @@ -4908,7 +4688,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":371 + /* "gensim/models/fasttext_inner.pyx":367 * subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, * word_locks_ngrams) * if negative: # <<<<<<<<<<<<<< @@ -4918,7 +4698,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_v_negative != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":372 + /* "gensim/models/fasttext_inner.pyx":368 * word_locks_ngrams) * if negative: * next_random = fast_sentence_sg_neg( # <<<<<<<<<<<<<< @@ -4927,7 +4707,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_next_random = __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_neg(__pyx_v_negative, __pyx_v_cum_table, __pyx_v_cum_table_len, __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_j]), (__pyx_v_subwords_idx[__pyx_v_i]), (__pyx_v_subwords_idx_len[__pyx_v_i]), __pyx_v__alpha, __pyx_v_work, __pyx_v_l1, __pyx_v_next_random, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); - /* "gensim/models/fasttext_inner.pyx":371 + /* "gensim/models/fasttext_inner.pyx":367 * subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, * word_locks_ngrams) * if negative: # <<<<<<<<<<<<<< @@ -4935,13 +4715,13 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON * negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, */ } - __pyx_L33_continue:; + __pyx_L31_continue:; } } } } - /* "gensim/models/fasttext_inner.pyx":352 + /* "gensim/models/fasttext_inner.pyx":348 * reduced_windows[i] = item * * with nogil: # <<<<<<<<<<<<<< @@ -4953,13 +4733,13 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON #ifdef WITH_THREAD Py_BLOCK_THREADS #endif - goto __pyx_L26; + goto __pyx_L24; } - __pyx_L26:; + __pyx_L24:; } } - /* "gensim/models/fasttext_inner.pyx":377 + /* "gensim/models/fasttext_inner.pyx":373 * next_random, word_locks_vocab, word_locks_ngrams) * * return effective_words # <<<<<<<<<<<<<< @@ -4967,13 +4747,13 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_19 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 377, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_19); - __pyx_r = __pyx_t_19; - __pyx_t_19 = 0; + __pyx_t_17 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 373, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + __pyx_r = __pyx_t_17; + __pyx_t_17 = 0; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":247 + /* "gensim/models/fasttext_inner.pyx":246 * * * def train_batch_sg(model, sentences, alpha, _work, _l1): # <<<<<<<<<<<<<< @@ -4990,8 +4770,6 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __Pyx_XDECREF(__pyx_t_14); __Pyx_XDECREF(__pyx_t_16); __Pyx_XDECREF(__pyx_t_17); - __Pyx_XDECREF(__pyx_t_18); - __Pyx_XDECREF(__pyx_t_19); __Pyx_AddTraceback("gensim.models.fasttext_inner.train_batch_sg", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; @@ -5003,13 +4781,12 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __Pyx_XDECREF(__pyx_v_subwords); __Pyx_XDECREF(__pyx_v_word_subwords); __Pyx_XDECREF(__pyx_v_item); - __Pyx_XDECREF(__pyx_v_subword); __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } -/* "gensim/models/fasttext_inner.pyx":380 +/* "gensim/models/fasttext_inner.pyx":376 * * * def train_batch_cbow(model, sentences, alpha, _work, _neu1): # <<<<<<<<<<<<<< @@ -5052,26 +4829,26 @@ static PyObject *__pyx_pw_6gensim_6models_14fasttext_inner_3train_batch_cbow(PyO case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentences)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 1); __PYX_ERR(0, 380, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 1); __PYX_ERR(0, 376, __pyx_L3_error) } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 2); __PYX_ERR(0, 380, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 2); __PYX_ERR(0, 376, __pyx_L3_error) } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 3); __PYX_ERR(0, 380, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 3); __PYX_ERR(0, 376, __pyx_L3_error) } case 4: if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_neu1)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 4); __PYX_ERR(0, 380, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 4); __PYX_ERR(0, 376, __pyx_L3_error) } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_batch_cbow") < 0)) __PYX_ERR(0, 380, __pyx_L3_error) + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_batch_cbow") < 0)) __PYX_ERR(0, 376, __pyx_L3_error) } } else if (PyTuple_GET_SIZE(__pyx_args) != 5) { goto __pyx_L5_argtuple_error; @@ -5090,7 +4867,7 @@ static PyObject *__pyx_pw_6gensim_6models_14fasttext_inner_3train_batch_cbow(PyO } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 380, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 376, __pyx_L3_error) __pyx_L3_error:; __Pyx_AddTraceback("gensim.models.fasttext_inner.train_batch_cbow", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -5146,7 +4923,6 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT PyObject *__pyx_v_subwords = NULL; PyObject *__pyx_v_word_subwords = NULL; PyObject *__pyx_v_item = NULL; - PyObject *__pyx_v_subword = NULL; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; @@ -5166,176 +4942,173 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5numpy_uint32_t __pyx_t_15; PyObject *__pyx_t_16 = NULL; PyObject *__pyx_t_17 = NULL; - PyObject *__pyx_t_18 = NULL; - PyObject *__pyx_t_19 = NULL; - Py_ssize_t __pyx_t_20; - PyObject *(*__pyx_t_21)(PyObject *); - int __pyx_t_22; - int __pyx_t_23; - int __pyx_t_24; + Py_ssize_t __pyx_t_18; + int __pyx_t_19; + int __pyx_t_20; + int __pyx_t_21; __Pyx_RefNannySetupContext("train_batch_cbow", 0); - /* "gensim/models/fasttext_inner.pyx":381 + /* "gensim/models/fasttext_inner.pyx":377 * * def train_batch_cbow(model, sentences, alpha, _work, _neu1): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative * cdef int sample = (model.vocabulary.sample != 0) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 381, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 377, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 381, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 377, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":382 + /* "gensim/models/fasttext_inner.pyx":378 * def train_batch_cbow(model, sentences, alpha, _work, _neu1): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< * cdef int sample = (model.vocabulary.sample != 0) * cdef int cbow_mean = model.cbow_mean */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 382, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 378, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 382, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 378, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":383 + /* "gensim/models/fasttext_inner.pyx":379 * cdef int hs = model.hs * cdef int negative = model.negative * cdef int sample = (model.vocabulary.sample != 0) # <<<<<<<<<<<<<< * cdef int cbow_mean = model.cbow_mean * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 383, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 379, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_sample); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 383, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_sample); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 379, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyObject_RichCompare(__pyx_t_3, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 383, __pyx_L1_error) + __pyx_t_1 = PyObject_RichCompare(__pyx_t_3, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 379, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 383, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 379, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_sample = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":384 + /* "gensim/models/fasttext_inner.pyx":380 * cdef int negative = model.negative * cdef int sample = (model.vocabulary.sample != 0) * cdef int cbow_mean = model.cbow_mean # <<<<<<<<<<<<<< * * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.vectors_vocab)) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 384, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 380, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 384, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 380, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_cbow_mean = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":386 + /* "gensim/models/fasttext_inner.pyx":382 * cdef int cbow_mean = model.cbow_mean * * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.vectors_vocab)) # <<<<<<<<<<<<<< * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.trainables.vectors_vocab_lockf)) * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.vectors_ngrams)) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 386, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 382, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vectors_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 386, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vectors_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 382, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 386, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 382, __pyx_L1_error) __pyx_v_syn0_vocab = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":387 + /* "gensim/models/fasttext_inner.pyx":383 * * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.vectors_vocab)) * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.trainables.vectors_vocab_lockf)) # <<<<<<<<<<<<<< * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.vectors_ngrams)) * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.trainables.vectors_ngrams_lockf)) */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 387, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 383, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_vectors_vocab_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 387, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_vectors_vocab_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 383, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 387, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 383, __pyx_L1_error) __pyx_v_word_locks_vocab = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":388 + /* "gensim/models/fasttext_inner.pyx":384 * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.vectors_vocab)) * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.trainables.vectors_vocab_lockf)) * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.vectors_ngrams)) # <<<<<<<<<<<<<< * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.trainables.vectors_ngrams_lockf)) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 388, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 384, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vectors_ngrams); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 388, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vectors_ngrams); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 384, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 388, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 384, __pyx_L1_error) __pyx_v_syn0_ngrams = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":389 + /* "gensim/models/fasttext_inner.pyx":385 * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.trainables.vectors_vocab_lockf)) * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.vectors_ngrams)) * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.trainables.vectors_ngrams_lockf)) # <<<<<<<<<<<<<< * * cdef REAL_t *work */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 389, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 385, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_vectors_ngrams_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 389, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_vectors_ngrams_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 385, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 389, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 385, __pyx_L1_error) __pyx_v_word_locks_ngrams = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":392 + /* "gensim/models/fasttext_inner.pyx":388 * * cdef REAL_t *work * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.wv.vector_size * */ - __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == ((npy_float32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 392, __pyx_L1_error) + __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == ((npy_float32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 388, __pyx_L1_error) __pyx_v__alpha = __pyx_t_4; - /* "gensim/models/fasttext_inner.pyx":393 + /* "gensim/models/fasttext_inner.pyx":389 * cdef REAL_t *work * cdef REAL_t _alpha = alpha * cdef int size = model.wv.vector_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_SENTENCE_LEN] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 393, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 389, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vector_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 393, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vector_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 389, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 393, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 389, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_size = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":399 + /* "gensim/models/fasttext_inner.pyx":395 * cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] * cdef int sentence_idx[MAX_SENTENCE_LEN + 1] * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef int i, j, k */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 399, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 395, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 399, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 395, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_window = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":402 + /* "gensim/models/fasttext_inner.pyx":398 * * cdef int i, j, k * cdef int effective_words = 0, effective_sentences = 0 # <<<<<<<<<<<<<< @@ -5345,19 +5118,19 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_v_effective_words = 0; __pyx_v_effective_sentences = 0; - /* "gensim/models/fasttext_inner.pyx":422 + /* "gensim/models/fasttext_inner.pyx":418 * # dummy dictionary to ensure that the memory locations that subwords_idx point to * # are referenced throughout so that it isn't put back to free memory pool by Python's memory manager * subword_arrays = {} # <<<<<<<<<<<<<< * * if hs: */ - __pyx_t_3 = PyDict_New(); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 422, __pyx_L1_error) + __pyx_t_3 = PyDict_New(); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 418, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_v_subword_arrays = ((PyObject*)__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":424 + /* "gensim/models/fasttext_inner.pyx":420 * subword_arrays = {} * * if hs: # <<<<<<<<<<<<<< @@ -5367,23 +5140,23 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":425 + /* "gensim/models/fasttext_inner.pyx":421 * * if hs: * syn1 = (np.PyArray_DATA(model.trainables.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 425, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 421, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 425, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 421, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 425, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 421, __pyx_L1_error) __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":424 + /* "gensim/models/fasttext_inner.pyx":420 * subword_arrays = {} * * if hs: # <<<<<<<<<<<<<< @@ -5392,7 +5165,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":427 + /* "gensim/models/fasttext_inner.pyx":423 * syn1 = (np.PyArray_DATA(model.trainables.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -5402,55 +5175,55 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_v_negative != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":428 + /* "gensim/models/fasttext_inner.pyx":424 * * if negative: * syn1neg = (np.PyArray_DATA(model.trainables.syn1neg)) # <<<<<<<<<<<<<< * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 428, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 424, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 428, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 424, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 428, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 424, __pyx_L1_error) __pyx_v_syn1neg = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":429 + /* "gensim/models/fasttext_inner.pyx":425 * if negative: * syn1neg = (np.PyArray_DATA(model.trainables.syn1neg)) * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) # <<<<<<<<<<<<<< * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 429, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 425, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 429, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 425, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 429, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 425, __pyx_L1_error) __pyx_v_cum_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":430 + /* "gensim/models/fasttext_inner.pyx":426 * syn1neg = (np.PyArray_DATA(model.trainables.syn1neg)) * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) # <<<<<<<<<<<<<< * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 430, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 426, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 430, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 426, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_6 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_6 == -1)) __PYX_ERR(0, 430, __pyx_L1_error) + __pyx_t_6 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_6 == -1)) __PYX_ERR(0, 426, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_cum_table_len = __pyx_t_6; - /* "gensim/models/fasttext_inner.pyx":427 + /* "gensim/models/fasttext_inner.pyx":423 * syn1 = (np.PyArray_DATA(model.trainables.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -5459,7 +5232,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":431 + /* "gensim/models/fasttext_inner.pyx":427 * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: # <<<<<<<<<<<<<< @@ -5477,41 +5250,41 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_L6_bool_binop_done:; if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":432 + /* "gensim/models/fasttext_inner.pyx":428 * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 432, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 428, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 432, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 428, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 432, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 428, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 432, __pyx_L1_error) + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 428, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 432, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 428, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 432, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 428, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 432, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 428, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 432, __pyx_L1_error) + __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 428, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 432, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 428, __pyx_L1_error) __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_next_random = __pyx_t_9; - /* "gensim/models/fasttext_inner.pyx":431 + /* "gensim/models/fasttext_inner.pyx":427 * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: # <<<<<<<<<<<<<< @@ -5520,42 +5293,42 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":435 + /* "gensim/models/fasttext_inner.pyx":431 * * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< * neu1 = np.PyArray_DATA(_neu1) * */ - if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 435, __pyx_L1_error) + if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 431, __pyx_L1_error) __pyx_v_work = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); - /* "gensim/models/fasttext_inner.pyx":436 + /* "gensim/models/fasttext_inner.pyx":432 * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) * neu1 = np.PyArray_DATA(_neu1) # <<<<<<<<<<<<<< * * # prepare C structures so we can go "full C" and release the Python GIL */ - if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 436, __pyx_L1_error) + if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 432, __pyx_L1_error) __pyx_v_neu1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__neu1))); - /* "gensim/models/fasttext_inner.pyx":439 + /* "gensim/models/fasttext_inner.pyx":435 * * # prepare C structures so we can go "full C" and release the Python GIL * vlookup = model.wv.vocab # <<<<<<<<<<<<<< * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: */ - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 439, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 435, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 439, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 435, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_vlookup = __pyx_t_3; __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":440 + /* "gensim/models/fasttext_inner.pyx":436 * # prepare C structures so we can go "full C" and release the Python GIL * vlookup = model.wv.vocab * sentence_idx[0] = 0 # indices of the first sentence always start at 0 # <<<<<<<<<<<<<< @@ -5564,7 +5337,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ (__pyx_v_sentence_idx[0]) = 0; - /* "gensim/models/fasttext_inner.pyx":441 + /* "gensim/models/fasttext_inner.pyx":437 * vlookup = model.wv.vocab * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: # <<<<<<<<<<<<<< @@ -5575,26 +5348,26 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_3 = __pyx_v_sentences; __Pyx_INCREF(__pyx_t_3); __pyx_t_6 = 0; __pyx_t_10 = NULL; } else { - __pyx_t_6 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_sentences); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 441, __pyx_L1_error) + __pyx_t_6 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_sentences); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 437, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_10 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 441, __pyx_L1_error) + __pyx_t_10 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 437, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_10)) { if (likely(PyList_CheckExact(__pyx_t_3))) { if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_8 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 441, __pyx_L1_error) + __pyx_t_8 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 437, __pyx_L1_error) #else - __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 441, __pyx_L1_error) + __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 437, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); #endif } else { if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 441, __pyx_L1_error) + __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 437, __pyx_L1_error) #else - __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 441, __pyx_L1_error) + __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 437, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); #endif } @@ -5604,7 +5377,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 441, __pyx_L1_error) + else __PYX_ERR(0, 437, __pyx_L1_error) } break; } @@ -5613,18 +5386,18 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __Pyx_XDECREF_SET(__pyx_v_sent, __pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/fasttext_inner.pyx":442 + /* "gensim/models/fasttext_inner.pyx":438 * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: * if not sent: # <<<<<<<<<<<<<< * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: */ - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_v_sent); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 442, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_v_sent); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 438, __pyx_L1_error) __pyx_t_7 = ((!__pyx_t_5) != 0); if (__pyx_t_7) { - /* "gensim/models/fasttext_inner.pyx":443 + /* "gensim/models/fasttext_inner.pyx":439 * for sent in sentences: * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged # <<<<<<<<<<<<<< @@ -5633,7 +5406,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ goto __pyx_L8_continue; - /* "gensim/models/fasttext_inner.pyx":442 + /* "gensim/models/fasttext_inner.pyx":438 * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: * if not sent: # <<<<<<<<<<<<<< @@ -5642,7 +5415,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":444 + /* "gensim/models/fasttext_inner.pyx":440 * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: # <<<<<<<<<<<<<< @@ -5653,26 +5426,26 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_8 = __pyx_v_sent; __Pyx_INCREF(__pyx_t_8); __pyx_t_11 = 0; __pyx_t_12 = NULL; } else { - __pyx_t_11 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_v_sent); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 444, __pyx_L1_error) + __pyx_t_11 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_v_sent); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 440, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - __pyx_t_12 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 444, __pyx_L1_error) + __pyx_t_12 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 440, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_12)) { if (likely(PyList_CheckExact(__pyx_t_8))) { if (__pyx_t_11 >= PyList_GET_SIZE(__pyx_t_8)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 444, __pyx_L1_error) + __pyx_t_1 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 440, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 444, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 440, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } else { if (__pyx_t_11 >= PyTuple_GET_SIZE(__pyx_t_8)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 444, __pyx_L1_error) + __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 440, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 444, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 440, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } @@ -5682,7 +5455,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 444, __pyx_L1_error) + else __PYX_ERR(0, 440, __pyx_L1_error) } break; } @@ -5691,16 +5464,16 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __Pyx_XDECREF_SET(__pyx_v_token, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":445 + /* "gensim/models/fasttext_inner.pyx":441 * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: * word = vlookup[token] if token in vlookup else None # <<<<<<<<<<<<<< * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window */ - __pyx_t_7 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 445, __pyx_L1_error) + __pyx_t_7 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 441, __pyx_L1_error) if ((__pyx_t_7 != 0)) { - __pyx_t_13 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 445, __pyx_L1_error) + __pyx_t_13 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 441, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __pyx_t_1 = __pyx_t_13; __pyx_t_13 = 0; @@ -5711,7 +5484,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":446 + /* "gensim/models/fasttext_inner.pyx":442 * for token in sent: * word = vlookup[token] if token in vlookup else None * if word is None: # <<<<<<<<<<<<<< @@ -5722,7 +5495,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_t_7 != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":447 + /* "gensim/models/fasttext_inner.pyx":443 * word = vlookup[token] if token in vlookup else None * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window # <<<<<<<<<<<<<< @@ -5731,7 +5504,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ goto __pyx_L11_continue; - /* "gensim/models/fasttext_inner.pyx":446 + /* "gensim/models/fasttext_inner.pyx":442 * for token in sent: * word = vlookup[token] if token in vlookup else None * if word is None: # <<<<<<<<<<<<<< @@ -5740,7 +5513,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":448 + /* "gensim/models/fasttext_inner.pyx":444 * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< @@ -5753,20 +5526,20 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = __pyx_t_7; goto __pyx_L15_bool_binop_done; } - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 448, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 444, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_13 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 448, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 444, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_14 = PyObject_RichCompare(__pyx_t_1, __pyx_t_13, Py_LT); __Pyx_XGOTREF(__pyx_t_14); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 448, __pyx_L1_error) + __pyx_t_14 = PyObject_RichCompare(__pyx_t_1, __pyx_t_13, Py_LT); __Pyx_XGOTREF(__pyx_t_14); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 444, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_14); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 448, __pyx_L1_error) + __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_14); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 444, __pyx_L1_error) __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; __pyx_t_5 = __pyx_t_7; __pyx_L15_bool_binop_done:; if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":449 + /* "gensim/models/fasttext_inner.pyx":445 * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): * continue # <<<<<<<<<<<<<< @@ -5775,7 +5548,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ goto __pyx_L11_continue; - /* "gensim/models/fasttext_inner.pyx":448 + /* "gensim/models/fasttext_inner.pyx":444 * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< @@ -5784,292 +5557,107 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":450 + /* "gensim/models/fasttext_inner.pyx":446 * if sample and word.sample_int < random_int32(&next_random): * continue * indexes[effective_words] = word.index # <<<<<<<<<<<<<< * - * subwords = [ + * subwords = model.wv.buckets_word[word.index] */ - __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 450, __pyx_L1_error) + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 446, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_t_14); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 450, __pyx_L1_error) + __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_t_14); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 446, __pyx_L1_error) __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; (__pyx_v_indexes[__pyx_v_effective_words]) = __pyx_t_15; - /* "gensim/models/fasttext_inner.pyx":452 + /* "gensim/models/fasttext_inner.pyx":448 * indexes[effective_words] = word.index * - * subwords = [ # <<<<<<<<<<<<<< - * model.wv.hash2index[ft_hash(subword) % model.bucket] - * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n) - */ - __pyx_t_14 = PyList_New(0); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 452, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_14); - - /* "gensim/models/fasttext_inner.pyx":454 - * subwords = [ - * model.wv.hash2index[ft_hash(subword) % model.bucket] - * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n) # <<<<<<<<<<<<<< - * ] - * word_subwords = np.array(subwords, dtype=np.uint32) - */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_compute_ngrams); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 454, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 454, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_16); - __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_min_n); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 454, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_17); - __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 454, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_16); - __pyx_t_18 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_max_n); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 454, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_18); - __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - __pyx_t_16 = NULL; - __pyx_t_2 = 0; - if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_1))) { - __pyx_t_16 = PyMethod_GET_SELF(__pyx_t_1); - if (likely(__pyx_t_16)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1); - __Pyx_INCREF(__pyx_t_16); - __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_1, function); - __pyx_t_2 = 1; - } - } - #if CYTHON_FAST_PYCALL - if (PyFunction_Check(__pyx_t_1)) { - PyObject *__pyx_temp[4] = {__pyx_t_16, __pyx_v_token, __pyx_t_17, __pyx_t_18}; - __pyx_t_13 = __Pyx_PyFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_2, 3+__pyx_t_2); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 454, __pyx_L1_error) - __Pyx_XDECREF(__pyx_t_16); __pyx_t_16 = 0; - __Pyx_GOTREF(__pyx_t_13); - __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; - __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; - } else - #endif - #if CYTHON_FAST_PYCCALL - if (__Pyx_PyFastCFunction_Check(__pyx_t_1)) { - PyObject *__pyx_temp[4] = {__pyx_t_16, __pyx_v_token, __pyx_t_17, __pyx_t_18}; - __pyx_t_13 = __Pyx_PyCFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_2, 3+__pyx_t_2); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 454, __pyx_L1_error) - __Pyx_XDECREF(__pyx_t_16); __pyx_t_16 = 0; - __Pyx_GOTREF(__pyx_t_13); - __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; - __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; - } else - #endif - { - __pyx_t_19 = PyTuple_New(3+__pyx_t_2); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 454, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_19); - if (__pyx_t_16) { - __Pyx_GIVEREF(__pyx_t_16); PyTuple_SET_ITEM(__pyx_t_19, 0, __pyx_t_16); __pyx_t_16 = NULL; - } - __Pyx_INCREF(__pyx_v_token); - __Pyx_GIVEREF(__pyx_v_token); - PyTuple_SET_ITEM(__pyx_t_19, 0+__pyx_t_2, __pyx_v_token); - __Pyx_GIVEREF(__pyx_t_17); - PyTuple_SET_ITEM(__pyx_t_19, 1+__pyx_t_2, __pyx_t_17); - __Pyx_GIVEREF(__pyx_t_18); - PyTuple_SET_ITEM(__pyx_t_19, 2+__pyx_t_2, __pyx_t_18); - __pyx_t_17 = 0; - __pyx_t_18 = 0; - __pyx_t_13 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_19, NULL); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 454, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - } - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (likely(PyList_CheckExact(__pyx_t_13)) || PyTuple_CheckExact(__pyx_t_13)) { - __pyx_t_1 = __pyx_t_13; __Pyx_INCREF(__pyx_t_1); __pyx_t_20 = 0; - __pyx_t_21 = NULL; - } else { - __pyx_t_20 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_t_13); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 454, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_21 = Py_TYPE(__pyx_t_1)->tp_iternext; if (unlikely(!__pyx_t_21)) __PYX_ERR(0, 454, __pyx_L1_error) - } - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - for (;;) { - if (likely(!__pyx_t_21)) { - if (likely(PyList_CheckExact(__pyx_t_1))) { - if (__pyx_t_20 >= PyList_GET_SIZE(__pyx_t_1)) break; - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_13 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_20); __Pyx_INCREF(__pyx_t_13); __pyx_t_20++; if (unlikely(0 < 0)) __PYX_ERR(0, 454, __pyx_L1_error) - #else - __pyx_t_13 = PySequence_ITEM(__pyx_t_1, __pyx_t_20); __pyx_t_20++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 454, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - #endif - } else { - if (__pyx_t_20 >= PyTuple_GET_SIZE(__pyx_t_1)) break; - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_13 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_20); __Pyx_INCREF(__pyx_t_13); __pyx_t_20++; if (unlikely(0 < 0)) __PYX_ERR(0, 454, __pyx_L1_error) - #else - __pyx_t_13 = PySequence_ITEM(__pyx_t_1, __pyx_t_20); __pyx_t_20++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 454, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - #endif - } - } else { - __pyx_t_13 = __pyx_t_21(__pyx_t_1); - if (unlikely(!__pyx_t_13)) { - PyObject* exc_type = PyErr_Occurred(); - if (exc_type) { - if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 454, __pyx_L1_error) - } - break; - } - __Pyx_GOTREF(__pyx_t_13); - } - __Pyx_XDECREF_SET(__pyx_v_subword, __pyx_t_13); - __pyx_t_13 = 0; - - /* "gensim/models/fasttext_inner.pyx":453 - * - * subwords = [ - * model.wv.hash2index[ft_hash(subword) % model.bucket] # <<<<<<<<<<<<<< - * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n) - * ] - */ - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 453, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_hash2index); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 453, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_19); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_18 = __Pyx_GetModuleGlobalName(__pyx_n_s_ft_hash); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 453, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_18); - __pyx_t_17 = NULL; - if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_18))) { - __pyx_t_17 = PyMethod_GET_SELF(__pyx_t_18); - if (likely(__pyx_t_17)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_18); - __Pyx_INCREF(__pyx_t_17); - __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_18, function); - } - } - if (!__pyx_t_17) { - __pyx_t_13 = __Pyx_PyObject_CallOneArg(__pyx_t_18, __pyx_v_subword); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 453, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - } else { - #if CYTHON_FAST_PYCALL - if (PyFunction_Check(__pyx_t_18)) { - PyObject *__pyx_temp[2] = {__pyx_t_17, __pyx_v_subword}; - __pyx_t_13 = __Pyx_PyFunction_FastCall(__pyx_t_18, __pyx_temp+1-1, 1+1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 453, __pyx_L1_error) - __Pyx_XDECREF(__pyx_t_17); __pyx_t_17 = 0; - __Pyx_GOTREF(__pyx_t_13); - } else - #endif - #if CYTHON_FAST_PYCCALL - if (__Pyx_PyFastCFunction_Check(__pyx_t_18)) { - PyObject *__pyx_temp[2] = {__pyx_t_17, __pyx_v_subword}; - __pyx_t_13 = __Pyx_PyCFunction_FastCall(__pyx_t_18, __pyx_temp+1-1, 1+1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 453, __pyx_L1_error) - __Pyx_XDECREF(__pyx_t_17); __pyx_t_17 = 0; - __Pyx_GOTREF(__pyx_t_13); - } else - #endif - { - __pyx_t_16 = PyTuple_New(1+1); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 453, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_16); - __Pyx_GIVEREF(__pyx_t_17); PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_17); __pyx_t_17 = NULL; - __Pyx_INCREF(__pyx_v_subword); - __Pyx_GIVEREF(__pyx_v_subword); - PyTuple_SET_ITEM(__pyx_t_16, 0+1, __pyx_v_subword); - __pyx_t_13 = __Pyx_PyObject_Call(__pyx_t_18, __pyx_t_16, NULL); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 453, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - } - } - __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; - __pyx_t_18 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_bucket); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 453, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_18); - __pyx_t_16 = PyNumber_Remainder(__pyx_t_13, __pyx_t_18); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 453, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_16); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; - __pyx_t_18 = PyObject_GetItem(__pyx_t_19, __pyx_t_16); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 453, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_18); - __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - if (unlikely(__Pyx_ListComp_Append(__pyx_t_14, (PyObject*)__pyx_t_18))) __PYX_ERR(0, 452, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; - - /* "gensim/models/fasttext_inner.pyx":454 - * subwords = [ - * model.wv.hash2index[ft_hash(subword) % model.bucket] - * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n) # <<<<<<<<<<<<<< - * ] + * subwords = model.wv.buckets_word[word.index] # <<<<<<<<<<<<<< * word_subwords = np.array(subwords, dtype=np.uint32) + * subwords_idx_len[effective_words] = len(subwords) */ - } - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_XDECREF_SET(__pyx_v_subwords, ((PyObject*)__pyx_t_14)); - __pyx_t_14 = 0; + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 448, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_14); + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_buckets_word); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 448, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 448, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_14); + __pyx_t_1 = PyObject_GetItem(__pyx_t_13, __pyx_t_14); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 448, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; + __Pyx_XDECREF_SET(__pyx_v_subwords, __pyx_t_1); + __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":456 - * for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n) - * ] + /* "gensim/models/fasttext_inner.pyx":449 + * + * subwords = model.wv.buckets_word[word.index] * word_subwords = np.array(subwords, dtype=np.uint32) # <<<<<<<<<<<<<< * subwords_idx_len[effective_words] = len(subwords) * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) */ - __pyx_t_14 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 456, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_14); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_array); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 456, __pyx_L1_error) + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 449, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - __pyx_t_14 = PyTuple_New(1); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 456, __pyx_L1_error) + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_array); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 449, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 449, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); __Pyx_INCREF(__pyx_v_subwords); __Pyx_GIVEREF(__pyx_v_subwords); - PyTuple_SET_ITEM(__pyx_t_14, 0, __pyx_v_subwords); - __pyx_t_18 = PyDict_New(); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 456, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_18); - __pyx_t_16 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 456, __pyx_L1_error) + PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_subwords); + __pyx_t_13 = PyDict_New(); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 449, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __pyx_t_16 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 449, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_uint32); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 456, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_19); + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_16, __pyx_n_s_uint32); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 449, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - if (PyDict_SetItem(__pyx_t_18, __pyx_n_s_dtype, __pyx_t_19) < 0) __PYX_ERR(0, 456, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - __pyx_t_19 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_14, __pyx_t_18); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 456, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_19); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + if (PyDict_SetItem(__pyx_t_13, __pyx_n_s_dtype, __pyx_t_17) < 0) __PYX_ERR(0, 449, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; + __pyx_t_17 = __Pyx_PyObject_Call(__pyx_t_14, __pyx_t_1, __pyx_t_13); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 449, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; - __Pyx_XDECREF_SET(__pyx_v_word_subwords, __pyx_t_19); - __pyx_t_19 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __Pyx_XDECREF_SET(__pyx_v_word_subwords, __pyx_t_17); + __pyx_t_17 = 0; - /* "gensim/models/fasttext_inner.pyx":457 - * ] + /* "gensim/models/fasttext_inner.pyx":450 + * subwords = model.wv.buckets_word[word.index] * word_subwords = np.array(subwords, dtype=np.uint32) * subwords_idx_len[effective_words] = len(subwords) # <<<<<<<<<<<<<< * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) * # ensures reference count of word_subwords doesn't reach 0 */ - __pyx_t_20 = PyList_GET_SIZE(__pyx_v_subwords); if (unlikely(__pyx_t_20 == -1)) __PYX_ERR(0, 457, __pyx_L1_error) - (__pyx_v_subwords_idx_len[__pyx_v_effective_words]) = ((int)__pyx_t_20); + __pyx_t_18 = PyObject_Length(__pyx_v_subwords); if (unlikely(__pyx_t_18 == -1)) __PYX_ERR(0, 450, __pyx_L1_error) + (__pyx_v_subwords_idx_len[__pyx_v_effective_words]) = ((int)__pyx_t_18); - /* "gensim/models/fasttext_inner.pyx":458 + /* "gensim/models/fasttext_inner.pyx":451 * word_subwords = np.array(subwords, dtype=np.uint32) * subwords_idx_len[effective_words] = len(subwords) * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) # <<<<<<<<<<<<<< * # ensures reference count of word_subwords doesn't reach 0 * subword_arrays[effective_words] = word_subwords */ - if (!(likely(((__pyx_v_word_subwords) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_subwords, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 458, __pyx_L1_error) + if (!(likely(((__pyx_v_word_subwords) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_subwords, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 451, __pyx_L1_error) (__pyx_v_subwords_idx[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_subwords))); - /* "gensim/models/fasttext_inner.pyx":460 + /* "gensim/models/fasttext_inner.pyx":453 * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) * # ensures reference count of word_subwords doesn't reach 0 * subword_arrays[effective_words] = word_subwords # <<<<<<<<<<<<<< * * if hs: */ - __pyx_t_19 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 460, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_19); - if (unlikely(PyDict_SetItem(__pyx_v_subword_arrays, __pyx_t_19, __pyx_v_word_subwords) < 0)) __PYX_ERR(0, 460, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; + __pyx_t_17 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 453, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + if (unlikely(PyDict_SetItem(__pyx_v_subword_arrays, __pyx_t_17, __pyx_v_word_subwords) < 0)) __PYX_ERR(0, 453, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; - /* "gensim/models/fasttext_inner.pyx":462 + /* "gensim/models/fasttext_inner.pyx":455 * subword_arrays[effective_words] = word_subwords * * if hs: # <<<<<<<<<<<<<< @@ -6079,46 +5667,46 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":463 + /* "gensim/models/fasttext_inner.pyx":456 * * if hs: * codelens[effective_words] = len(word.code) # <<<<<<<<<<<<<< * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) */ - __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 463, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_19); - __pyx_t_20 = PyObject_Length(__pyx_t_19); if (unlikely(__pyx_t_20 == -1)) __PYX_ERR(0, 463, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - (__pyx_v_codelens[__pyx_v_effective_words]) = ((int)__pyx_t_20); + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 456, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + __pyx_t_18 = PyObject_Length(__pyx_t_17); if (unlikely(__pyx_t_18 == -1)) __PYX_ERR(0, 456, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; + (__pyx_v_codelens[__pyx_v_effective_words]) = ((int)__pyx_t_18); - /* "gensim/models/fasttext_inner.pyx":464 + /* "gensim/models/fasttext_inner.pyx":457 * if hs: * codelens[effective_words] = len(word.code) * codes[effective_words] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< * points[effective_words] = np.PyArray_DATA(word.point) * effective_words += 1 */ - __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 464, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_19); - if (!(likely(((__pyx_t_19) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_19, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 464, __pyx_L1_error) - (__pyx_v_codes[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_19))); - __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 457, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + if (!(likely(((__pyx_t_17) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_17, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 457, __pyx_L1_error) + (__pyx_v_codes[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_17))); + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; - /* "gensim/models/fasttext_inner.pyx":465 + /* "gensim/models/fasttext_inner.pyx":458 * codelens[effective_words] = len(word.code) * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: */ - __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 465, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_19); - if (!(likely(((__pyx_t_19) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_19, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 465, __pyx_L1_error) - (__pyx_v_points[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_19))); - __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 458, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + if (!(likely(((__pyx_t_17) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_17, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 458, __pyx_L1_error) + (__pyx_v_points[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_17))); + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; - /* "gensim/models/fasttext_inner.pyx":462 + /* "gensim/models/fasttext_inner.pyx":455 * subword_arrays[effective_words] = word_subwords * * if hs: # <<<<<<<<<<<<<< @@ -6127,7 +5715,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":466 + /* "gensim/models/fasttext_inner.pyx":459 * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) * effective_words += 1 # <<<<<<<<<<<<<< @@ -6136,7 +5724,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_effective_words = (__pyx_v_effective_words + 1); - /* "gensim/models/fasttext_inner.pyx":467 + /* "gensim/models/fasttext_inner.pyx":460 * points[effective_words] = np.PyArray_DATA(word.point) * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< @@ -6146,7 +5734,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = ((__pyx_v_effective_words == 0x2710) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":468 + /* "gensim/models/fasttext_inner.pyx":461 * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: * break # <<<<<<<<<<<<<< @@ -6155,7 +5743,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ goto __pyx_L12_break; - /* "gensim/models/fasttext_inner.pyx":467 + /* "gensim/models/fasttext_inner.pyx":460 * points[effective_words] = np.PyArray_DATA(word.point) * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< @@ -6164,7 +5752,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":444 + /* "gensim/models/fasttext_inner.pyx":440 * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: # <<<<<<<<<<<<<< @@ -6176,7 +5764,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_L12_break:; __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/fasttext_inner.pyx":473 + /* "gensim/models/fasttext_inner.pyx":466 * # across sentence boundaries. * # indices of sentence number X are between tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 480, __pyx_L1_error) + __pyx_t_6 = -1; __pyx_t_17 = PyObject_GetIter(__pyx_t_3); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 473, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + __pyx_t_10 = Py_TYPE(__pyx_t_17)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 473, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; for (;;) { if (likely(!__pyx_t_10)) { - if (likely(PyList_CheckExact(__pyx_t_19))) { - if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_19)) break; + if (likely(PyList_CheckExact(__pyx_t_17))) { + if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_17)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyList_GET_ITEM(__pyx_t_19, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 480, __pyx_L1_error) + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_17, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 473, __pyx_L1_error) #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_19, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 480, __pyx_L1_error) + __pyx_t_3 = PySequence_ITEM(__pyx_t_17, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 473, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); #endif } else { - if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_19)) break; + if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_17)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_19, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 480, __pyx_L1_error) + __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_17, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 473, __pyx_L1_error) #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_19, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 480, __pyx_L1_error) + __pyx_t_3 = PySequence_ITEM(__pyx_t_17, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 473, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); #endif } } else { - __pyx_t_3 = __pyx_t_10(__pyx_t_19); + __pyx_t_3 = __pyx_t_10(__pyx_t_17); if (unlikely(!__pyx_t_3)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 480, __pyx_L1_error) + else __PYX_ERR(0, 473, __pyx_L1_error) } break; } @@ -6348,17 +5936,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_v_i = __pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "gensim/models/fasttext_inner.pyx":481 + /* "gensim/models/fasttext_inner.pyx":474 * # precompute "reduced window" offsets in a single randint() call * for i, item in enumerate(model.random.randint(0, window, effective_words)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * * # release GIL & train on all sentences */ - __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 481, __pyx_L1_error) + __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 474, __pyx_L1_error) (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_15; - /* "gensim/models/fasttext_inner.pyx":480 + /* "gensim/models/fasttext_inner.pyx":473 * * # precompute "reduced window" offsets in a single randint() call * for i, item in enumerate(model.random.randint(0, window, effective_words)): # <<<<<<<<<<<<<< @@ -6366,9 +5954,9 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT * */ } - __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; - /* "gensim/models/fasttext_inner.pyx":484 + /* "gensim/models/fasttext_inner.pyx":477 * * # release GIL & train on all sentences * with nogil: # <<<<<<<<<<<<<< @@ -6382,7 +5970,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT #endif /*try:*/ { - /* "gensim/models/fasttext_inner.pyx":485 + /* "gensim/models/fasttext_inner.pyx":478 * # release GIL & train on all sentences * with nogil: * for sent_idx in range(effective_sentences): # <<<<<<<<<<<<<< @@ -6390,10 +5978,10 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT * idx_end = sentence_idx[sent_idx + 1] */ __pyx_t_2 = __pyx_v_effective_sentences; - for (__pyx_t_22 = 0; __pyx_t_22 < __pyx_t_2; __pyx_t_22+=1) { - __pyx_v_sent_idx = __pyx_t_22; + for (__pyx_t_19 = 0; __pyx_t_19 < __pyx_t_2; __pyx_t_19+=1) { + __pyx_v_sent_idx = __pyx_t_19; - /* "gensim/models/fasttext_inner.pyx":486 + /* "gensim/models/fasttext_inner.pyx":479 * with nogil: * for sent_idx in range(effective_sentences): * idx_start = sentence_idx[sent_idx] # <<<<<<<<<<<<<< @@ -6402,7 +5990,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_idx_start = (__pyx_v_sentence_idx[__pyx_v_sent_idx]); - /* "gensim/models/fasttext_inner.pyx":487 + /* "gensim/models/fasttext_inner.pyx":480 * for sent_idx in range(effective_sentences): * idx_start = sentence_idx[sent_idx] * idx_end = sentence_idx[sent_idx + 1] # <<<<<<<<<<<<<< @@ -6411,18 +5999,18 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_idx_end = (__pyx_v_sentence_idx[(__pyx_v_sent_idx + 1)]); - /* "gensim/models/fasttext_inner.pyx":488 + /* "gensim/models/fasttext_inner.pyx":481 * idx_start = sentence_idx[sent_idx] * idx_end = sentence_idx[sent_idx + 1] * for i in range(idx_start, idx_end): # <<<<<<<<<<<<<< * j = i - window + reduced_windows[i] * if j < idx_start: */ - __pyx_t_23 = __pyx_v_idx_end; - for (__pyx_t_24 = __pyx_v_idx_start; __pyx_t_24 < __pyx_t_23; __pyx_t_24+=1) { - __pyx_v_i = __pyx_t_24; + __pyx_t_20 = __pyx_v_idx_end; + for (__pyx_t_21 = __pyx_v_idx_start; __pyx_t_21 < __pyx_t_20; __pyx_t_21+=1) { + __pyx_v_i = __pyx_t_21; - /* "gensim/models/fasttext_inner.pyx":489 + /* "gensim/models/fasttext_inner.pyx":482 * idx_end = sentence_idx[sent_idx + 1] * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< @@ -6431,7 +6019,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/fasttext_inner.pyx":490 + /* "gensim/models/fasttext_inner.pyx":483 * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] * if j < idx_start: # <<<<<<<<<<<<<< @@ -6441,7 +6029,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = ((__pyx_v_j < __pyx_v_idx_start) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":491 + /* "gensim/models/fasttext_inner.pyx":484 * j = i - window + reduced_windows[i] * if j < idx_start: * j = idx_start # <<<<<<<<<<<<<< @@ -6450,7 +6038,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_j = __pyx_v_idx_start; - /* "gensim/models/fasttext_inner.pyx":490 + /* "gensim/models/fasttext_inner.pyx":483 * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] * if j < idx_start: # <<<<<<<<<<<<<< @@ -6459,7 +6047,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":492 + /* "gensim/models/fasttext_inner.pyx":485 * if j < idx_start: * j = idx_start * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -6468,7 +6056,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/fasttext_inner.pyx":493 + /* "gensim/models/fasttext_inner.pyx":486 * j = idx_start * k = i + window + 1 - reduced_windows[i] * if k > idx_end: # <<<<<<<<<<<<<< @@ -6478,7 +6066,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = ((__pyx_v_k > __pyx_v_idx_end) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":494 + /* "gensim/models/fasttext_inner.pyx":487 * k = i + window + 1 - reduced_windows[i] * if k > idx_end: * k = idx_end # <<<<<<<<<<<<<< @@ -6487,7 +6075,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_k = __pyx_v_idx_end; - /* "gensim/models/fasttext_inner.pyx":493 + /* "gensim/models/fasttext_inner.pyx":486 * j = idx_start * k = i + window + 1 - reduced_windows[i] * if k > idx_end: # <<<<<<<<<<<<<< @@ -6496,7 +6084,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":496 + /* "gensim/models/fasttext_inner.pyx":489 * k = idx_end * * if hs: # <<<<<<<<<<<<<< @@ -6506,7 +6094,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":497 + /* "gensim/models/fasttext_inner.pyx":490 * * if hs: * fast_sentence_cbow_hs( # <<<<<<<<<<<<<< @@ -6515,7 +6103,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), __pyx_v_codelens, __pyx_v_neu1, __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1, __pyx_v_size, __pyx_v_indexes, __pyx_v_subwords_idx, __pyx_v_subwords_idx_len, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); - /* "gensim/models/fasttext_inner.pyx":496 + /* "gensim/models/fasttext_inner.pyx":489 * k = idx_end * * if hs: # <<<<<<<<<<<<<< @@ -6524,7 +6112,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":501 + /* "gensim/models/fasttext_inner.pyx":494 * subwords_idx,subwords_idx_len,_alpha, work, i, j, k, cbow_mean, word_locks_vocab, * word_locks_ngrams) * if negative: # <<<<<<<<<<<<<< @@ -6534,7 +6122,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_v_negative != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":502 + /* "gensim/models/fasttext_inner.pyx":495 * word_locks_ngrams) * if negative: * next_random = fast_sentence_cbow_neg( # <<<<<<<<<<<<<< @@ -6543,7 +6131,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_next_random = __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_neg(__pyx_v_negative, __pyx_v_cum_table, __pyx_v_cum_table_len, __pyx_v_codelens, __pyx_v_neu1, __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1neg, __pyx_v_size, __pyx_v_indexes, __pyx_v_subwords_idx, __pyx_v_subwords_idx_len, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean, __pyx_v_next_random, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); - /* "gensim/models/fasttext_inner.pyx":501 + /* "gensim/models/fasttext_inner.pyx":494 * subwords_idx,subwords_idx_len,_alpha, work, i, j, k, cbow_mean, word_locks_vocab, * word_locks_ngrams) * if negative: # <<<<<<<<<<<<<< @@ -6555,7 +6143,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT } } - /* "gensim/models/fasttext_inner.pyx":484 + /* "gensim/models/fasttext_inner.pyx":477 * * # release GIL & train on all sentences * with nogil: # <<<<<<<<<<<<<< @@ -6567,13 +6155,13 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT #ifdef WITH_THREAD Py_BLOCK_THREADS #endif - goto __pyx_L26; + goto __pyx_L24; } - __pyx_L26:; + __pyx_L24:; } } - /* "gensim/models/fasttext_inner.pyx":507 + /* "gensim/models/fasttext_inner.pyx":500 * cbow_mean, next_random, word_locks_vocab, word_locks_ngrams) * * return effective_words # <<<<<<<<<<<<<< @@ -6581,13 +6169,13 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_19 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 507, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_19); - __pyx_r = __pyx_t_19; - __pyx_t_19 = 0; + __pyx_t_17 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 500, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + __pyx_r = __pyx_t_17; + __pyx_t_17 = 0; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":380 + /* "gensim/models/fasttext_inner.pyx":376 * * * def train_batch_cbow(model, sentences, alpha, _work, _neu1): # <<<<<<<<<<<<<< @@ -6604,8 +6192,6 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __Pyx_XDECREF(__pyx_t_14); __Pyx_XDECREF(__pyx_t_16); __Pyx_XDECREF(__pyx_t_17); - __Pyx_XDECREF(__pyx_t_18); - __Pyx_XDECREF(__pyx_t_19); __Pyx_AddTraceback("gensim.models.fasttext_inner.train_batch_cbow", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; @@ -6617,13 +6203,12 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __Pyx_XDECREF(__pyx_v_subwords); __Pyx_XDECREF(__pyx_v_word_subwords); __Pyx_XDECREF(__pyx_v_item); - __Pyx_XDECREF(__pyx_v_subword); __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } -/* "gensim/models/fasttext_inner.pyx":510 +/* "gensim/models/fasttext_inner.pyx":503 * * * def init(): # <<<<<<<<<<<<<< @@ -6662,7 +6247,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P int __pyx_t_4; __Pyx_RefNannySetupContext("init", 0); - /* "gensim/models/fasttext_inner.pyx":519 + /* "gensim/models/fasttext_inner.pyx":512 * * cdef int i * cdef float *x = [10.0] # <<<<<<<<<<<<<< @@ -6672,7 +6257,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_t_1[0] = ((float)10.0); __pyx_v_x = __pyx_t_1; - /* "gensim/models/fasttext_inner.pyx":520 + /* "gensim/models/fasttext_inner.pyx":513 * cdef int i * cdef float *x = [10.0] * cdef float *y = [0.01] # <<<<<<<<<<<<<< @@ -6682,7 +6267,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_t_2[0] = ((float)0.01); __pyx_v_y = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":521 + /* "gensim/models/fasttext_inner.pyx":514 * cdef float *x = [10.0] * cdef float *y = [0.01] * cdef float expected = 0.1 # <<<<<<<<<<<<<< @@ -6691,7 +6276,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_expected = ((float)0.1); - /* "gensim/models/fasttext_inner.pyx":522 + /* "gensim/models/fasttext_inner.pyx":515 * cdef float *y = [0.01] * cdef float expected = 0.1 * cdef int size = 1 # <<<<<<<<<<<<<< @@ -6700,7 +6285,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_size = 1; - /* "gensim/models/fasttext_inner.pyx":527 + /* "gensim/models/fasttext_inner.pyx":520 * * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): # <<<<<<<<<<<<<< @@ -6710,7 +6295,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P for (__pyx_t_3 = 0; __pyx_t_3 < 0x3E8; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3; - /* "gensim/models/fasttext_inner.pyx":528 + /* "gensim/models/fasttext_inner.pyx":521 * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) # <<<<<<<<<<<<<< @@ -6719,7 +6304,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)exp(((((__pyx_v_i / ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0x3E8)) * 2.0) - 1.0) * 6.0))); - /* "gensim/models/fasttext_inner.pyx":529 + /* "gensim/models/fasttext_inner.pyx":522 * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) # <<<<<<<<<<<<<< @@ -6728,7 +6313,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) / ((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) + 1.0))); - /* "gensim/models/fasttext_inner.pyx":530 + /* "gensim/models/fasttext_inner.pyx":523 * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) * LOG_TABLE[i] = log( EXP_TABLE[i] ) # <<<<<<<<<<<<<< @@ -6738,7 +6323,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P (__pyx_v_6gensim_6models_14fasttext_inner_LOG_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)log((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]))); } - /* "gensim/models/fasttext_inner.pyx":533 + /* "gensim/models/fasttext_inner.pyx":526 * * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) # <<<<<<<<<<<<<< @@ -6747,7 +6332,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_d_res = __pyx_v_6gensim_6models_14word2vec_inner_dsdot((&__pyx_v_size), __pyx_v_x, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_y, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":534 + /* "gensim/models/fasttext_inner.pyx":527 * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res # <<<<<<<<<<<<<< @@ -6756,7 +6341,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_p_res = ((float *)(&__pyx_v_d_res)); - /* "gensim/models/fasttext_inner.pyx":535 + /* "gensim/models/fasttext_inner.pyx":528 * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res * if (abs(d_res - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -6766,7 +6351,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_t_4 = ((fabs((__pyx_v_d_res - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_4) { - /* "gensim/models/fasttext_inner.pyx":536 + /* "gensim/models/fasttext_inner.pyx":529 * p_res = &d_res * if (abs(d_res - expected) < 0.0001): * our_dot = our_dot_double # <<<<<<<<<<<<<< @@ -6775,7 +6360,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_double; - /* "gensim/models/fasttext_inner.pyx":537 + /* "gensim/models/fasttext_inner.pyx":530 * if (abs(d_res - expected) < 0.0001): * our_dot = our_dot_double * our_saxpy = saxpy # <<<<<<<<<<<<<< @@ -6784,7 +6369,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_v_6gensim_6models_14word2vec_inner_saxpy; - /* "gensim/models/fasttext_inner.pyx":538 + /* "gensim/models/fasttext_inner.pyx":531 * our_dot = our_dot_double * our_saxpy = saxpy * return 0 # double # <<<<<<<<<<<<<< @@ -6796,7 +6381,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_r = __pyx_int_0; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":535 + /* "gensim/models/fasttext_inner.pyx":528 * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res * if (abs(d_res - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -6805,7 +6390,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ } - /* "gensim/models/fasttext_inner.pyx":539 + /* "gensim/models/fasttext_inner.pyx":532 * our_saxpy = saxpy * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -6815,7 +6400,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_t_4 = ((fabsf(((__pyx_v_p_res[0]) - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_4) { - /* "gensim/models/fasttext_inner.pyx":540 + /* "gensim/models/fasttext_inner.pyx":533 * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): * our_dot = our_dot_float # <<<<<<<<<<<<<< @@ -6824,7 +6409,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_float; - /* "gensim/models/fasttext_inner.pyx":541 + /* "gensim/models/fasttext_inner.pyx":534 * elif (abs(p_res[0] - expected) < 0.0001): * our_dot = our_dot_float * our_saxpy = saxpy # <<<<<<<<<<<<<< @@ -6833,7 +6418,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_v_6gensim_6models_14word2vec_inner_saxpy; - /* "gensim/models/fasttext_inner.pyx":542 + /* "gensim/models/fasttext_inner.pyx":535 * our_dot = our_dot_float * our_saxpy = saxpy * return 1 # float # <<<<<<<<<<<<<< @@ -6845,7 +6430,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_r = __pyx_int_1; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":539 + /* "gensim/models/fasttext_inner.pyx":532 * our_saxpy = saxpy * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -6854,7 +6439,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ } - /* "gensim/models/fasttext_inner.pyx":546 + /* "gensim/models/fasttext_inner.pyx":539 * # neither => use cython loops, no BLAS * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here * our_dot = our_dot_noblas # <<<<<<<<<<<<<< @@ -6864,7 +6449,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P /*else*/ { __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_noblas; - /* "gensim/models/fasttext_inner.pyx":547 + /* "gensim/models/fasttext_inner.pyx":540 * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas # <<<<<<<<<<<<<< @@ -6873,7 +6458,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_f_6gensim_6models_14word2vec_inner_our_saxpy_noblas; - /* "gensim/models/fasttext_inner.pyx":548 + /* "gensim/models/fasttext_inner.pyx":541 * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas * return 2 # <<<<<<<<<<<<<< @@ -6886,7 +6471,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P goto __pyx_L0; } - /* "gensim/models/fasttext_inner.pyx":510 + /* "gensim/models/fasttext_inner.pyx":503 * * * def init(): # <<<<<<<<<<<<<< @@ -9451,12 +9036,11 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_alpha, __pyx_k_alpha, sizeof(__pyx_k_alpha), 0, 0, 1, 1}, {&__pyx_n_s_alpha_2, __pyx_k_alpha_2, sizeof(__pyx_k_alpha_2), 0, 0, 1, 1}, {&__pyx_n_s_array, __pyx_k_array, sizeof(__pyx_k_array), 0, 0, 1, 1}, - {&__pyx_n_s_bucket, __pyx_k_bucket, sizeof(__pyx_k_bucket), 0, 0, 1, 1}, + {&__pyx_n_s_buckets_word, __pyx_k_buckets_word, sizeof(__pyx_k_buckets_word), 0, 0, 1, 1}, {&__pyx_n_s_cbow_mean, __pyx_k_cbow_mean, sizeof(__pyx_k_cbow_mean), 0, 0, 1, 1}, {&__pyx_n_s_code, __pyx_k_code, sizeof(__pyx_k_code), 0, 0, 1, 1}, {&__pyx_n_s_codelens, __pyx_k_codelens, sizeof(__pyx_k_codelens), 0, 0, 1, 1}, {&__pyx_n_s_codes, __pyx_k_codes, sizeof(__pyx_k_codes), 0, 0, 1, 1}, - {&__pyx_n_s_compute_ngrams, __pyx_k_compute_ngrams, sizeof(__pyx_k_compute_ngrams), 0, 0, 1, 1}, {&__pyx_n_s_cum_table, __pyx_k_cum_table, sizeof(__pyx_k_cum_table), 0, 0, 1, 1}, {&__pyx_n_s_cum_table_len, __pyx_k_cum_table_len, sizeof(__pyx_k_cum_table_len), 0, 0, 1, 1}, {&__pyx_n_s_d_res, __pyx_k_d_res, sizeof(__pyx_k_d_res), 0, 0, 1, 1}, @@ -9467,9 +9051,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_expected, __pyx_k_expected, sizeof(__pyx_k_expected), 0, 0, 1, 1}, {&__pyx_n_s_fblas, __pyx_k_fblas, sizeof(__pyx_k_fblas), 0, 0, 1, 1}, {&__pyx_n_s_float32, __pyx_k_float32, sizeof(__pyx_k_float32), 0, 0, 1, 1}, - {&__pyx_n_s_ft_hash, __pyx_k_ft_hash, sizeof(__pyx_k_ft_hash), 0, 0, 1, 1}, {&__pyx_n_s_gensim_models_fasttext_inner, __pyx_k_gensim_models_fasttext_inner, sizeof(__pyx_k_gensim_models_fasttext_inner), 0, 0, 1, 1}, - {&__pyx_n_s_hash2index, __pyx_k_hash2index, sizeof(__pyx_k_hash2index), 0, 0, 1, 1}, {&__pyx_kp_s_home_jbaiter_projekte_gensim_ge, __pyx_k_home_jbaiter_projekte_gensim_ge, sizeof(__pyx_k_home_jbaiter_projekte_gensim_ge), 0, 0, 1, 0}, {&__pyx_n_s_hs, __pyx_k_hs, sizeof(__pyx_k_hs), 0, 0, 1, 1}, {&__pyx_n_s_i, __pyx_k_i, sizeof(__pyx_k_i), 0, 0, 1, 1}, @@ -9485,8 +9067,6 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_l1, __pyx_k_l1, sizeof(__pyx_k_l1), 0, 0, 1, 1}, {&__pyx_n_s_l1_2, __pyx_k_l1_2, sizeof(__pyx_k_l1_2), 0, 0, 1, 1}, {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1}, - {&__pyx_n_s_max_n, __pyx_k_max_n, sizeof(__pyx_k_max_n), 0, 0, 1, 1}, - {&__pyx_n_s_min_n, __pyx_k_min_n, sizeof(__pyx_k_min_n), 0, 0, 1, 1}, {&__pyx_n_s_model, __pyx_k_model, sizeof(__pyx_k_model), 0, 0, 1, 1}, {&__pyx_kp_u_ndarray_is_not_C_contiguous, __pyx_k_ndarray_is_not_C_contiguous, sizeof(__pyx_k_ndarray_is_not_C_contiguous), 0, 1, 0, 0}, {&__pyx_kp_u_ndarray_is_not_Fortran_contiguou, __pyx_k_ndarray_is_not_Fortran_contiguou, sizeof(__pyx_k_ndarray_is_not_Fortran_contiguou), 0, 1, 0, 0}, @@ -9513,7 +9093,6 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_sentence_idx, __pyx_k_sentence_idx, sizeof(__pyx_k_sentence_idx), 0, 0, 1, 1}, {&__pyx_n_s_sentences, __pyx_k_sentences, sizeof(__pyx_k_sentences), 0, 0, 1, 1}, {&__pyx_n_s_size, __pyx_k_size, sizeof(__pyx_k_size), 0, 0, 1, 1}, - {&__pyx_n_s_subword, __pyx_k_subword, sizeof(__pyx_k_subword), 0, 0, 1, 1}, {&__pyx_n_s_subword_arrays, __pyx_k_subword_arrays, sizeof(__pyx_k_subword_arrays), 0, 0, 1, 1}, {&__pyx_n_s_subwords, __pyx_k_subwords, sizeof(__pyx_k_subwords), 0, 0, 1, 1}, {&__pyx_n_s_subwords_idx, __pyx_k_subwords_idx, sizeof(__pyx_k_subwords_idx), 0, 0, 1, 1}, @@ -9529,7 +9108,6 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_trainables, __pyx_k_trainables, sizeof(__pyx_k_trainables), 0, 0, 1, 1}, {&__pyx_n_s_uint32, __pyx_k_uint32, sizeof(__pyx_k_uint32), 0, 0, 1, 1}, {&__pyx_kp_u_unknown_dtype_code_in_numpy_pxd, __pyx_k_unknown_dtype_code_in_numpy_pxd, sizeof(__pyx_k_unknown_dtype_code_in_numpy_pxd), 0, 1, 0, 0}, - {&__pyx_n_s_utils_any2vec, __pyx_k_utils_any2vec, sizeof(__pyx_k_utils_any2vec), 0, 0, 1, 1}, {&__pyx_n_s_vector_size, __pyx_k_vector_size, sizeof(__pyx_k_vector_size), 0, 0, 1, 1}, {&__pyx_n_s_vectors_ngrams, __pyx_k_vectors_ngrams, sizeof(__pyx_k_vectors_ngrams), 0, 0, 1, 1}, {&__pyx_n_s_vectors_ngrams_lockf, __pyx_k_vectors_ngrams_lockf, sizeof(__pyx_k_vectors_ngrams_lockf), 0, 0, 1, 1}, @@ -9552,8 +9130,8 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { }; static int __Pyx_InitCachedBuiltins(void) { __pyx_builtin_ImportError = __Pyx_GetBuiltinName(__pyx_n_s_ImportError); if (!__pyx_builtin_ImportError) __PYX_ERR(0, 18, __pyx_L1_error) - __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(0, 62, __pyx_L1_error) - __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) __PYX_ERR(0, 349, __pyx_L1_error) + __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(0, 61, __pyx_L1_error) + __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) __PYX_ERR(0, 345, __pyx_L1_error) __pyx_builtin_ValueError = __Pyx_GetBuiltinName(__pyx_n_s_ValueError); if (!__pyx_builtin_ValueError) __PYX_ERR(1, 218, __pyx_L1_error) __pyx_builtin_RuntimeError = __Pyx_GetBuiltinName(__pyx_n_s_RuntimeError); if (!__pyx_builtin_RuntimeError) __PYX_ERR(1, 799, __pyx_L1_error) return 0; @@ -9565,31 +9143,31 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); - /* "gensim/models/fasttext_inner.pyx":300 + /* "gensim/models/fasttext_inner.pyx":299 * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple_ = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) __PYX_ERR(0, 300, __pyx_L1_error) + __pyx_tuple_ = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) __PYX_ERR(0, 299, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple_); __Pyx_GIVEREF(__pyx_tuple_); - __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) __PYX_ERR(0, 300, __pyx_L1_error) + __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) __PYX_ERR(0, 299, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__2); __Pyx_GIVEREF(__pyx_tuple__2); - /* "gensim/models/fasttext_inner.pyx":432 + /* "gensim/models/fasttext_inner.pyx":428 * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) __PYX_ERR(0, 432, __pyx_L1_error) + __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) __PYX_ERR(0, 428, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__3); __Pyx_GIVEREF(__pyx_tuple__3); - __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) __PYX_ERR(0, 432, __pyx_L1_error) + __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) __PYX_ERR(0, 428, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__4); __Pyx_GIVEREF(__pyx_tuple__4); @@ -9690,41 +9268,41 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__13); __Pyx_GIVEREF(__pyx_tuple__13); - /* "gensim/models/fasttext_inner.pyx":247 + /* "gensim/models/fasttext_inner.pyx":246 * * * def train_batch_sg(model, sentences, alpha, _work, _l1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_tuple__15 = PyTuple_Pack(47, __pyx_n_s_model, __pyx_n_s_sentences, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_l1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_syn0_vocab, __pyx_n_s_word_locks_vocab, __pyx_n_s_syn0_ngrams, __pyx_n_s_word_locks_ngrams, __pyx_n_s_work_2, __pyx_n_s_l1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_idx, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_effective_words, __pyx_n_s_effective_sentences, __pyx_n_s_sent_idx, __pyx_n_s_idx_start, __pyx_n_s_idx_end, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_subwords_idx_len, __pyx_n_s_subwords_idx, __pyx_n_s_subword_arrays, __pyx_n_s_vlookup, __pyx_n_s_sent, __pyx_n_s_token, __pyx_n_s_word, __pyx_n_s_subwords, __pyx_n_s_word_subwords, __pyx_n_s_item, __pyx_n_s_subword); if (unlikely(!__pyx_tuple__15)) __PYX_ERR(0, 247, __pyx_L1_error) + __pyx_tuple__15 = PyTuple_Pack(46, __pyx_n_s_model, __pyx_n_s_sentences, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_l1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_syn0_vocab, __pyx_n_s_word_locks_vocab, __pyx_n_s_syn0_ngrams, __pyx_n_s_word_locks_ngrams, __pyx_n_s_work_2, __pyx_n_s_l1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_idx, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_effective_words, __pyx_n_s_effective_sentences, __pyx_n_s_sent_idx, __pyx_n_s_idx_start, __pyx_n_s_idx_end, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_subwords_idx_len, __pyx_n_s_subwords_idx, __pyx_n_s_subword_arrays, __pyx_n_s_vlookup, __pyx_n_s_sent, __pyx_n_s_token, __pyx_n_s_word, __pyx_n_s_subwords, __pyx_n_s_word_subwords, __pyx_n_s_item); if (unlikely(!__pyx_tuple__15)) __PYX_ERR(0, 246, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__15); __Pyx_GIVEREF(__pyx_tuple__15); - __pyx_codeobj__16 = (PyObject*)__Pyx_PyCode_New(5, 0, 47, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__15, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_jbaiter_projekte_gensim_ge, __pyx_n_s_train_batch_sg, 247, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__16)) __PYX_ERR(0, 247, __pyx_L1_error) + __pyx_codeobj__16 = (PyObject*)__Pyx_PyCode_New(5, 0, 46, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__15, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_jbaiter_projekte_gensim_ge, __pyx_n_s_train_batch_sg, 246, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__16)) __PYX_ERR(0, 246, __pyx_L1_error) - /* "gensim/models/fasttext_inner.pyx":380 + /* "gensim/models/fasttext_inner.pyx":376 * * * def train_batch_cbow(model, sentences, alpha, _work, _neu1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_tuple__17 = PyTuple_Pack(48, __pyx_n_s_model, __pyx_n_s_sentences, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_cbow_mean, __pyx_n_s_syn0_vocab, __pyx_n_s_word_locks_vocab, __pyx_n_s_syn0_ngrams, __pyx_n_s_word_locks_ngrams, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_idx, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_effective_words, __pyx_n_s_effective_sentences, __pyx_n_s_sent_idx, __pyx_n_s_idx_start, __pyx_n_s_idx_end, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_subwords_idx_len, __pyx_n_s_subwords_idx, __pyx_n_s_subword_arrays, __pyx_n_s_neu1_2, __pyx_n_s_vlookup, __pyx_n_s_sent, __pyx_n_s_token, __pyx_n_s_word, __pyx_n_s_subwords, __pyx_n_s_word_subwords, __pyx_n_s_item, __pyx_n_s_subword); if (unlikely(!__pyx_tuple__17)) __PYX_ERR(0, 380, __pyx_L1_error) + __pyx_tuple__17 = PyTuple_Pack(47, __pyx_n_s_model, __pyx_n_s_sentences, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_cbow_mean, __pyx_n_s_syn0_vocab, __pyx_n_s_word_locks_vocab, __pyx_n_s_syn0_ngrams, __pyx_n_s_word_locks_ngrams, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_idx, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_effective_words, __pyx_n_s_effective_sentences, __pyx_n_s_sent_idx, __pyx_n_s_idx_start, __pyx_n_s_idx_end, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_subwords_idx_len, __pyx_n_s_subwords_idx, __pyx_n_s_subword_arrays, __pyx_n_s_neu1_2, __pyx_n_s_vlookup, __pyx_n_s_sent, __pyx_n_s_token, __pyx_n_s_word, __pyx_n_s_subwords, __pyx_n_s_word_subwords, __pyx_n_s_item); if (unlikely(!__pyx_tuple__17)) __PYX_ERR(0, 376, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__17); __Pyx_GIVEREF(__pyx_tuple__17); - __pyx_codeobj__18 = (PyObject*)__Pyx_PyCode_New(5, 0, 48, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__17, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_jbaiter_projekte_gensim_ge, __pyx_n_s_train_batch_cbow, 380, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__18)) __PYX_ERR(0, 380, __pyx_L1_error) + __pyx_codeobj__18 = (PyObject*)__Pyx_PyCode_New(5, 0, 47, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__17, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_jbaiter_projekte_gensim_ge, __pyx_n_s_train_batch_cbow, 376, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__18)) __PYX_ERR(0, 376, __pyx_L1_error) - /* "gensim/models/fasttext_inner.pyx":510 + /* "gensim/models/fasttext_inner.pyx":503 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_tuple__19 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__19)) __PYX_ERR(0, 510, __pyx_L1_error) + __pyx_tuple__19 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__19)) __PYX_ERR(0, 503, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__19); __Pyx_GIVEREF(__pyx_tuple__19); - __pyx_codeobj__20 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__19, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_jbaiter_projekte_gensim_ge, __pyx_n_s_init, 510, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__20)) __PYX_ERR(0, 510, __pyx_L1_error) + __pyx_codeobj__20 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__19, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_jbaiter_projekte_gensim_ge, __pyx_n_s_init, 503, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__20)) __PYX_ERR(0, 503, __pyx_L1_error) __Pyx_RefNannyFinishContext(); return 0; __pyx_L1_error:; @@ -9963,7 +9541,7 @@ PyMODINIT_FUNC PyInit_fasttext_inner(void) * # in scipy > 0.15, fblas function has been removed * import scipy.linalg.blas as fblas # <<<<<<<<<<<<<< * - * from _utils_any2vec import compute_ngrams, ft_hash + * from word2vec_inner cimport bisect_left, random_int32, \ */ __pyx_t_10 = PyList_New(1); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 20, __pyx_L4_except_error) __Pyx_GOTREF(__pyx_t_10); @@ -10005,50 +9583,22 @@ PyMODINIT_FUNC PyInit_fasttext_inner(void) __pyx_L9_try_end:; } - /* "gensim/models/fasttext_inner.pyx":22 - * import scipy.linalg.blas as fblas - * - * from _utils_any2vec import compute_ngrams, ft_hash # <<<<<<<<<<<<<< - * from word2vec_inner cimport bisect_left, random_int32, \ - * scopy, saxpy, sdot, dsdot, snrm2, sscal, \ - */ - __pyx_t_9 = PyList_New(2); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 22, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_9); - __Pyx_INCREF(__pyx_n_s_compute_ngrams); - __Pyx_GIVEREF(__pyx_n_s_compute_ngrams); - PyList_SET_ITEM(__pyx_t_9, 0, __pyx_n_s_compute_ngrams); - __Pyx_INCREF(__pyx_n_s_ft_hash); - __Pyx_GIVEREF(__pyx_n_s_ft_hash); - PyList_SET_ITEM(__pyx_t_9, 1, __pyx_n_s_ft_hash); - __pyx_t_3 = __Pyx_Import(__pyx_n_s_utils_any2vec, __pyx_t_9, -1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 22, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - __pyx_t_9 = __Pyx_ImportFrom(__pyx_t_3, __pyx_n_s_compute_ngrams); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 22, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_9); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_compute_ngrams, __pyx_t_9) < 0) __PYX_ERR(0, 22, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - __pyx_t_9 = __Pyx_ImportFrom(__pyx_t_3, __pyx_n_s_ft_hash); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 22, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_9); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_ft_hash, __pyx_t_9) < 0) __PYX_ERR(0, 22, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - - /* "gensim/models/fasttext_inner.pyx":29 + /* "gensim/models/fasttext_inner.pyx":28 * our_dot_double, our_dot_float, our_dot_noblas, our_saxpy_noblas * * REAL = np.float32 # <<<<<<<<<<<<<< * * DEF MAX_SENTENCE_LEN = 10000 */ - __pyx_t_3 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 29, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_float32); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 29, __pyx_L1_error) + __pyx_t_9 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 28, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_9); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (PyDict_SetItem(__pyx_d, __pyx_n_s_REAL, __pyx_t_9) < 0) __PYX_ERR(0, 29, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_9, __pyx_n_s_float32); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 28, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + if (PyDict_SetItem(__pyx_d, __pyx_n_s_REAL, __pyx_t_3) < 0) __PYX_ERR(0, 28, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":40 + /* "gensim/models/fasttext_inner.pyx":39 * cdef REAL_t[EXP_TABLE_SIZE] LOG_TABLE * * cdef int ONE = 1 # <<<<<<<<<<<<<< @@ -10057,7 +9607,7 @@ PyMODINIT_FUNC PyInit_fasttext_inner(void) */ __pyx_v_6gensim_6models_14fasttext_inner_ONE = 1; - /* "gensim/models/fasttext_inner.pyx":41 + /* "gensim/models/fasttext_inner.pyx":40 * * cdef int ONE = 1 * cdef REAL_t ONEF = 1.0 # <<<<<<<<<<<<<< @@ -10066,89 +9616,89 @@ PyMODINIT_FUNC PyInit_fasttext_inner(void) */ __pyx_v_6gensim_6models_14fasttext_inner_ONEF = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)1.0); - /* "gensim/models/fasttext_inner.pyx":247 + /* "gensim/models/fasttext_inner.pyx":246 * * * def train_batch_sg(model, sentences, alpha, _work, _l1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_1train_batch_sg, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 247, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_9); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_batch_sg, __pyx_t_9) < 0) __PYX_ERR(0, 247, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + __pyx_t_3 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_1train_batch_sg, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 246, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_batch_sg, __pyx_t_3) < 0) __PYX_ERR(0, 246, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":380 + /* "gensim/models/fasttext_inner.pyx":376 * * * def train_batch_cbow(model, sentences, alpha, _work, _neu1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_3train_batch_cbow, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 380, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_9); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_batch_cbow, __pyx_t_9) < 0) __PYX_ERR(0, 380, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + __pyx_t_3 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_3train_batch_cbow, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 376, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_batch_cbow, __pyx_t_3) < 0) __PYX_ERR(0, 376, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":510 + /* "gensim/models/fasttext_inner.pyx":503 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_5init, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 510, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_9); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_9) < 0) __PYX_ERR(0, 510, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + __pyx_t_3 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_5init, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 503, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_3) < 0) __PYX_ERR(0, 503, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":550 + /* "gensim/models/fasttext_inner.pyx":543 * return 2 * * FAST_VERSION = init() # initialize the module # <<<<<<<<<<<<<< * MAX_WORDS_IN_BATCH = MAX_SENTENCE_LEN * */ - __pyx_t_3 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 550, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); + __pyx_t_9 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 543, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_9); __pyx_t_7 = NULL; - if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_3))) { - __pyx_t_7 = PyMethod_GET_SELF(__pyx_t_3); + if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_9))) { + __pyx_t_7 = PyMethod_GET_SELF(__pyx_t_9); if (likely(__pyx_t_7)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3); + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_9); __Pyx_INCREF(__pyx_t_7); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_3, function); + __Pyx_DECREF_SET(__pyx_t_9, function); } } if (__pyx_t_7) { - __pyx_t_9 = __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_7); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 550, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_CallOneArg(__pyx_t_9, __pyx_t_7); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 543, __pyx_L1_error) __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; } else { - __pyx_t_9 = __Pyx_PyObject_CallNoArg(__pyx_t_3); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 550, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_CallNoArg(__pyx_t_9); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 543, __pyx_L1_error) } - __Pyx_GOTREF(__pyx_t_9); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_9) < 0) __PYX_ERR(0, 550, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_3) < 0) __PYX_ERR(0, 543, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":551 + /* "gensim/models/fasttext_inner.pyx":544 * * FAST_VERSION = init() # initialize the module * MAX_WORDS_IN_BATCH = MAX_SENTENCE_LEN # <<<<<<<<<<<<<< * */ - if (PyDict_SetItem(__pyx_d, __pyx_n_s_MAX_WORDS_IN_BATCH, __pyx_int_10000) < 0) __PYX_ERR(0, 551, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_MAX_WORDS_IN_BATCH, __pyx_int_10000) < 0) __PYX_ERR(0, 544, __pyx_L1_error) /* "gensim/models/fasttext_inner.pyx":1 * #!/usr/bin/env cython # <<<<<<<<<<<<<< * # cython: boundscheck=False * # cython: wraparound=False */ - __pyx_t_9 = PyDict_New(); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 1, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_9); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_9) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + __pyx_t_3 = PyDict_New(); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 1, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_3) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; /* "../../.envs/gensim/lib/python3.6/site-packages/Cython/Includes/numpy/__init__.pxd":997 * raise ImportError("numpy.core.umath failed to import") @@ -10549,72 +10099,8 @@ static CYTHON_INLINE PyObject * __Pyx_PyCFunction_FastCall(PyObject *func_obj, P } #endif // CYTHON_FAST_PYCCALL -/* PyObjectCallMethO */ - #if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) { - PyObject *self, *result; - PyCFunction cfunc; - cfunc = PyCFunction_GET_FUNCTION(func); - self = PyCFunction_GET_SELF(func); - if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) - return NULL; - result = cfunc(self, arg); - Py_LeaveRecursiveCall(); - if (unlikely(!result) && unlikely(!PyErr_Occurred())) { - PyErr_SetString( - PyExc_SystemError, - "NULL result without error in PyObject_Call"); - } - return result; -} -#endif - -/* PyObjectCallOneArg */ - #if CYTHON_COMPILING_IN_CPYTHON -static PyObject* __Pyx__PyObject_CallOneArg(PyObject *func, PyObject *arg) { - PyObject *result; - PyObject *args = PyTuple_New(1); - if (unlikely(!args)) return NULL; - Py_INCREF(arg); - PyTuple_SET_ITEM(args, 0, arg); - result = __Pyx_PyObject_Call(func, args, NULL); - Py_DECREF(args); - return result; -} -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { -#if CYTHON_FAST_PYCALL - if (PyFunction_Check(func)) { - return __Pyx_PyFunction_FastCall(func, &arg, 1); - } -#endif -#ifdef __Pyx_CyFunction_USED - if (likely(PyCFunction_Check(func) || PyObject_TypeCheck(func, __pyx_CyFunctionType))) { -#else - if (likely(PyCFunction_Check(func))) { -#endif - if (likely(PyCFunction_GET_FLAGS(func) & METH_O)) { - return __Pyx_PyObject_CallMethO(func, arg); -#if CYTHON_FAST_PYCCALL - } else if (PyCFunction_GET_FLAGS(func) & METH_FASTCALL) { - return __Pyx_PyCFunction_FastCall(func, &arg, 1); -#endif - } - } - return __Pyx__PyObject_CallOneArg(func, arg); -} -#else -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { - PyObject *result; - PyObject *args = PyTuple_Pack(1, arg); - if (unlikely(!args)) return NULL; - result = __Pyx_PyObject_Call(func, args, NULL); - Py_DECREF(args); - return result; -} -#endif - /* PyErrFetchRestore */ - #if CYTHON_FAST_THREAD_STATE + #if CYTHON_FAST_THREAD_STATE static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { PyObject *tmp_type, *tmp_value, *tmp_tb; tmp_type = tstate->curexc_type; @@ -10638,7 +10124,7 @@ static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject #endif /* RaiseException */ - #if PY_MAJOR_VERSION < 3 + #if PY_MAJOR_VERSION < 3 static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, CYTHON_UNUSED PyObject *cause) { __Pyx_PyThreadState_declare @@ -10801,25 +10287,25 @@ static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject #endif /* RaiseTooManyValuesToUnpack */ - static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected) { + static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected) { PyErr_Format(PyExc_ValueError, "too many values to unpack (expected %" CYTHON_FORMAT_SSIZE_T "d)", expected); } /* RaiseNeedMoreValuesToUnpack */ - static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index) { + static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index) { PyErr_Format(PyExc_ValueError, "need more than %" CYTHON_FORMAT_SSIZE_T "d value%.1s to unpack", index, (index == 1) ? "" : "s"); } /* RaiseNoneIterError */ - static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void) { + static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable"); } /* SaveResetException */ - #if CYTHON_FAST_THREAD_STATE + #if CYTHON_FAST_THREAD_STATE static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { *type = tstate->exc_type; *value = tstate->exc_value; @@ -10843,7 +10329,7 @@ static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject #endif /* PyErrExceptionMatches */ - #if CYTHON_FAST_THREAD_STATE + #if CYTHON_FAST_THREAD_STATE static CYTHON_INLINE int __Pyx_PyErr_ExceptionMatchesInState(PyThreadState* tstate, PyObject* err) { PyObject *exc_type = tstate->curexc_type; if (exc_type == err) return 1; @@ -10853,7 +10339,7 @@ static CYTHON_INLINE int __Pyx_PyErr_ExceptionMatchesInState(PyThreadState* tsta #endif /* GetException */ - #if CYTHON_FAST_THREAD_STATE + #if CYTHON_FAST_THREAD_STATE static int __Pyx__GetException(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { #else static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb) { @@ -10914,7 +10400,7 @@ static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb) } /* Import */ - static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) { + static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) { PyObject *empty_list = 0; PyObject *module = 0; PyObject *global_dict = 0; @@ -10988,7 +10474,7 @@ static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb) } /* ImportFrom */ - static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name) { + static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name) { PyObject* value = __Pyx_PyObject_GetAttrStr(module, name); if (unlikely(!value) && PyErr_ExceptionMatches(PyExc_AttributeError)) { PyErr_Format(PyExc_ImportError, @@ -11001,6 +10487,70 @@ static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb) return value; } +/* PyObjectCallMethO */ + #if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) { + PyObject *self, *result; + PyCFunction cfunc; + cfunc = PyCFunction_GET_FUNCTION(func); + self = PyCFunction_GET_SELF(func); + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) + return NULL; + result = cfunc(self, arg); + Py_LeaveRecursiveCall(); + if (unlikely(!result) && unlikely(!PyErr_Occurred())) { + PyErr_SetString( + PyExc_SystemError, + "NULL result without error in PyObject_Call"); + } + return result; +} +#endif + +/* PyObjectCallOneArg */ + #if CYTHON_COMPILING_IN_CPYTHON +static PyObject* __Pyx__PyObject_CallOneArg(PyObject *func, PyObject *arg) { + PyObject *result; + PyObject *args = PyTuple_New(1); + if (unlikely(!args)) return NULL; + Py_INCREF(arg); + PyTuple_SET_ITEM(args, 0, arg); + result = __Pyx_PyObject_Call(func, args, NULL); + Py_DECREF(args); + return result; +} +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { +#if CYTHON_FAST_PYCALL + if (PyFunction_Check(func)) { + return __Pyx_PyFunction_FastCall(func, &arg, 1); + } +#endif +#ifdef __Pyx_CyFunction_USED + if (likely(PyCFunction_Check(func) || PyObject_TypeCheck(func, __pyx_CyFunctionType))) { +#else + if (likely(PyCFunction_Check(func))) { +#endif + if (likely(PyCFunction_GET_FLAGS(func) & METH_O)) { + return __Pyx_PyObject_CallMethO(func, arg); +#if CYTHON_FAST_PYCCALL + } else if (PyCFunction_GET_FLAGS(func) & METH_FASTCALL) { + return __Pyx_PyCFunction_FastCall(func, &arg, 1); +#endif + } + } + return __Pyx__PyObject_CallOneArg(func, arg); +} +#else +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { + PyObject *result; + PyObject *args = PyTuple_Pack(1, arg); + if (unlikely(!args)) return NULL; + result = __Pyx_PyObject_Call(func, args, NULL); + Py_DECREF(args); + return result; +} +#endif + /* PyObjectCallNoArg */ #if CYTHON_COMPILING_IN_CPYTHON static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func) { diff --git a/gensim/models/fasttext_inner.pyx b/gensim/models/fasttext_inner.pyx index 83255472b1..ac7cdafbd5 100644 --- a/gensim/models/fasttext_inner.pyx +++ b/gensim/models/fasttext_inner.pyx @@ -19,7 +19,6 @@ except ImportError: # in scipy > 0.15, fblas function has been removed import scipy.linalg.blas as fblas -from _utils_any2vec import compute_ngrams, ft_hash from word2vec_inner cimport bisect_left, random_int32, \ scopy, saxpy, sdot, dsdot, snrm2, sscal, \ REAL_t, EXP_TABLE, \ @@ -317,11 +316,8 @@ def train_batch_sg(model, sentences, alpha, _work, _l1): continue indexes[effective_words] = word.index - subwords = [ - model.wv.hash2index[ft_hash(subword) % model.bucket] - for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n) - ] - word_subwords = np.array([word.index] + subwords, dtype=np.uint32) + subwords = model.wv.buckets_word[word.index] + word_subwords = np.array((word.index,) + subwords, dtype=np.uint32) subwords_idx_len[effective_words] = (len(subwords) + 1) subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) # ensures reference count of word_subwords doesn't reach 0 @@ -449,10 +445,7 @@ def train_batch_cbow(model, sentences, alpha, _work, _neu1): continue indexes[effective_words] = word.index - subwords = [ - model.wv.hash2index[ft_hash(subword) % model.bucket] - for subword in compute_ngrams(token, model.wv.min_n, model.wv.max_n) - ] + subwords = model.wv.buckets_word[word.index] word_subwords = np.array(subwords, dtype=np.uint32) subwords_idx_len[effective_words] = len(subwords) subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py index 97061e97d6..f07ad092b6 100644 --- a/gensim/models/keyedvectors.py +++ b/gensim/models/keyedvectors.py @@ -1535,6 +1535,7 @@ def __init__(self, vector_size, min_n, max_n): self.vectors_vocab_norm = None self.vectors_ngrams = None self.vectors_ngrams_norm = None + self.buckets_word = None self.hash2index = {} self.min_n = min_n self.max_n = max_n From 1d8611141ea606f7f8fdcf00562f2221d11d887d Mon Sep 17 00:00:00 2001 From: Johannes Baiter Date: Mon, 26 Feb 2018 18:52:27 +0100 Subject: [PATCH 10/20] Remove last occurences of wv.ngrams_word and wv.ngrams --- gensim/models/deprecated/fasttext.py | 2 -- gensim/test/test_fasttext.py | 4 ---- 2 files changed, 6 deletions(-) diff --git a/gensim/models/deprecated/fasttext.py b/gensim/models/deprecated/fasttext.py index 3a2162d5ea..594c310b9a 100644 --- a/gensim/models/deprecated/fasttext.py +++ b/gensim/models/deprecated/fasttext.py @@ -103,8 +103,6 @@ def load_old_fasttext(*args, **kwargs): new_model.wv.index2word = old_model.wv.index2word new_model.vocabulary.cum_table = old_model.cum_table - new_model.wv.ngrams_word = old_model.wv.ngrams_word - new_model.wv.ngrams = old_model.wv.ngrams new_model.wv.hash2index = old_model.wv.hash2index new_model.train_count = old_model.train_count diff --git a/gensim/test/test_fasttext.py b/gensim/test/test_fasttext.py index 7a8afd0d82..4fe32cb18e 100644 --- a/gensim/test/test_fasttext.py +++ b/gensim/test/test_fasttext.py @@ -538,8 +538,6 @@ def testLoadOldModel(self): self.assertTrue(model.trainables.vectors_lockf.shape == (12, )) self.assertTrue(model.vocabulary.cum_table.shape == (12, )) - self.assertEqual(len(model.wv.ngrams_word), 12) - self.assertEqual(len(model.wv.ngrams), 202) self.assertEqual(len(model.wv.hash2index), 202) self.assertTrue(model.wv.vectors_vocab.shape == (12, 100)) self.assertTrue(model.wv.vectors_ngrams.shape == (202, 100)) @@ -554,8 +552,6 @@ def testLoadOldModel(self): self.assertTrue(model.trainables.vectors_lockf.shape == (12, )) self.assertTrue(model.vocabulary.cum_table.shape == (12, )) - self.assertEqual(len(model.wv.ngrams_word), 12) - self.assertEqual(len(model.wv.ngrams), 202) self.assertEqual(len(model.wv.hash2index), 202) self.assertTrue(model.wv.vectors_vocab.shape == (12, 100)) self.assertTrue(model.wv.vectors_ngrams.shape == (202, 100)) From 6aaab0a53dd09fea1ad488acd8bc77bffa1ac6c5 Mon Sep 17 00:00:00 2001 From: Johannes Baiter Date: Wed, 28 Feb 2018 11:55:40 +0100 Subject: [PATCH 11/20] fasttext: use buckets_word cache also for non-Cython training --- gensim/models/fasttext.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/gensim/models/fasttext.py b/gensim/models/fasttext.py index 375ca29b03..11d42a5747 100644 --- a/gensim/models/fasttext.py +++ b/gensim/models/fasttext.py @@ -92,7 +92,7 @@ def train_batch_cbow(model, sentences, alpha, work=None, neu1=None): for index in word2_indices: vocab_subwords_indices += [index] - word2_subwords += _compute_ngrams(model.wv.index2word[index], model.min_n, model.max_n) + word2_subwords += list(model.wv.buckets_word[index]) for subword in word2_subwords: ngrams_subwords_indices.append( @@ -143,11 +143,8 @@ def train_batch_sg(model, sentences, alpha, work=None, neu1=None): # now go over all words from the (reduced) window, predicting each one in turn start = max(0, pos - model.window + reduced_window) - subwords_indices = [word.index] - word2_subwords = _compute_ngrams(model.wv.index2word[word.index], model.min_n, model.max_n) - - for subword in word2_subwords: - subwords_indices.append(model.wv.hash2index[_ft_hash(subword) % model.bucket]) + subwords_indices = (word.index,) + subwords_indices += model.wv.buckets_word[word.index] for pos2, word2 in enumerate(word_vocabs[start:(pos + model.window + 1 - reduced_window)], start): if pos2 != pos: # don't train on the `word` itself From 85679edd0c9e980980bab4d664997acdf6558c14 Mon Sep 17 00:00:00 2001 From: Johannes Baiter Date: Wed, 28 Feb 2018 11:56:01 +0100 Subject: [PATCH 12/20] fasttext: Add buckets_ngram size to memory estimate --- gensim/models/fasttext.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/gensim/models/fasttext.py b/gensim/models/fasttext.py index 11d42a5747..4117f49642 100644 --- a/gensim/models/fasttext.py +++ b/gensim/models/fasttext.py @@ -409,11 +409,16 @@ def estimate_memory(self, vocab_size=None, report=None): report['syn1neg'] = len(self.wv.vocab) * l1_size if self.word_ngrams > 0 and self.wv.vocab: buckets = set() + num_ngrams = 0 for word in self.wv.vocab: ngrams = _compute_ngrams(word, self.min_n, self.max_n) + num_ngrams += len(ngrams) buckets.update(_ft_hash(ng) % self.bucket for ng in ngrams) num_buckets = len(buckets) report['syn0_ngrams'] = len(buckets) * vec_size + # A tuple (48 bytes) with num_ngrams_word ints (8 bytes) for each word + # Only used during training, not stored with the model + report['buckets_word'] = 48 * len(self.wv.vocab) + 8 * num_ngrams elif self.word_ngrams > 0: logger.warn( 'subword information is enabled, but no vocabulary could be found, estimated required memory might be ' From e574e905fda13b8725803a3feea5732cdd077ac4 Mon Sep 17 00:00:00 2001 From: Johannes Baiter Date: Wed, 28 Feb 2018 11:56:18 +0100 Subject: [PATCH 13/20] fasttext: Don't store buckets_word with the model --- gensim/models/fasttext.py | 2 +- gensim/models/keyedvectors.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gensim/models/fasttext.py b/gensim/models/fasttext.py index 4117f49642..5c2df15c2e 100644 --- a/gensim/models/fasttext.py +++ b/gensim/models/fasttext.py @@ -699,7 +699,7 @@ def save(self, *args, **kwargs): Path to the file. """ - kwargs['ignore'] = kwargs.get('ignore', ['vectors_norm', 'vectors_vocab_norm', 'vectors_ngrams_norm']) + kwargs['ignore'] = kwargs.get('ignore', ['vectors_norm', 'vectors_vocab_norm', 'vectors_ngrams_norm', 'buckets_word']) super(FastText, self).save(*args, **kwargs) @classmethod diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py index f07ad092b6..94536b567c 100644 --- a/gensim/models/keyedvectors.py +++ b/gensim/models/keyedvectors.py @@ -1584,7 +1584,7 @@ def save(self, *args, **kwargs): """ # don't bother storing the cached normalized vectors - kwargs['ignore'] = kwargs.get('ignore', ['vectors_norm', 'vectors_vocab_norm', 'vectors_ngrams_norm']) + kwargs['ignore'] = kwargs.get('ignore', ['vectors_norm', 'vectors_vocab_norm', 'vectors_ngrams_norm', 'buckets_word']) super(FastTextKeyedVectors, self).save(*args, **kwargs) def word_vec(self, word, use_norm=False): From 2a090c6fc960b685c113b115e23200b188055ab3 Mon Sep 17 00:00:00 2001 From: Johannes Baiter Date: Wed, 28 Feb 2018 11:57:21 +0100 Subject: [PATCH 14/20] fasttext: Use smaller model for test_estimate_memory --- gensim/test/test_fasttext.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/test/test_fasttext.py b/gensim/test/test_fasttext.py index 4fe32cb18e..a06ebc1ff0 100644 --- a/gensim/test/test_fasttext.py +++ b/gensim/test/test_fasttext.py @@ -516,7 +516,7 @@ def test_bucket_ngrams(self): self.assertEqual(model.wv.syn0_ngrams.shape, (20, 10)) def test_estimate_memory(self): - model = FT_gensim(sg=1, hs=1, negative=5, min_count=3) + model = FT_gensim(sg=1, hs=1, size=10, negative=5, min_count=3) model.build_vocab(sentences) report = model.estimate_memory() self.assertEqual(report['vocab'], 2800) From 33968dcb6496d8c453c3b880d3e47ecb6c60c947 Mon Sep 17 00:00:00 2001 From: Johannes Baiter Date: Wed, 28 Feb 2018 12:02:16 +0100 Subject: [PATCH 15/20] fasttext: Fix pure python training code --- gensim/models/fasttext.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/gensim/models/fasttext.py b/gensim/models/fasttext.py index 5c2df15c2e..b157e268ab 100644 --- a/gensim/models/fasttext.py +++ b/gensim/models/fasttext.py @@ -92,11 +92,7 @@ def train_batch_cbow(model, sentences, alpha, work=None, neu1=None): for index in word2_indices: vocab_subwords_indices += [index] - word2_subwords += list(model.wv.buckets_word[index]) - - for subword in word2_subwords: - ngrams_subwords_indices.append( - model.wv.hash2index[_ft_hash(subword) % model.bucket]) + ngram_subwords_indices.extend(model.wv.buckets_word[index]) l1_vocab = np_sum(model.wv.syn0_vocab[vocab_subwords_indices], axis=0) # 1 x vector_size l1_ngrams = np_sum(model.wv.syn0_ngrams[ngrams_subwords_indices], axis=0) # 1 x vector_size From 76a0675b71917894797b10371f73fcf841593085 Mon Sep 17 00:00:00 2001 From: Johannes Baiter Date: Wed, 28 Feb 2018 12:54:59 +0100 Subject: [PATCH 16/20] fasttext: Fix asserts for test_estimate_memory --- gensim/test/test_fasttext.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/gensim/test/test_fasttext.py b/gensim/test/test_fasttext.py index a06ebc1ff0..a2ffcfb0fa 100644 --- a/gensim/test/test_fasttext.py +++ b/gensim/test/test_fasttext.py @@ -520,11 +520,12 @@ def test_estimate_memory(self): model.build_vocab(sentences) report = model.estimate_memory() self.assertEqual(report['vocab'], 2800) - self.assertEqual(report['syn0_vocab'], 1600) - self.assertEqual(report['syn1'], 1600) - self.assertEqual(report['syn1neg'], 1600) - self.assertEqual(report['syn0_ngrams'], 22400) - self.assertEqual(report['total'], 30000) + self.assertEqual(report['syn0_vocab'], 160) + self.assertEqual(report['syn1'], 160) + self.assertEqual(report['syn1neg'], 160) + self.assertEqual(report['syn0_ngrams'], 2240) + self.assertEqual(report['buckets_word'], 640) + self.assertEqual(report['total'], 6160) def testLoadOldModel(self): """Test loading fasttext models from previous version""" From 0a2ae3c5b1398ac26f0416d3820950c651e3980b Mon Sep 17 00:00:00 2001 From: Johannes Baiter Date: Wed, 28 Feb 2018 13:04:26 +0100 Subject: [PATCH 17/20] fasttext: Fix typo and style errors --- gensim/models/fasttext.py | 6 +++--- gensim/models/keyedvectors.py | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/gensim/models/fasttext.py b/gensim/models/fasttext.py index b157e268ab..6f044265de 100644 --- a/gensim/models/fasttext.py +++ b/gensim/models/fasttext.py @@ -86,13 +86,12 @@ def train_batch_cbow(model, sentences, alpha, work=None, neu1=None): window_pos = enumerate(word_vocabs[start:(pos + model.window + 1 - reduced_window)], start) word2_indices = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)] - word2_subwords = [] vocab_subwords_indices = [] ngrams_subwords_indices = [] for index in word2_indices: vocab_subwords_indices += [index] - ngram_subwords_indices.extend(model.wv.buckets_word[index]) + ngrams_subwords_indices.extend(model.wv.buckets_word[index]) l1_vocab = np_sum(model.wv.syn0_vocab[vocab_subwords_indices], axis=0) # 1 x vector_size l1_ngrams = np_sum(model.wv.syn0_ngrams[ngrams_subwords_indices], axis=0) # 1 x vector_size @@ -695,7 +694,8 @@ def save(self, *args, **kwargs): Path to the file. """ - kwargs['ignore'] = kwargs.get('ignore', ['vectors_norm', 'vectors_vocab_norm', 'vectors_ngrams_norm', 'buckets_word']) + kwargs['ignore'] = kwargs.get( + 'ignore', ['vectors_norm', 'vectors_vocab_norm', 'vectors_ngrams_norm', 'buckets_word']) super(FastText, self).save(*args, **kwargs) @classmethod diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py index 94536b567c..f0377d0d46 100644 --- a/gensim/models/keyedvectors.py +++ b/gensim/models/keyedvectors.py @@ -1584,7 +1584,8 @@ def save(self, *args, **kwargs): """ # don't bother storing the cached normalized vectors - kwargs['ignore'] = kwargs.get('ignore', ['vectors_norm', 'vectors_vocab_norm', 'vectors_ngrams_norm', 'buckets_word']) + kwargs['ignore'] = kwargs.get( + 'ignore', ['vectors_norm', 'vectors_vocab_norm', 'vectors_ngrams_norm', 'buckets_word']) super(FastTextKeyedVectors, self).save(*args, **kwargs) def word_vec(self, word, use_norm=False): From 0fe0f80be58e2c65622521d00e66e47b145730d1 Mon Sep 17 00:00:00 2001 From: Johannes Baiter Date: Wed, 28 Feb 2018 17:43:04 +0100 Subject: [PATCH 18/20] fasttext: Simplify code as per @jayantj's review --- gensim/models/fasttext.py | 18 ++++++------------ gensim/models/keyedvectors.py | 4 +++- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/gensim/models/fasttext.py b/gensim/models/fasttext.py index 6f044265de..46e68b44ec 100644 --- a/gensim/models/fasttext.py +++ b/gensim/models/fasttext.py @@ -775,19 +775,16 @@ def init_ngrams_weights(self, wv, update=False, vocabulary=None): wv.hash2index = {} wv.buckets_word = {} ngram_indices = [] - new_hash_count = 0 - wv.num_ngram_vectors = 0 for word, vocab in wv.vocab.items(): buckets = [] for ngram in _compute_ngrams(word, wv.min_n, wv.max_n): ngram_hash = _ft_hash(ngram) % self.bucket if ngram_hash not in wv.hash2index: - wv.num_ngram_vectors += 1 + wv.hash2index[ngram_hash] = len(ngram_indices) ngram_indices.append(ngram_hash) - wv.hash2index[ngram_hash] = new_hash_count - new_hash_count = new_hash_count + 1 buckets.append(wv.hash2index[ngram_hash]) wv.buckets_word[vocab.index] = tuple(buckets) + wv.num_ngram_vectors = len(ngram_indices) logger.info("Total number of ngrams is %d", wv.num_ngram_vectors) @@ -795,17 +792,14 @@ def init_ngrams_weights(self, wv, update=False, vocabulary=None): self.vectors_ngrams_lockf = self.vectors_ngrams_lockf.take(ngram_indices, axis=0) self.reset_ngrams_weights(wv) else: - if not wv.buckets_word: - wv.buckets_word = {} - new_hash_count = 0 + wv.buckets_word = {} num_new_ngrams = 0 for word, vocab in wv.vocab.items(): buckets = [] for ngram in _compute_ngrams(word, wv.min_n, wv.max_n): ngram_hash = _ft_hash(ngram) % self.bucket if ngram_hash not in wv.hash2index: - wv.hash2index[ngram_hash] = new_hash_count + self.old_hash2index_len - new_hash_count = new_hash_count + 1 + wv.hash2index[ngram_hash] = num_new_ngrams + self.old_hash2index_len num_new_ngrams += 1 buckets.append(wv.hash2index[ngram_hash]) wv.buckets_word[vocab.index] = tuple(buckets) @@ -879,9 +873,9 @@ def init_ngrams_post_load(self, file_name, wv): ngram_hash = _ft_hash(ngram) % self.bucket if ngram_hash in wv.hash2index: continue + wv.hash2index[ngram_hash] = len(ngram_indices) ngram_indices.append(len(wv.vocab) + ngram_hash) - wv.hash2index[ngram_hash] = wv.num_ngram_vectors - wv.num_ngram_vectors += 1 + wv.num_ngram_vectors = len(ngram_indices) wv.vectors_ngrams = wv.vectors_ngrams.take(ngram_indices, axis=0) ngram_weights = wv.vectors_ngrams diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py index f0377d0d46..00eee5112c 100644 --- a/gensim/models/keyedvectors.py +++ b/gensim/models/keyedvectors.py @@ -1606,12 +1606,14 @@ def word_vec(self, word, use_norm=False): ngram_weights = self.vectors_ngrams_norm else: ngram_weights = self.vectors_ngrams + ngrams_found = 0 for ngram in ngrams: ngram_hash = _ft_hash(ngram) % self.bucket if ngram_hash in self.hash2index: word_vec += ngram_weights[self.hash2index[ngram_hash]] + ngrams_found += 1 if word_vec.any(): - return word_vec / len(ngrams) + return word_vec / max(1, ngrams_found) else: # No ngrams of the word are present in self.ngrams raise KeyError('all ngrams for word %s absent from model' % word) From 7cb46e3e8187d078e59421830eea2f0b3b69410e Mon Sep 17 00:00:00 2001 From: Johannes Baiter Date: Wed, 28 Feb 2018 17:53:54 +0100 Subject: [PATCH 19/20] Update MANIFEST.in and documentation with utils_any2vec implementations --- MANIFEST.in | 2 ++ docs/src/apiref.rst | 2 ++ docs/src/models/_utils_any2vec.rst | 9 +++++++++ docs/src/models/utils_any2vec.rst | 9 +++++++++ 4 files changed, 22 insertions(+) create mode 100644 docs/src/models/_utils_any2vec.rst create mode 100644 docs/src/models/utils_any2vec.rst diff --git a/MANIFEST.in b/MANIFEST.in index 56afb80659..9bfc31660f 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -12,6 +12,8 @@ include gensim/models/doc2vec_inner.c include gensim/models/doc2vec_inner.pyx include gensim/models/fasttext_inner.c include gensim/models/fasttext_inner.pyx +include gensim/models/_utils_any2vec.c +include gensim/models/_utils_any2vec.pyx include gensim/corpora/_mmreader.c include gensim/corpora/_mmreader.pyx include gensim/_matutils.c diff --git a/docs/src/apiref.rst b/docs/src/apiref.rst index 6579cff495..66fe192b07 100644 --- a/docs/src/apiref.rst +++ b/docs/src/apiref.rst @@ -51,6 +51,8 @@ Modules: models/coherencemodel models/basemodel models/callbacks + models/utils_any2vec + models/_utils_any2vec models/wrappers/ldamallet models/wrappers/dtmmodel models/wrappers/ldavowpalwabbit.rst diff --git a/docs/src/models/_utils_any2vec.rst b/docs/src/models/_utils_any2vec.rst new file mode 100644 index 0000000000..46e5541ec3 --- /dev/null +++ b/docs/src/models/_utils_any2vec.rst @@ -0,0 +1,9 @@ +:mod:`models._utils_any2vec` -- Cython utils for any2vec models +=============================================================== + +.. automodule:: gensim.models._utils_any2vec + :synopsis: Cython utils for any2vec models + :members: + :inherited-members: + :undoc-members: + :show-inheritance: diff --git a/docs/src/models/utils_any2vec.rst b/docs/src/models/utils_any2vec.rst new file mode 100644 index 0000000000..123ee265e6 --- /dev/null +++ b/docs/src/models/utils_any2vec.rst @@ -0,0 +1,9 @@ +:mod:`models.utils_any2vec` -- Utils for any2vec models +======================================================= + +.. automodule:: gensim.models.utils_any2vec + :synopsis: Utils for any2vec models + :members: + :inherited-members: + :undoc-members: + :show-inheritance: From dcc085727397e233db5fc0a6a8b68bddb6e51012 Mon Sep 17 00:00:00 2001 From: ivan Date: Thu, 1 Mar 2018 12:35:43 +0500 Subject: [PATCH 20/20] last fixes (add option for cython compiler, fix descriptions, etc) --- gensim/models/_utils_any2vec.c | 884 +++++++++++++++++++++++-------- gensim/models/_utils_any2vec.pyx | 35 ++ gensim/models/utils_any2vec.py | 35 +- 3 files changed, 728 insertions(+), 226 deletions(-) diff --git a/gensim/models/_utils_any2vec.c b/gensim/models/_utils_any2vec.c index 48ade88f1c..24fed7ffcf 100644 --- a/gensim/models/_utils_any2vec.c +++ b/gensim/models/_utils_any2vec.c @@ -1,20 +1,14 @@ -/* Generated by Cython 0.25.2 */ - -/* BEGIN: Cython Metadata -{ - "distutils": {}, - "module_name": "gensim.models._utils_any2vec" -} -END: Cython Metadata */ +/* Generated by Cython 0.27.3 */ #define PY_SSIZE_T_CLEAN #include "Python.h" #ifndef Py_PYTHON_H #error Python headers needed to compile C extensions, please install development version of Python. -#elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03020000) - #error Cython requires Python 2.6+ or Python 3.2+. +#elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000) + #error Cython requires Python 2.6+ or Python 3.3+. #else -#define CYTHON_ABI "0_25_2" +#define CYTHON_ABI "0_27_3" +#define CYTHON_FUTURE_DIVISION 0 #include #ifndef offsetof #define offsetof(type, member) ( (size_t) & ((type*)0) -> member ) @@ -36,8 +30,9 @@ END: Cython Metadata */ #ifndef DL_EXPORT #define DL_EXPORT(t) t #endif +#define __PYX_COMMA , #ifndef HAVE_LONG_LONG - #if PY_VERSION_HEX >= 0x03030000 || (PY_MAJOR_VERSION == 2 && PY_VERSION_HEX >= 0x02070000) + #if PY_VERSION_HEX >= 0x02070000 #define HAVE_LONG_LONG #endif #endif @@ -53,8 +48,14 @@ END: Cython Metadata */ #define CYTHON_COMPILING_IN_CPYTHON 0 #undef CYTHON_USE_TYPE_SLOTS #define CYTHON_USE_TYPE_SLOTS 0 - #undef CYTHON_USE_ASYNC_SLOTS - #define CYTHON_USE_ASYNC_SLOTS 0 + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #if PY_VERSION_HEX < 0x03050000 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #elif !defined(CYTHON_USE_ASYNC_SLOTS) + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif #undef CYTHON_USE_PYLIST_INTERNALS #define CYTHON_USE_PYLIST_INTERNALS 0 #undef CYTHON_USE_UNICODE_INTERNALS @@ -73,6 +74,10 @@ END: Cython Metadata */ #define CYTHON_FAST_THREAD_STATE 0 #undef CYTHON_FAST_PYCALL #define CYTHON_FAST_PYCALL 0 + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 0 + #undef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 0 #elif defined(PYSTON_VERSION) #define CYTHON_COMPILING_IN_PYPY 0 #define CYTHON_COMPILING_IN_PYSTON 1 @@ -80,6 +85,8 @@ END: Cython Metadata */ #ifndef CYTHON_USE_TYPE_SLOTS #define CYTHON_USE_TYPE_SLOTS 1 #endif + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 #undef CYTHON_USE_ASYNC_SLOTS #define CYTHON_USE_ASYNC_SLOTS 0 #undef CYTHON_USE_PYLIST_INTERNALS @@ -104,6 +111,10 @@ END: Cython Metadata */ #define CYTHON_FAST_THREAD_STATE 0 #undef CYTHON_FAST_PYCALL #define CYTHON_FAST_PYCALL 0 + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 0 + #undef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 0 #else #define CYTHON_COMPILING_IN_PYPY 0 #define CYTHON_COMPILING_IN_PYSTON 0 @@ -111,6 +122,12 @@ END: Cython Metadata */ #ifndef CYTHON_USE_TYPE_SLOTS #define CYTHON_USE_TYPE_SLOTS 1 #endif + #if PY_VERSION_HEX < 0x02070000 + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #elif !defined(CYTHON_USE_PYTYPE_LOOKUP) + #define CYTHON_USE_PYTYPE_LOOKUP 1 + #endif #if PY_MAJOR_VERSION < 3 #undef CYTHON_USE_ASYNC_SLOTS #define CYTHON_USE_ASYNC_SLOTS 0 @@ -150,6 +167,12 @@ END: Cython Metadata */ #ifndef CYTHON_FAST_PYCALL #define CYTHON_FAST_PYCALL 1 #endif + #ifndef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT (0 && PY_VERSION_HEX >= 0x03050000) + #endif + #ifndef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE (PY_VERSION_HEX >= 0x030400a1) + #endif #endif #if !defined(CYTHON_FAST_PYCCALL) #define CYTHON_FAST_PYCCALL (CYTHON_FAST_PYCALL && PY_VERSION_HEX >= 0x030600B1) @@ -188,19 +211,44 @@ END: Cython Metadata */ #ifndef Py_TPFLAGS_HAVE_FINALIZE #define Py_TPFLAGS_HAVE_FINALIZE 0 #endif -#ifndef METH_FASTCALL - #define METH_FASTCALL 0x80 - typedef PyObject *(*__Pyx_PyCFunctionFast) (PyObject *self, PyObject **args, - Py_ssize_t nargs, PyObject *kwnames); +#if PY_VERSION_HEX < 0x030700A0 || !defined(METH_FASTCALL) + #ifndef METH_FASTCALL + #define METH_FASTCALL 0x80 + #endif + typedef PyObject *(*__Pyx_PyCFunctionFast) (PyObject *self, PyObject **args, Py_ssize_t nargs); + typedef PyObject *(*__Pyx_PyCFunctionFastWithKeywords) (PyObject *self, PyObject **args, + Py_ssize_t nargs, PyObject *kwnames); #else #define __Pyx_PyCFunctionFast _PyCFunctionFast + #define __Pyx_PyCFunctionFastWithKeywords _PyCFunctionFastWithKeywords #endif #if CYTHON_FAST_PYCCALL #define __Pyx_PyFastCFunction_Check(func)\ - ((PyCFunction_Check(func) && (METH_FASTCALL == (PyCFunction_GET_FLAGS(func) & ~(METH_CLASS | METH_STATIC | METH_COEXIST))))) + ((PyCFunction_Check(func) && (METH_FASTCALL == (PyCFunction_GET_FLAGS(func) & ~(METH_CLASS | METH_STATIC | METH_COEXIST | METH_KEYWORDS))))) #else #define __Pyx_PyFastCFunction_Check(func) 0 #endif +#if !CYTHON_FAST_THREAD_STATE || PY_VERSION_HEX < 0x02070000 + #define __Pyx_PyThreadState_Current PyThreadState_GET() +#elif PY_VERSION_HEX >= 0x03060000 + #define __Pyx_PyThreadState_Current _PyThreadState_UncheckedGet() +#elif PY_VERSION_HEX >= 0x03000000 + #define __Pyx_PyThreadState_Current PyThreadState_GET() +#else + #define __Pyx_PyThreadState_Current _PyThreadState_Current +#endif +#if CYTHON_COMPILING_IN_CPYTHON || defined(_PyDict_NewPresized) +#define __Pyx_PyDict_NewPresized(n) ((n <= 8) ? PyDict_New() : _PyDict_NewPresized(n)) +#else +#define __Pyx_PyDict_NewPresized(n) PyDict_New() +#endif +#if PY_MAJOR_VERSION >= 3 || CYTHON_FUTURE_DIVISION + #define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y) + #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y) +#else + #define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y) + #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y) +#endif #if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND) #define CYTHON_PEP393_ENABLED 1 #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\ @@ -284,7 +332,6 @@ END: Cython Metadata */ #ifndef PySet_CheckExact #define PySet_CheckExact(obj) (Py_TYPE(obj) == &PySet_Type) #endif -#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type) #define __Pyx_PyException_Check(obj) __Pyx_TypeCheck(obj, PyExc_Exception) #if PY_MAJOR_VERSION >= 3 #define PyIntObject PyLongObject @@ -324,20 +371,28 @@ END: Cython Metadata */ #else #define __Pyx_PyMethod_New(func, self, klass) PyMethod_New(func, self, klass) #endif +#ifndef __has_attribute + #define __has_attribute(x) 0 +#endif +#ifndef __has_cpp_attribute + #define __has_cpp_attribute(x) 0 +#endif #if CYTHON_USE_ASYNC_SLOTS #if PY_VERSION_HEX >= 0x030500B1 #define __Pyx_PyAsyncMethodsStruct PyAsyncMethods #define __Pyx_PyType_AsAsync(obj) (Py_TYPE(obj)->tp_as_async) #else + #define __Pyx_PyType_AsAsync(obj) ((__Pyx_PyAsyncMethodsStruct*) (Py_TYPE(obj)->tp_reserved)) + #endif +#else + #define __Pyx_PyType_AsAsync(obj) NULL +#endif +#ifndef __Pyx_PyAsyncMethodsStruct typedef struct { unaryfunc am_await; unaryfunc am_aiter; unaryfunc am_anext; } __Pyx_PyAsyncMethodsStruct; - #define __Pyx_PyType_AsAsync(obj) ((__Pyx_PyAsyncMethodsStruct*) (Py_TYPE(obj)->tp_reserved)) - #endif -#else - #define __Pyx_PyType_AsAsync(obj) NULL #endif #ifndef CYTHON_RESTRICT #if defined(__GNUC__) @@ -378,6 +433,43 @@ END: Cython Metadata */ # endif #endif #define __Pyx_void_to_None(void_result) ((void)(void_result), Py_INCREF(Py_None), Py_None) +#ifdef _MSC_VER + #ifndef _MSC_STDINT_H_ + #if _MSC_VER < 1300 + typedef unsigned char uint8_t; + typedef unsigned int uint32_t; + #else + typedef unsigned __int8 uint8_t; + typedef unsigned __int32 uint32_t; + #endif + #endif +#else + #include +#endif +#ifndef CYTHON_FALLTHROUGH + #if defined(__cplusplus) && __cplusplus >= 201103L + #if __has_cpp_attribute(fallthrough) + #define CYTHON_FALLTHROUGH [[fallthrough]] + #elif __has_cpp_attribute(clang::fallthrough) + #define CYTHON_FALLTHROUGH [[clang::fallthrough]] + #elif __has_cpp_attribute(gnu::fallthrough) + #define CYTHON_FALLTHROUGH [[gnu::fallthrough]] + #endif + #endif + #ifndef CYTHON_FALLTHROUGH + #if __has_attribute(fallthrough) + #define CYTHON_FALLTHROUGH __attribute__((fallthrough)) + #else + #define CYTHON_FALLTHROUGH + #endif + #endif + #if defined(__clang__ ) && defined(__apple_build_version__) + #if __apple_build_version__ < 7000000 + #undef CYTHON_FALLTHROUGH + #define CYTHON_FALLTHROUGH + #endif + #endif +#endif #ifndef CYTHON_INLINE #if defined(__clang__) @@ -418,14 +510,6 @@ static CYTHON_INLINE float __PYX_NAN() { __pyx_filename = __pyx_f[f_index]; __pyx_lineno = lineno; __pyx_clineno = __LINE__; goto Ln_error; \ } -#if PY_MAJOR_VERSION >= 3 - #define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y) - #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y) -#else - #define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y) - #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y) -#endif - #ifndef __PYX_EXTERN_C #ifdef __cplusplus #define __PYX_EXTERN_C extern "C" @@ -440,7 +524,7 @@ static CYTHON_INLINE float __PYX_NAN() { #include #endif /* _OPENMP */ -#ifdef PYREX_WITHOUT_ASSERTIONS +#if defined(PYREX_WITHOUT_ASSERTIONS) && !defined(CYTHON_WITHOUT_ASSERTIONS) #define CYTHON_WITHOUT_ASSERTIONS #endif @@ -471,8 +555,8 @@ typedef struct {PyObject **p; const char *s; const Py_ssize_t n; const char* enc #define __Pyx_sst_abs(value) abs(value) #elif SIZEOF_LONG >= SIZEOF_SIZE_T #define __Pyx_sst_abs(value) labs(value) -#elif defined (_MSC_VER) && defined (_M_X64) - #define __Pyx_sst_abs(value) _abs64(value) +#elif defined (_MSC_VER) + #define __Pyx_sst_abs(value) ((Py_ssize_t)_abs64(value)) #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L #define __Pyx_sst_abs(value) llabs(value) #elif defined (__GNUC__) @@ -480,8 +564,8 @@ typedef struct {PyObject **p; const char *s; const Py_ssize_t n; const char* enc #else #define __Pyx_sst_abs(value) ((value<0) ? -value : value) #endif -static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject*); -static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length); +static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject*); +static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length); #define __Pyx_PyByteArray_FromString(s) PyByteArray_FromStringAndSize((const char*)s, strlen((const char*)s)) #define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l) #define __Pyx_PyBytes_FromString PyBytes_FromString @@ -494,23 +578,27 @@ static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*); #define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize #endif -#define __Pyx_PyObject_AsSString(s) ((signed char*) __Pyx_PyObject_AsString(s)) -#define __Pyx_PyObject_AsUString(s) ((unsigned char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyBytes_AsWritableString(s) ((char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsWritableSString(s) ((signed char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsWritableUString(s) ((unsigned char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsString(s) ((const char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsSString(s) ((const signed char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsUString(s) ((const unsigned char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyObject_AsWritableString(s) ((char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsWritableSString(s) ((signed char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsWritableUString(s) ((unsigned char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsSString(s) ((const signed char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsUString(s) ((const unsigned char*) __Pyx_PyObject_AsString(s)) #define __Pyx_PyObject_FromCString(s) __Pyx_PyObject_FromString((const char*)s) #define __Pyx_PyBytes_FromCString(s) __Pyx_PyBytes_FromString((const char*)s) #define __Pyx_PyByteArray_FromCString(s) __Pyx_PyByteArray_FromString((const char*)s) #define __Pyx_PyStr_FromCString(s) __Pyx_PyStr_FromString((const char*)s) #define __Pyx_PyUnicode_FromCString(s) __Pyx_PyUnicode_FromString((const char*)s) -#if PY_MAJOR_VERSION < 3 -static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u) -{ +static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u) { const Py_UNICODE *u_end = u; while (*u_end++) ; return (size_t)(u_end - u - 1); } -#else -#define __Pyx_Py_UNICODE_strlen Py_UNICODE_strlen -#endif #define __Pyx_PyUnicode_FromUnicode(u) PyUnicode_FromUnicode(u, __Pyx_Py_UNICODE_strlen(u)) #define __Pyx_PyUnicode_FromUnicodeAndLength PyUnicode_FromUnicode #define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode @@ -519,6 +607,8 @@ static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u) #define __Pyx_PyBool_FromLong(b) ((b) ? __Pyx_NewRef(Py_True) : __Pyx_NewRef(Py_False)) static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*); static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x); +#define __Pyx_PySequence_Tuple(obj)\ + (likely(PyTuple_CheckExact(obj)) ? __Pyx_NewRef(obj) : PySequence_Tuple(obj)) static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*); static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t); #if CYTHON_ASSUME_SAFE_MACROS @@ -617,10 +707,12 @@ static int __Pyx_init_sys_getdefaultencoding_params(void) { #define likely(x) (x) #define unlikely(x) (x) #endif /* __GNUC__ */ +static CYTHON_INLINE void __Pyx_pretend_to_initialize(void* ptr) { (void)ptr; } -static PyObject *__pyx_m; +static PyObject *__pyx_m = NULL; static PyObject *__pyx_d; static PyObject *__pyx_b; +static PyObject *__pyx_cython_runtime; static PyObject *__pyx_empty_tuple; static PyObject *__pyx_empty_bytes; static PyObject *__pyx_empty_unicode; @@ -723,9 +815,23 @@ static PyObject *__Pyx_GetBuiltinName(PyObject *name); static CYTHON_INLINE int __Pyx_init_unicode_iteration( PyObject* ustring, Py_ssize_t *length, void** data, int *kind); +/* UnicodeAsUCS4.proto */ +static CYTHON_INLINE Py_UCS4 __Pyx_PyUnicode_AsPy_UCS4(PyObject*); + +/* object_ord.proto */ +#if PY_MAJOR_VERSION >= 3 +#define __Pyx_PyObject_Ord(c)\ + (likely(PyUnicode_Check(c)) ? (long)__Pyx_PyUnicode_AsPy_UCS4(c) : __Pyx__PyObject_Ord(c)) +#else +#define __Pyx_PyObject_Ord(c) __Pyx__PyObject_Ord(c) +#endif +static long __Pyx__PyObject_Ord(PyObject* c); + /* ArgTypeTest.proto */ -static CYTHON_INLINE int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, - const char *name, int exact); +#define __Pyx_ArgTypeTest(obj, type, none_allowed, name, exact)\ + ((likely((Py_TYPE(obj) == type) | (none_allowed && (obj == Py_None)))) ? 1 :\ + __Pyx__ArgTypeTest(obj, type, name, exact)) +static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact); /* PyObjectFormatSimple.proto */ #if CYTHON_COMPILING_IN_PYPY @@ -804,6 +910,49 @@ static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject **argnames[],\ PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args,\ const char* function_name); +/* PyThreadStateGet.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyThreadState_declare PyThreadState *__pyx_tstate; +#define __Pyx_PyThreadState_assign __pyx_tstate = __Pyx_PyThreadState_Current; +#define __Pyx_PyErr_Occurred() __pyx_tstate->curexc_type +#else +#define __Pyx_PyThreadState_declare +#define __Pyx_PyThreadState_assign +#define __Pyx_PyErr_Occurred() PyErr_Occurred() +#endif + +/* PyErrFetchRestore.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyErr_Clear() __Pyx_ErrRestore(NULL, NULL, NULL) +#define __Pyx_ErrRestoreWithState(type, value, tb) __Pyx_ErrRestoreInState(PyThreadState_GET(), type, value, tb) +#define __Pyx_ErrFetchWithState(type, value, tb) __Pyx_ErrFetchInState(PyThreadState_GET(), type, value, tb) +#define __Pyx_ErrRestore(type, value, tb) __Pyx_ErrRestoreInState(__pyx_tstate, type, value, tb) +#define __Pyx_ErrFetch(type, value, tb) __Pyx_ErrFetchInState(__pyx_tstate, type, value, tb) +static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); +static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); +#if CYTHON_COMPILING_IN_CPYTHON +#define __Pyx_PyErr_SetNone(exc) (Py_INCREF(exc), __Pyx_ErrRestore((exc), NULL, NULL)) +#else +#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) +#endif +#else +#define __Pyx_PyErr_Clear() PyErr_Clear() +#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) +#define __Pyx_ErrRestoreWithState(type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetchWithState(type, value, tb) PyErr_Fetch(type, value, tb) +#define __Pyx_ErrRestoreInState(tstate, type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetchInState(tstate, type, value, tb) PyErr_Fetch(type, value, tb) +#define __Pyx_ErrRestore(type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetch(type, value, tb) PyErr_Fetch(type, value, tb) +#endif + +/* CLineInTraceback.proto */ +#ifdef CYTHON_CLINE_IN_TRACEBACK +#define __Pyx_CLineForTraceback(tstate, c_line) (((CYTHON_CLINE_IN_TRACEBACK)) ? c_line : 0) +#else +static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line); +#endif + /* CodeObjectCache.proto */ typedef struct { PyCodeObject* code_object; @@ -838,6 +987,18 @@ static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *); /* CIntFromPy.proto */ static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *); +/* FastTypeChecks.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +#define __Pyx_TypeCheck(obj, type) __Pyx_IsSubtype(Py_TYPE(obj), (PyTypeObject *)type) +static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b); +static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches(PyObject *err, PyObject *type); +static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches2(PyObject *err, PyObject *type1, PyObject *type2); +#else +#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type) +#define __Pyx_PyErr_GivenExceptionMatches(err, type) PyErr_GivenExceptionMatches(err, type) +#define __Pyx_PyErr_GivenExceptionMatches2(err, type1, type2) (PyErr_GivenExceptionMatches(err, type1) || PyErr_GivenExceptionMatches(err, type2)) +#endif + /* CheckBinaryVersion.proto */ static int __Pyx_check_binary_version(void); @@ -849,6 +1010,7 @@ static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); static PyObject *__pyx_f_6gensim_6models_14_utils_any2vec_ft_hash(PyObject *, int __pyx_skip_dispatch); /*proto*/ static PyObject *__pyx_f_6gensim_6models_14_utils_any2vec_compute_ngrams(PyObject *, unsigned int, unsigned int, int __pyx_skip_dispatch); /*proto*/ #define __Pyx_MODULE_NAME "gensim.models._utils_any2vec" +extern int __pyx_module_is_main_gensim__models___utils_any2vec; int __pyx_module_is_main_gensim__models___utils_any2vec = 0; /* Implementation of 'gensim.models._utils_any2vec' */ @@ -861,8 +1023,11 @@ static const char __pyx_k_word[] = "word"; static const char __pyx_k_max_n[] = "max_n"; static const char __pyx_k_min_n[] = "min_n"; static const char __pyx_k_range[] = "range"; +static const char __pyx_k_cline_in_traceback[] = "cline_in_traceback"; +static const char __pyx_k_General_functions_used_for_any2v[] = "General functions used for any2vec models."; static PyObject *__pyx_kp_u_; static PyObject *__pyx_kp_u__2; +static PyObject *__pyx_n_s_cline_in_traceback; static PyObject *__pyx_n_s_main; static PyObject *__pyx_n_s_max_n; static PyObject *__pyx_n_s_min_n; @@ -874,18 +1039,18 @@ static PyObject *__pyx_pf_6gensim_6models_14_utils_any2vec_2compute_ngrams(CYTHO static PyObject *__pyx_int_0; static PyObject *__pyx_int_1; -/* "gensim/models/_utils_any2vec.pyx":7 - * # coding: utf-8 +/* "gensim/models/_utils_any2vec.pyx":10 + * """General functions used for any2vec models.""" * * cpdef ft_hash(unicode string): # <<<<<<<<<<<<<< - * cdef unsigned int h = 2166136261 - * for c in string: + * """Calculate hash based on `string`. + * Reproduce `hash method from Facebook fastText implementation */ static PyObject *__pyx_pw_6gensim_6models_14_utils_any2vec_1ft_hash(PyObject *__pyx_self, PyObject *__pyx_v_string); /*proto*/ static PyObject *__pyx_f_6gensim_6models_14_utils_any2vec_ft_hash(PyObject *__pyx_v_string, CYTHON_UNUSED int __pyx_skip_dispatch) { unsigned int __pyx_v_h; - Py_UCS4 __pyx_v_c; + PyObject *__pyx_v_c = NULL; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; @@ -896,19 +1061,20 @@ static PyObject *__pyx_f_6gensim_6models_14_utils_any2vec_ft_hash(PyObject *__py int __pyx_t_6; Py_ssize_t __pyx_t_7; PyObject *__pyx_t_8 = NULL; + long __pyx_t_9; __Pyx_RefNannySetupContext("ft_hash", 0); - /* "gensim/models/_utils_any2vec.pyx":8 + /* "gensim/models/_utils_any2vec.pyx":26 * - * cpdef ft_hash(unicode string): + * """ * cdef unsigned int h = 2166136261 # <<<<<<<<<<<<<< * for c in string: * h ^= ord(c) */ __pyx_v_h = 0x811C9DC5; - /* "gensim/models/_utils_any2vec.pyx":9 - * cpdef ft_hash(unicode string): + /* "gensim/models/_utils_any2vec.pyx":27 + * """ * cdef unsigned int h = 2166136261 * for c in string: # <<<<<<<<<<<<<< * h ^= ord(c) @@ -916,25 +1082,29 @@ static PyObject *__pyx_f_6gensim_6models_14_utils_any2vec_ft_hash(PyObject *__py */ if (unlikely(__pyx_v_string == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' is not iterable"); - __PYX_ERR(0, 9, __pyx_L1_error) + __PYX_ERR(0, 27, __pyx_L1_error) } __Pyx_INCREF(__pyx_v_string); __pyx_t_1 = __pyx_v_string; - __pyx_t_6 = __Pyx_init_unicode_iteration(__pyx_t_1, (&__pyx_t_3), (&__pyx_t_4), (&__pyx_t_5)); if (unlikely(__pyx_t_6 == -1)) __PYX_ERR(0, 9, __pyx_L1_error) + __pyx_t_6 = __Pyx_init_unicode_iteration(__pyx_t_1, (&__pyx_t_3), (&__pyx_t_4), (&__pyx_t_5)); if (unlikely(__pyx_t_6 == ((int)-1))) __PYX_ERR(0, 27, __pyx_L1_error) for (__pyx_t_7 = 0; __pyx_t_7 < __pyx_t_3; __pyx_t_7++) { __pyx_t_2 = __pyx_t_7; - __pyx_v_c = __Pyx_PyUnicode_READ(__pyx_t_5, __pyx_t_4, __pyx_t_2); + __pyx_t_8 = PyUnicode_FromOrdinal(__Pyx_PyUnicode_READ(__pyx_t_5, __pyx_t_4, __pyx_t_2)); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 27, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_8); + __Pyx_XDECREF_SET(__pyx_v_c, __pyx_t_8); + __pyx_t_8 = 0; - /* "gensim/models/_utils_any2vec.pyx":10 + /* "gensim/models/_utils_any2vec.pyx":28 * cdef unsigned int h = 2166136261 * for c in string: * h ^= ord(c) # <<<<<<<<<<<<<< * h *= 16777619 * return h */ - __pyx_v_h = (__pyx_v_h ^ ((long)__pyx_v_c)); + __pyx_t_9 = __Pyx_PyObject_Ord(__pyx_v_c); if (unlikely(__pyx_t_9 == ((long)(long)(Py_UCS4)-1))) __PYX_ERR(0, 28, __pyx_L1_error) + __pyx_v_h = (__pyx_v_h ^ __pyx_t_9); - /* "gensim/models/_utils_any2vec.pyx":11 + /* "gensim/models/_utils_any2vec.pyx":29 * for c in string: * h ^= ord(c) * h *= 16777619 # <<<<<<<<<<<<<< @@ -945,7 +1115,7 @@ static PyObject *__pyx_f_6gensim_6models_14_utils_any2vec_ft_hash(PyObject *__py } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/_utils_any2vec.pyx":12 + /* "gensim/models/_utils_any2vec.pyx":30 * h ^= ord(c) * h *= 16777619 * return h # <<<<<<<<<<<<<< @@ -953,18 +1123,18 @@ static PyObject *__pyx_f_6gensim_6models_14_utils_any2vec_ft_hash(PyObject *__py * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_8 = __Pyx_PyInt_From_unsigned_int(__pyx_v_h); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 12, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyInt_From_unsigned_int(__pyx_v_h); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 30, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __pyx_r = __pyx_t_8; __pyx_t_8 = 0; goto __pyx_L0; - /* "gensim/models/_utils_any2vec.pyx":7 - * # coding: utf-8 + /* "gensim/models/_utils_any2vec.pyx":10 + * """General functions used for any2vec models.""" * * cpdef ft_hash(unicode string): # <<<<<<<<<<<<<< - * cdef unsigned int h = 2166136261 - * for c in string: + * """Calculate hash based on `string`. + * Reproduce `hash method from Facebook fastText implementation */ /* function exit code */ @@ -974,6 +1144,7 @@ static PyObject *__pyx_f_6gensim_6models_14_utils_any2vec_ft_hash(PyObject *__py __Pyx_AddTraceback("gensim.models._utils_any2vec.ft_hash", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = 0; __pyx_L0:; + __Pyx_XDECREF(__pyx_v_c); __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; @@ -981,11 +1152,12 @@ static PyObject *__pyx_f_6gensim_6models_14_utils_any2vec_ft_hash(PyObject *__py /* Python wrapper */ static PyObject *__pyx_pw_6gensim_6models_14_utils_any2vec_1ft_hash(PyObject *__pyx_self, PyObject *__pyx_v_string); /*proto*/ +static char __pyx_doc_6gensim_6models_14_utils_any2vec_ft_hash[] = "ft_hash(unicode string)\nCalculate hash based on `string`.\n Reproduce `hash method from Facebook fastText implementation\n `_.\n\n Parameters\n ----------\n string : unicode\n The string whose hash needs to be calculated.\n\n Returns\n -------\n unsigned int\n The hash of the string.\n\n "; static PyObject *__pyx_pw_6gensim_6models_14_utils_any2vec_1ft_hash(PyObject *__pyx_self, PyObject *__pyx_v_string) { PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("ft_hash (wrapper)", 0); - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_string), (&PyUnicode_Type), 1, "string", 1))) __PYX_ERR(0, 7, __pyx_L1_error) + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_string), (&PyUnicode_Type), 1, "string", 1))) __PYX_ERR(0, 10, __pyx_L1_error) __pyx_r = __pyx_pf_6gensim_6models_14_utils_any2vec_ft_hash(__pyx_self, ((PyObject*)__pyx_v_string)); /* function exit code */ @@ -1003,7 +1175,7 @@ static PyObject *__pyx_pf_6gensim_6models_14_utils_any2vec_ft_hash(CYTHON_UNUSED PyObject *__pyx_t_1 = NULL; __Pyx_RefNannySetupContext("ft_hash", 0); __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __pyx_f_6gensim_6models_14_utils_any2vec_ft_hash(__pyx_v_string, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 7, __pyx_L1_error) + __pyx_t_1 = __pyx_f_6gensim_6models_14_utils_any2vec_ft_hash(__pyx_v_string, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 10, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; @@ -1020,12 +1192,12 @@ static PyObject *__pyx_pf_6gensim_6models_14_utils_any2vec_ft_hash(CYTHON_UNUSED return __pyx_r; } -/* "gensim/models/_utils_any2vec.pyx":15 +/* "gensim/models/_utils_any2vec.pyx":33 * * * cpdef compute_ngrams(word, unsigned int min_n, unsigned int max_n): # <<<<<<<<<<<<<< - * cdef unicode extended_word = f'<{word}>' - * ngrams = [] + * """Get the list of all possible ngrams for a given word. + * */ static PyObject *__pyx_pw_6gensim_6models_14_utils_any2vec_3compute_ngrams(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ @@ -1050,14 +1222,14 @@ static PyObject *__pyx_f_6gensim_6models_14_utils_any2vec_compute_ngrams(PyObjec int __pyx_t_12; __Pyx_RefNannySetupContext("compute_ngrams", 0); - /* "gensim/models/_utils_any2vec.pyx":16 + /* "gensim/models/_utils_any2vec.pyx":51 * - * cpdef compute_ngrams(word, unsigned int min_n, unsigned int max_n): + * """ * cdef unicode extended_word = f'<{word}>' # <<<<<<<<<<<<<< * ngrams = [] * for ngram_length in range(min_n, min(len(extended_word), max_n) + 1): */ - __pyx_t_1 = PyTuple_New(3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 16, __pyx_L1_error) + __pyx_t_1 = PyTuple_New(3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 51, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_2 = 0; __pyx_t_3 = 127; @@ -1065,7 +1237,7 @@ static PyObject *__pyx_f_6gensim_6models_14_utils_any2vec_compute_ngrams(PyObjec __pyx_t_2 += 1; __Pyx_GIVEREF(__pyx_kp_u_); PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_kp_u_); - __pyx_t_4 = __Pyx_PyObject_FormatSimple(__pyx_v_word, __pyx_empty_unicode); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 16, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_FormatSimple(__pyx_v_word, __pyx_empty_unicode); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 51, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __pyx_t_3 = (__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_4) > __pyx_t_3) ? __Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_4) : __pyx_t_3; __pyx_t_2 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_4); @@ -1076,43 +1248,43 @@ static PyObject *__pyx_f_6gensim_6models_14_utils_any2vec_compute_ngrams(PyObjec __pyx_t_2 += 1; __Pyx_GIVEREF(__pyx_kp_u__2); PyTuple_SET_ITEM(__pyx_t_1, 2, __pyx_kp_u__2); - __pyx_t_4 = __Pyx_PyUnicode_Join(__pyx_t_1, 3, __pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 16, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyUnicode_Join(__pyx_t_1, 3, __pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 51, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_extended_word = ((PyObject*)__pyx_t_4); __pyx_t_4 = 0; - /* "gensim/models/_utils_any2vec.pyx":17 - * cpdef compute_ngrams(word, unsigned int min_n, unsigned int max_n): + /* "gensim/models/_utils_any2vec.pyx":52 + * """ * cdef unicode extended_word = f'<{word}>' * ngrams = [] # <<<<<<<<<<<<<< * for ngram_length in range(min_n, min(len(extended_word), max_n) + 1): * for i in range(0, len(extended_word) - ngram_length + 1): */ - __pyx_t_4 = PyList_New(0); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 17, __pyx_L1_error) + __pyx_t_4 = PyList_New(0); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 52, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __pyx_v_ngrams = ((PyObject*)__pyx_t_4); __pyx_t_4 = 0; - /* "gensim/models/_utils_any2vec.pyx":18 + /* "gensim/models/_utils_any2vec.pyx":53 * cdef unicode extended_word = f'<{word}>' * ngrams = [] * for ngram_length in range(min_n, min(len(extended_word), max_n) + 1): # <<<<<<<<<<<<<< * for i in range(0, len(extended_word) - ngram_length + 1): * ngrams.append(extended_word[i:i + ngram_length]) */ - __pyx_t_4 = __Pyx_PyInt_From_unsigned_int(__pyx_v_min_n); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 18, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyInt_From_unsigned_int(__pyx_v_min_n); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 53, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __pyx_t_5 = __pyx_v_max_n; - __pyx_t_2 = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_extended_word); if (unlikely(__pyx_t_2 == -1)) __PYX_ERR(0, 18, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_extended_word); if (unlikely(__pyx_t_2 == ((Py_ssize_t)-1))) __PYX_ERR(0, 53, __pyx_L1_error) if (((__pyx_t_5 < __pyx_t_2) != 0)) { __pyx_t_6 = __pyx_t_5; } else { __pyx_t_6 = __pyx_t_2; } - __pyx_t_1 = PyInt_FromSsize_t((__pyx_t_6 + 1)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 18, __pyx_L1_error) + __pyx_t_1 = PyInt_FromSsize_t((__pyx_t_6 + 1)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 53, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_7 = PyTuple_New(2); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 18, __pyx_L1_error) + __pyx_t_7 = PyTuple_New(2); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 53, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); __Pyx_GIVEREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_t_4); @@ -1120,16 +1292,16 @@ static PyObject *__pyx_f_6gensim_6models_14_utils_any2vec_compute_ngrams(PyObjec PyTuple_SET_ITEM(__pyx_t_7, 1, __pyx_t_1); __pyx_t_4 = 0; __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_range, __pyx_t_7, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 18, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_range, __pyx_t_7, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 53, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; if (likely(PyList_CheckExact(__pyx_t_1)) || PyTuple_CheckExact(__pyx_t_1)) { __pyx_t_7 = __pyx_t_1; __Pyx_INCREF(__pyx_t_7); __pyx_t_6 = 0; __pyx_t_8 = NULL; } else { - __pyx_t_6 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 18, __pyx_L1_error) + __pyx_t_6 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 53, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); - __pyx_t_8 = Py_TYPE(__pyx_t_7)->tp_iternext; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 18, __pyx_L1_error) + __pyx_t_8 = Py_TYPE(__pyx_t_7)->tp_iternext; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 53, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; for (;;) { @@ -1137,17 +1309,17 @@ static PyObject *__pyx_f_6gensim_6models_14_utils_any2vec_compute_ngrams(PyObjec if (likely(PyList_CheckExact(__pyx_t_7))) { if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_7)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_6); __Pyx_INCREF(__pyx_t_1); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 18, __pyx_L1_error) + __pyx_t_1 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_6); __Pyx_INCREF(__pyx_t_1); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 53, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_7, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 18, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_7, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 53, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } else { if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_7)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_6); __Pyx_INCREF(__pyx_t_1); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 18, __pyx_L1_error) + __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_6); __Pyx_INCREF(__pyx_t_1); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 53, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_7, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 18, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_7, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 53, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } @@ -1156,8 +1328,8 @@ static PyObject *__pyx_f_6gensim_6models_14_utils_any2vec_compute_ngrams(PyObjec if (unlikely(!__pyx_t_1)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { - if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 18, __pyx_L1_error) + if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else __PYX_ERR(0, 53, __pyx_L1_error) } break; } @@ -1166,23 +1338,23 @@ static PyObject *__pyx_f_6gensim_6models_14_utils_any2vec_compute_ngrams(PyObjec __Pyx_XDECREF_SET(__pyx_v_ngram_length, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/_utils_any2vec.pyx":19 + /* "gensim/models/_utils_any2vec.pyx":54 * ngrams = [] * for ngram_length in range(min_n, min(len(extended_word), max_n) + 1): * for i in range(0, len(extended_word) - ngram_length + 1): # <<<<<<<<<<<<<< * ngrams.append(extended_word[i:i + ngram_length]) * return ngrams */ - __pyx_t_2 = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_extended_word); if (unlikely(__pyx_t_2 == -1)) __PYX_ERR(0, 19, __pyx_L1_error) - __pyx_t_1 = PyInt_FromSsize_t(__pyx_t_2); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 19, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_extended_word); if (unlikely(__pyx_t_2 == ((Py_ssize_t)-1))) __PYX_ERR(0, 54, __pyx_L1_error) + __pyx_t_1 = PyInt_FromSsize_t(__pyx_t_2); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 54, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = PyNumber_Subtract(__pyx_t_1, __pyx_v_ngram_length); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 19, __pyx_L1_error) + __pyx_t_4 = PyNumber_Subtract(__pyx_t_1, __pyx_v_ngram_length); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 54, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyInt_AddObjC(__pyx_t_4, __pyx_int_1, 1, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 19, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyInt_AddObjC(__pyx_t_4, __pyx_int_1, 1, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 54, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PyTuple_New(2); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 19, __pyx_L1_error) + __pyx_t_4 = PyTuple_New(2); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 54, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_INCREF(__pyx_int_0); __Pyx_GIVEREF(__pyx_int_0); @@ -1190,16 +1362,16 @@ static PyObject *__pyx_f_6gensim_6models_14_utils_any2vec_compute_ngrams(PyObjec __Pyx_GIVEREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_range, __pyx_t_4, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 19, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_range, __pyx_t_4, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 54, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (likely(PyList_CheckExact(__pyx_t_1)) || PyTuple_CheckExact(__pyx_t_1)) { __pyx_t_4 = __pyx_t_1; __Pyx_INCREF(__pyx_t_4); __pyx_t_2 = 0; __pyx_t_9 = NULL; } else { - __pyx_t_2 = -1; __pyx_t_4 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 19, __pyx_L1_error) + __pyx_t_2 = -1; __pyx_t_4 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 54, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __pyx_t_9 = Py_TYPE(__pyx_t_4)->tp_iternext; if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 19, __pyx_L1_error) + __pyx_t_9 = Py_TYPE(__pyx_t_4)->tp_iternext; if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 54, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; for (;;) { @@ -1207,17 +1379,17 @@ static PyObject *__pyx_f_6gensim_6models_14_utils_any2vec_compute_ngrams(PyObjec if (likely(PyList_CheckExact(__pyx_t_4))) { if (__pyx_t_2 >= PyList_GET_SIZE(__pyx_t_4)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyList_GET_ITEM(__pyx_t_4, __pyx_t_2); __Pyx_INCREF(__pyx_t_1); __pyx_t_2++; if (unlikely(0 < 0)) __PYX_ERR(0, 19, __pyx_L1_error) + __pyx_t_1 = PyList_GET_ITEM(__pyx_t_4, __pyx_t_2); __Pyx_INCREF(__pyx_t_1); __pyx_t_2++; if (unlikely(0 < 0)) __PYX_ERR(0, 54, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_4, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 19, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_4, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 54, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } else { if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_4)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_4, __pyx_t_2); __Pyx_INCREF(__pyx_t_1); __pyx_t_2++; if (unlikely(0 < 0)) __PYX_ERR(0, 19, __pyx_L1_error) + __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_4, __pyx_t_2); __Pyx_INCREF(__pyx_t_1); __pyx_t_2++; if (unlikely(0 < 0)) __PYX_ERR(0, 54, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_4, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 19, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_4, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 54, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } @@ -1226,8 +1398,8 @@ static PyObject *__pyx_f_6gensim_6models_14_utils_any2vec_compute_ngrams(PyObjec if (unlikely(!__pyx_t_1)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { - if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 19, __pyx_L1_error) + if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else __PYX_ERR(0, 54, __pyx_L1_error) } break; } @@ -1236,23 +1408,23 @@ static PyObject *__pyx_f_6gensim_6models_14_utils_any2vec_compute_ngrams(PyObjec __Pyx_XDECREF_SET(__pyx_v_i, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/_utils_any2vec.pyx":20 + /* "gensim/models/_utils_any2vec.pyx":55 * for ngram_length in range(min_n, min(len(extended_word), max_n) + 1): * for i in range(0, len(extended_word) - ngram_length + 1): * ngrams.append(extended_word[i:i + ngram_length]) # <<<<<<<<<<<<<< * return ngrams */ - __pyx_t_10 = __Pyx_PyIndex_AsSsize_t(__pyx_v_i); if (unlikely((__pyx_t_10 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 20, __pyx_L1_error) - __pyx_t_1 = PyNumber_Add(__pyx_v_i, __pyx_v_ngram_length); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 20, __pyx_L1_error) + __pyx_t_10 = __Pyx_PyIndex_AsSsize_t(__pyx_v_i); if (unlikely((__pyx_t_10 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 55, __pyx_L1_error) + __pyx_t_1 = PyNumber_Add(__pyx_v_i, __pyx_v_ngram_length); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 55, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_11 = __Pyx_PyIndex_AsSsize_t(__pyx_t_1); if (unlikely((__pyx_t_11 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 20, __pyx_L1_error) + __pyx_t_11 = __Pyx_PyIndex_AsSsize_t(__pyx_t_1); if (unlikely((__pyx_t_11 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 55, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyUnicode_Substring(__pyx_v_extended_word, __pyx_t_10, __pyx_t_11); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 20, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyUnicode_Substring(__pyx_v_extended_word, __pyx_t_10, __pyx_t_11); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 55, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_12 = __Pyx_PyList_Append(__pyx_v_ngrams, __pyx_t_1); if (unlikely(__pyx_t_12 == -1)) __PYX_ERR(0, 20, __pyx_L1_error) + __pyx_t_12 = __Pyx_PyList_Append(__pyx_v_ngrams, __pyx_t_1); if (unlikely(__pyx_t_12 == ((int)-1))) __PYX_ERR(0, 55, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/_utils_any2vec.pyx":19 + /* "gensim/models/_utils_any2vec.pyx":54 * ngrams = [] * for ngram_length in range(min_n, min(len(extended_word), max_n) + 1): * for i in range(0, len(extended_word) - ngram_length + 1): # <<<<<<<<<<<<<< @@ -1262,7 +1434,7 @@ static PyObject *__pyx_f_6gensim_6models_14_utils_any2vec_compute_ngrams(PyObjec } __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "gensim/models/_utils_any2vec.pyx":18 + /* "gensim/models/_utils_any2vec.pyx":53 * cdef unicode extended_word = f'<{word}>' * ngrams = [] * for ngram_length in range(min_n, min(len(extended_word), max_n) + 1): # <<<<<<<<<<<<<< @@ -1272,7 +1444,7 @@ static PyObject *__pyx_f_6gensim_6models_14_utils_any2vec_compute_ngrams(PyObjec } __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - /* "gensim/models/_utils_any2vec.pyx":21 + /* "gensim/models/_utils_any2vec.pyx":56 * for i in range(0, len(extended_word) - ngram_length + 1): * ngrams.append(extended_word[i:i + ngram_length]) * return ngrams # <<<<<<<<<<<<<< @@ -1282,12 +1454,12 @@ static PyObject *__pyx_f_6gensim_6models_14_utils_any2vec_compute_ngrams(PyObjec __pyx_r = __pyx_v_ngrams; goto __pyx_L0; - /* "gensim/models/_utils_any2vec.pyx":15 + /* "gensim/models/_utils_any2vec.pyx":33 * * * cpdef compute_ngrams(word, unsigned int min_n, unsigned int max_n): # <<<<<<<<<<<<<< - * cdef unicode extended_word = f'<{word}>' - * ngrams = [] + * """Get the list of all possible ngrams for a given word. + * */ /* function exit code */ @@ -1309,6 +1481,7 @@ static PyObject *__pyx_f_6gensim_6models_14_utils_any2vec_compute_ngrams(PyObjec /* Python wrapper */ static PyObject *__pyx_pw_6gensim_6models_14_utils_any2vec_3compute_ngrams(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static char __pyx_doc_6gensim_6models_14_utils_any2vec_2compute_ngrams[] = "compute_ngrams(word, unsigned int min_n, unsigned int max_n)\nGet the list of all possible ngrams for a given word.\n\n Parameters\n ----------\n word : str\n The word whose ngrams need to be computed.\n min_n : unsigned int\n Minimum character length of the ngrams.\n max_n : unsigned int\n Maximum character length of the ngrams.\n\n Returns\n -------\n list of str\n Sequence of character ngrams.\n\n "; static PyObject *__pyx_pw_6gensim_6models_14_utils_any2vec_3compute_ngrams(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_word = 0; unsigned int __pyx_v_min_n; @@ -1324,8 +1497,11 @@ static PyObject *__pyx_pw_6gensim_6models_14_utils_any2vec_3compute_ngrams(PyObj const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); switch (pos_args) { case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); + CYTHON_FALLTHROUGH; case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + CYTHON_FALLTHROUGH; case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + CYTHON_FALLTHROUGH; case 0: break; default: goto __pyx_L5_argtuple_error; } @@ -1334,19 +1510,21 @@ static PyObject *__pyx_pw_6gensim_6models_14_utils_any2vec_3compute_ngrams(PyObj case 0: if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_word)) != 0)) kw_args--; else goto __pyx_L5_argtuple_error; + CYTHON_FALLTHROUGH; case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_min_n)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("compute_ngrams", 1, 3, 3, 1); __PYX_ERR(0, 15, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("compute_ngrams", 1, 3, 3, 1); __PYX_ERR(0, 33, __pyx_L3_error) } + CYTHON_FALLTHROUGH; case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_max_n)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("compute_ngrams", 1, 3, 3, 2); __PYX_ERR(0, 15, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("compute_ngrams", 1, 3, 3, 2); __PYX_ERR(0, 33, __pyx_L3_error) } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "compute_ngrams") < 0)) __PYX_ERR(0, 15, __pyx_L3_error) + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "compute_ngrams") < 0)) __PYX_ERR(0, 33, __pyx_L3_error) } } else if (PyTuple_GET_SIZE(__pyx_args) != 3) { goto __pyx_L5_argtuple_error; @@ -1356,12 +1534,12 @@ static PyObject *__pyx_pw_6gensim_6models_14_utils_any2vec_3compute_ngrams(PyObj values[2] = PyTuple_GET_ITEM(__pyx_args, 2); } __pyx_v_word = values[0]; - __pyx_v_min_n = __Pyx_PyInt_As_unsigned_int(values[1]); if (unlikely((__pyx_v_min_n == (unsigned int)-1) && PyErr_Occurred())) __PYX_ERR(0, 15, __pyx_L3_error) - __pyx_v_max_n = __Pyx_PyInt_As_unsigned_int(values[2]); if (unlikely((__pyx_v_max_n == (unsigned int)-1) && PyErr_Occurred())) __PYX_ERR(0, 15, __pyx_L3_error) + __pyx_v_min_n = __Pyx_PyInt_As_unsigned_int(values[1]); if (unlikely((__pyx_v_min_n == (unsigned int)-1) && PyErr_Occurred())) __PYX_ERR(0, 33, __pyx_L3_error) + __pyx_v_max_n = __Pyx_PyInt_As_unsigned_int(values[2]); if (unlikely((__pyx_v_max_n == (unsigned int)-1) && PyErr_Occurred())) __PYX_ERR(0, 33, __pyx_L3_error) } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("compute_ngrams", 1, 3, 3, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 15, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("compute_ngrams", 1, 3, 3, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 33, __pyx_L3_error) __pyx_L3_error:; __Pyx_AddTraceback("gensim.models._utils_any2vec.compute_ngrams", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -1380,7 +1558,7 @@ static PyObject *__pyx_pf_6gensim_6models_14_utils_any2vec_2compute_ngrams(CYTHO PyObject *__pyx_t_1 = NULL; __Pyx_RefNannySetupContext("compute_ngrams", 0); __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __pyx_f_6gensim_6models_14_utils_any2vec_compute_ngrams(__pyx_v_word, __pyx_v_min_n, __pyx_v_max_n, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 15, __pyx_L1_error) + __pyx_t_1 = __pyx_f_6gensim_6models_14_utils_any2vec_compute_ngrams(__pyx_v_word, __pyx_v_min_n, __pyx_v_max_n, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 33, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; @@ -1398,23 +1576,37 @@ static PyObject *__pyx_pf_6gensim_6models_14_utils_any2vec_2compute_ngrams(CYTHO } static PyMethodDef __pyx_methods[] = { - {"ft_hash", (PyCFunction)__pyx_pw_6gensim_6models_14_utils_any2vec_1ft_hash, METH_O, 0}, - {"compute_ngrams", (PyCFunction)__pyx_pw_6gensim_6models_14_utils_any2vec_3compute_ngrams, METH_VARARGS|METH_KEYWORDS, 0}, + {"ft_hash", (PyCFunction)__pyx_pw_6gensim_6models_14_utils_any2vec_1ft_hash, METH_O, __pyx_doc_6gensim_6models_14_utils_any2vec_ft_hash}, + {"compute_ngrams", (PyCFunction)__pyx_pw_6gensim_6models_14_utils_any2vec_3compute_ngrams, METH_VARARGS|METH_KEYWORDS, __pyx_doc_6gensim_6models_14_utils_any2vec_2compute_ngrams}, {0, 0, 0, 0} }; #if PY_MAJOR_VERSION >= 3 +#if CYTHON_PEP489_MULTI_PHASE_INIT +static PyObject* __pyx_pymod_create(PyObject *spec, PyModuleDef *def); /*proto*/ +static int __pyx_pymod_exec__utils_any2vec(PyObject* module); /*proto*/ +static PyModuleDef_Slot __pyx_moduledef_slots[] = { + {Py_mod_create, (void*)__pyx_pymod_create}, + {Py_mod_exec, (void*)__pyx_pymod_exec__utils_any2vec}, + {0, NULL} +}; +#endif + static struct PyModuleDef __pyx_moduledef = { - #if PY_VERSION_HEX < 0x03020000 - { PyObject_HEAD_INIT(NULL) NULL, 0, NULL }, - #else PyModuleDef_HEAD_INIT, - #endif "_utils_any2vec", - 0, /* m_doc */ + __pyx_k_General_functions_used_for_any2v, /* m_doc */ + #if CYTHON_PEP489_MULTI_PHASE_INIT + 0, /* m_size */ + #else -1, /* m_size */ + #endif __pyx_methods /* m_methods */, + #if CYTHON_PEP489_MULTI_PHASE_INIT + __pyx_moduledef_slots, /* m_slots */ + #else NULL, /* m_reload */ + #endif NULL, /* m_traverse */ NULL, /* m_clear */ NULL /* m_free */ @@ -1424,6 +1616,7 @@ static struct PyModuleDef __pyx_moduledef = { static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_kp_u_, __pyx_k_, sizeof(__pyx_k_), 0, 1, 0, 0}, {&__pyx_kp_u__2, __pyx_k__2, sizeof(__pyx_k__2), 0, 1, 0, 0}, + {&__pyx_n_s_cline_in_traceback, __pyx_k_cline_in_traceback, sizeof(__pyx_k_cline_in_traceback), 0, 0, 1, 1}, {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1}, {&__pyx_n_s_max_n, __pyx_k_max_n, sizeof(__pyx_k_max_n), 0, 0, 1, 1}, {&__pyx_n_s_min_n, __pyx_k_min_n, sizeof(__pyx_k_min_n), 0, 0, 1, 1}, @@ -1433,7 +1626,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {0, 0, 0, 0, 0, 0, 0} }; static int __Pyx_InitCachedBuiltins(void) { - __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(0, 18, __pyx_L1_error) + __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(0, 53, __pyx_L1_error) return 0; __pyx_L1_error:; return -1; @@ -1461,10 +1654,54 @@ PyMODINIT_FUNC init_utils_any2vec(void) #else PyMODINIT_FUNC PyInit__utils_any2vec(void); /*proto*/ PyMODINIT_FUNC PyInit__utils_any2vec(void) +#if CYTHON_PEP489_MULTI_PHASE_INIT +{ + return PyModuleDef_Init(&__pyx_moduledef); +} +static int __Pyx_copy_spec_to_module(PyObject *spec, PyObject *moddict, const char* from_name, const char* to_name) { + PyObject *value = PyObject_GetAttrString(spec, from_name); + int result = 0; + if (likely(value)) { + result = PyDict_SetItemString(moddict, to_name, value); + Py_DECREF(value); + } else if (PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Clear(); + } else { + result = -1; + } + return result; +} +static PyObject* __pyx_pymod_create(PyObject *spec, CYTHON_UNUSED PyModuleDef *def) { + PyObject *module = NULL, *moddict, *modname; + if (__pyx_m) + return __Pyx_NewRef(__pyx_m); + modname = PyObject_GetAttrString(spec, "name"); + if (unlikely(!modname)) goto bad; + module = PyModule_NewObject(modname); + Py_DECREF(modname); + if (unlikely(!module)) goto bad; + moddict = PyModule_GetDict(module); + if (unlikely(!moddict)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "loader", "__loader__") < 0)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "origin", "__file__") < 0)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "parent", "__package__") < 0)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "submodule_search_locations", "__path__") < 0)) goto bad; + return module; +bad: + Py_XDECREF(module); + return NULL; +} + + +static int __pyx_pymod_exec__utils_any2vec(PyObject *__pyx_pyinit_module) +#endif #endif { PyObject *__pyx_t_1 = NULL; __Pyx_RefNannyDeclarations + #if CYTHON_PEP489_MULTI_PHASE_INIT + if (__pyx_m && __pyx_m == __pyx_pyinit_module) return 0; + #endif #if CYTHON_REFNANNY __Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny"); if (!__Pyx_RefNanny) { @@ -1491,6 +1728,9 @@ PyMODINIT_FUNC PyInit__utils_any2vec(void) #ifdef __Pyx_Generator_USED if (__pyx_Generator_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) #endif + #ifdef __Pyx_AsyncGen_USED + if (__pyx_AsyncGen_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif #ifdef __Pyx_StopAsyncIteration_USED if (__pyx_StopAsyncIteration_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) #endif @@ -1502,15 +1742,21 @@ PyMODINIT_FUNC PyInit__utils_any2vec(void) #endif #endif /*--- Module creation code ---*/ + #if CYTHON_PEP489_MULTI_PHASE_INIT + __pyx_m = __pyx_pyinit_module; + Py_INCREF(__pyx_m); + #else #if PY_MAJOR_VERSION < 3 - __pyx_m = Py_InitModule4("_utils_any2vec", __pyx_methods, 0, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m); + __pyx_m = Py_InitModule4("_utils_any2vec", __pyx_methods, __pyx_k_General_functions_used_for_any2v, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m); #else __pyx_m = PyModule_Create(&__pyx_moduledef); #endif if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error) + #endif __pyx_d = PyModule_GetDict(__pyx_m); if (unlikely(!__pyx_d)) __PYX_ERR(0, 1, __pyx_L1_error) Py_INCREF(__pyx_d); __pyx_b = PyImport_AddModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_b)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_cython_runtime = PyImport_AddModule((char *) "cython_runtime"); if (unlikely(!__pyx_cython_runtime)) __PYX_ERR(0, 1, __pyx_L1_error) #if CYTHON_COMPILING_IN_PYPY Py_INCREF(__pyx_b); #endif @@ -1552,7 +1798,7 @@ PyMODINIT_FUNC PyInit__utils_any2vec(void) * # cython: boundscheck=False * # cython: wraparound=False */ - __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 1, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) __PYX_ERR(0, 1, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; @@ -1564,7 +1810,7 @@ PyMODINIT_FUNC PyInit__utils_any2vec(void) __Pyx_XDECREF(__pyx_t_1); if (__pyx_m) { if (__pyx_d) { - __Pyx_AddTraceback("init gensim.models._utils_any2vec", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_AddTraceback("init gensim.models._utils_any2vec", 0, __pyx_lineno, __pyx_filename); } Py_DECREF(__pyx_m); __pyx_m = 0; } else if (!PyErr_Occurred()) { @@ -1572,10 +1818,12 @@ PyMODINIT_FUNC PyInit__utils_any2vec(void) } __pyx_L0:; __Pyx_RefNannyFinishContext(); - #if PY_MAJOR_VERSION < 3 - return; - #else + #if CYTHON_PEP489_MULTI_PHASE_INIT + return (__pyx_m != NULL) ? 0 : -1; + #elif PY_MAJOR_VERSION >= 3 return __pyx_m; + #else + return; #endif } @@ -1627,30 +1875,84 @@ static CYTHON_INLINE int __Pyx_init_unicode_iteration( return 0; } -/* ArgTypeTest */ -static void __Pyx_RaiseArgumentTypeInvalid(const char* name, PyObject *obj, PyTypeObject *type) { +/* UnicodeAsUCS4 */ +static CYTHON_INLINE Py_UCS4 __Pyx_PyUnicode_AsPy_UCS4(PyObject* x) { + Py_ssize_t length; + #if CYTHON_PEP393_ENABLED + length = PyUnicode_GET_LENGTH(x); + if (likely(length == 1)) { + return PyUnicode_READ_CHAR(x, 0); + } + #else + length = PyUnicode_GET_SIZE(x); + if (likely(length == 1)) { + return PyUnicode_AS_UNICODE(x)[0]; + } + #if Py_UNICODE_SIZE == 2 + else if (PyUnicode_GET_SIZE(x) == 2) { + Py_UCS4 high_val = PyUnicode_AS_UNICODE(x)[0]; + if (high_val >= 0xD800 && high_val <= 0xDBFF) { + Py_UCS4 low_val = PyUnicode_AS_UNICODE(x)[1]; + if (low_val >= 0xDC00 && low_val <= 0xDFFF) { + return 0x10000 + (((high_val & ((1<<10)-1)) << 10) | (low_val & ((1<<10)-1))); + } + } + } + #endif + #endif + PyErr_Format(PyExc_ValueError, + "only single character unicode strings can be converted to Py_UCS4, " + "got length %" CYTHON_FORMAT_SSIZE_T "d", length); + return (Py_UCS4)-1; +} + +/* object_ord */ +static long __Pyx__PyObject_Ord(PyObject* c) { + Py_ssize_t size; + if (PyBytes_Check(c)) { + size = PyBytes_GET_SIZE(c); + if (likely(size == 1)) { + return (unsigned char) PyBytes_AS_STRING(c)[0]; + } +#if PY_MAJOR_VERSION < 3 + } else if (PyUnicode_Check(c)) { + return (long)__Pyx_PyUnicode_AsPy_UCS4(c); +#endif +#if (!CYTHON_COMPILING_IN_PYPY) || (defined(PyByteArray_AS_STRING) && defined(PyByteArray_GET_SIZE)) + } else if (PyByteArray_Check(c)) { + size = PyByteArray_GET_SIZE(c); + if (likely(size == 1)) { + return (unsigned char) PyByteArray_AS_STRING(c)[0]; + } +#endif + } else { + PyErr_Format(PyExc_TypeError, + "ord() expected string of length 1, but %.200s found", c->ob_type->tp_name); + return (long)(Py_UCS4)-1; + } PyErr_Format(PyExc_TypeError, - "Argument '%.200s' has incorrect type (expected %.200s, got %.200s)", - name, type->tp_name, Py_TYPE(obj)->tp_name); + "ord() expected a character, but string of length %zd found", size); + return (long)(Py_UCS4)-1; } -static CYTHON_INLINE int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, - const char *name, int exact) + +/* ArgTypeTest */ +static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact) { if (unlikely(!type)) { PyErr_SetString(PyExc_SystemError, "Missing type object"); return 0; } - if (none_allowed && obj == Py_None) return 1; else if (exact) { - if (likely(Py_TYPE(obj) == type)) return 1; #if PY_MAJOR_VERSION == 2 - else if ((type == &PyBaseString_Type) && likely(__Pyx_PyBaseString_CheckExact(obj))) return 1; + if ((type == &PyBaseString_Type) && likely(__Pyx_PyBaseString_CheckExact(obj))) return 1; #endif } else { - if (likely(PyObject_TypeCheck(obj, type))) return 1; + if (likely(__Pyx_TypeCheck(obj, type))) return 1; } - __Pyx_RaiseArgumentTypeInvalid(name, obj, type); + PyErr_Format(PyExc_TypeError, + "Argument '%.200s' has incorrect type (expected %.200s, got %.200s)", + name, type->tp_name, Py_TYPE(obj)->tp_name); return 0; } @@ -1691,7 +1993,7 @@ static PyObject* __Pyx_PyUnicode_Join(PyObject* value_tuple, Py_ssize_t value_co if (!CYTHON_PEP393_ENABLED || ukind == result_ukind) { memcpy((char *)result_udata + char_pos * result_ukind, udata, ulength * result_ukind); } else { - #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030300F0 + #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030300F0 || defined(_PyUnicode_FastCopyCharacters) _PyUnicode_FastCopyCharacters(result_uval, char_pos, uval, 0, ulength); #else Py_ssize_t j; @@ -2020,6 +2322,67 @@ static int __Pyx_ParseOptionalKeywords( return -1; } +/* PyErrFetchRestore */ +#if CYTHON_FAST_THREAD_STATE +static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { + PyObject *tmp_type, *tmp_value, *tmp_tb; + tmp_type = tstate->curexc_type; + tmp_value = tstate->curexc_value; + tmp_tb = tstate->curexc_traceback; + tstate->curexc_type = type; + tstate->curexc_value = value; + tstate->curexc_traceback = tb; + Py_XDECREF(tmp_type); + Py_XDECREF(tmp_value); + Py_XDECREF(tmp_tb); +} +static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { + *type = tstate->curexc_type; + *value = tstate->curexc_value; + *tb = tstate->curexc_traceback; + tstate->curexc_type = 0; + tstate->curexc_value = 0; + tstate->curexc_traceback = 0; +} +#endif + +/* CLineInTraceback */ +#ifndef CYTHON_CLINE_IN_TRACEBACK +static int __Pyx_CLineForTraceback(CYTHON_UNUSED PyThreadState *tstate, int c_line) { + PyObject *use_cline; + PyObject *ptype, *pvalue, *ptraceback; +#if CYTHON_COMPILING_IN_CPYTHON + PyObject **cython_runtime_dict; +#endif + __Pyx_ErrFetchInState(tstate, &ptype, &pvalue, &ptraceback); +#if CYTHON_COMPILING_IN_CPYTHON + cython_runtime_dict = _PyObject_GetDictPtr(__pyx_cython_runtime); + if (likely(cython_runtime_dict)) { + use_cline = PyDict_GetItem(*cython_runtime_dict, __pyx_n_s_cline_in_traceback); + } else +#endif + { + PyObject *use_cline_obj = __Pyx_PyObject_GetAttrStr(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback); + if (use_cline_obj) { + use_cline = PyObject_Not(use_cline_obj) ? Py_False : Py_True; + Py_DECREF(use_cline_obj); + } else { + PyErr_Clear(); + use_cline = NULL; + } + } + if (!use_cline) { + c_line = 0; + PyObject_SetAttr(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback, Py_False); + } + else if (PyObject_Not(use_cline) != 0) { + c_line = 0; + } + __Pyx_ErrRestoreInState(tstate, ptype, pvalue, ptraceback); + return c_line; +} +#endif + /* CodeObjectCache */ static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) { int start = 0, mid = 0, end = count - 1; @@ -2160,18 +2523,22 @@ static void __Pyx_AddTraceback(const char *funcname, int c_line, int py_line, const char *filename) { PyCodeObject *py_code = 0; PyFrameObject *py_frame = 0; - py_code = __pyx_find_code_object(c_line ? c_line : py_line); + PyThreadState *tstate = __Pyx_PyThreadState_Current; + if (c_line) { + c_line = __Pyx_CLineForTraceback(tstate, c_line); + } + py_code = __pyx_find_code_object(c_line ? -c_line : py_line); if (!py_code) { py_code = __Pyx_CreateCodeObjectForTraceback( funcname, c_line, py_line, filename); if (!py_code) goto bad; - __pyx_insert_code_object(c_line ? c_line : py_line, py_code); + __pyx_insert_code_object(c_line ? -c_line : py_line, py_code); } py_frame = PyFrame_New( - PyThreadState_GET(), /*PyThreadState *tstate,*/ - py_code, /*PyCodeObject *code,*/ - __pyx_d, /*PyObject *globals,*/ - 0 /*PyObject *locals*/ + tstate, /*PyThreadState *tstate,*/ + py_code, /*PyCodeObject *code,*/ + __pyx_d, /*PyObject *globals,*/ + 0 /*PyObject *locals*/ ); if (!py_frame) goto bad; __Pyx_PyFrame_SetLineNumber(py_frame, py_line); @@ -2832,6 +3199,78 @@ static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) { return (int) -1; } +/* FastTypeChecks */ +#if CYTHON_COMPILING_IN_CPYTHON +static int __Pyx_InBases(PyTypeObject *a, PyTypeObject *b) { + while (a) { + a = a->tp_base; + if (a == b) + return 1; + } + return b == &PyBaseObject_Type; +} +static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b) { + PyObject *mro; + if (a == b) return 1; + mro = a->tp_mro; + if (likely(mro)) { + Py_ssize_t i, n; + n = PyTuple_GET_SIZE(mro); + for (i = 0; i < n; i++) { + if (PyTuple_GET_ITEM(mro, i) == (PyObject *)b) + return 1; + } + return 0; + } + return __Pyx_InBases(a, b); +} +#if PY_MAJOR_VERSION == 2 +static int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject* exc_type2) { + PyObject *exception, *value, *tb; + int res; + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ErrFetch(&exception, &value, &tb); + res = exc_type1 ? PyObject_IsSubclass(err, exc_type1) : 0; + if (unlikely(res == -1)) { + PyErr_WriteUnraisable(err); + res = 0; + } + if (!res) { + res = PyObject_IsSubclass(err, exc_type2); + if (unlikely(res == -1)) { + PyErr_WriteUnraisable(err); + res = 0; + } + } + __Pyx_ErrRestore(exception, value, tb); + return res; +} +#else +static CYTHON_INLINE int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject *exc_type2) { + int res = exc_type1 ? __Pyx_IsSubtype((PyTypeObject*)err, (PyTypeObject*)exc_type1) : 0; + if (!res) { + res = __Pyx_IsSubtype((PyTypeObject*)err, (PyTypeObject*)exc_type2); + } + return res; +} +#endif +static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches(PyObject *err, PyObject* exc_type) { + if (likely(err == exc_type)) return 1; + if (likely(PyExceptionClass_Check(err))) { + return __Pyx_inner_PyErr_GivenExceptionMatches2(err, NULL, exc_type); + } + return PyErr_GivenExceptionMatches(err, exc_type); +} +static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches2(PyObject *err, PyObject *exc_type1, PyObject *exc_type2) { + if (likely(err == exc_type1 || err == exc_type2)) return 1; + if (likely(PyExceptionClass_Check(err))) { + return __Pyx_inner_PyErr_GivenExceptionMatches2(err, exc_type1, exc_type2); + } + return (PyErr_GivenExceptionMatches(err, exc_type1) || PyErr_GivenExceptionMatches(err, exc_type2)); +} +#endif + /* CheckBinaryVersion */ static int __Pyx_check_binary_version(void) { char ctversion[4], rtversion[4]; @@ -2874,6 +3313,8 @@ static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) { #endif if (!*t->p) return -1; + if (PyObject_Hash(*t->p) == -1) + PyErr_Clear(); ++t; } return 0; @@ -2882,50 +3323,57 @@ static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) { static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) { return __Pyx_PyUnicode_FromStringAndSize(c_str, (Py_ssize_t)strlen(c_str)); } -static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject* o) { +static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject* o) { Py_ssize_t ignore; return __Pyx_PyObject_AsStringAndSize(o, &ignore); } -static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) { -#if CYTHON_COMPILING_IN_CPYTHON && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) - if ( -#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII - __Pyx_sys_getdefaultencoding_not_ascii && -#endif - PyUnicode_Check(o)) { -#if PY_VERSION_HEX < 0x03030000 - char* defenc_c; - PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL); - if (!defenc) return NULL; - defenc_c = PyBytes_AS_STRING(defenc); +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT +#if !CYTHON_PEP393_ENABLED +static const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) { + char* defenc_c; + PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL); + if (!defenc) return NULL; + defenc_c = PyBytes_AS_STRING(defenc); #if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII - { - char* end = defenc_c + PyBytes_GET_SIZE(defenc); - char* c; - for (c = defenc_c; c < end; c++) { - if ((unsigned char) (*c) >= 128) { - PyUnicode_AsASCIIString(o); - return NULL; - } + { + char* end = defenc_c + PyBytes_GET_SIZE(defenc); + char* c; + for (c = defenc_c; c < end; c++) { + if ((unsigned char) (*c) >= 128) { + PyUnicode_AsASCIIString(o); + return NULL; } } + } #endif - *length = PyBytes_GET_SIZE(defenc); - return defenc_c; + *length = PyBytes_GET_SIZE(defenc); + return defenc_c; +} #else - if (__Pyx_PyUnicode_READY(o) == -1) return NULL; +static CYTHON_INLINE const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) { + if (unlikely(__Pyx_PyUnicode_READY(o) == -1)) return NULL; #if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII - if (PyUnicode_IS_ASCII(o)) { - *length = PyUnicode_GET_LENGTH(o); - return PyUnicode_AsUTF8(o); - } else { - PyUnicode_AsASCIIString(o); - return NULL; - } + if (likely(PyUnicode_IS_ASCII(o))) { + *length = PyUnicode_GET_LENGTH(o); + return PyUnicode_AsUTF8(o); + } else { + PyUnicode_AsASCIIString(o); + return NULL; + } #else - return PyUnicode_AsUTF8AndSize(o, length); + return PyUnicode_AsUTF8AndSize(o, length); #endif +} +#endif +#endif +static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) { +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT + if ( +#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + __Pyx_sys_getdefaultencoding_not_ascii && #endif + PyUnicode_Check(o)) { + return __Pyx_PyUnicode_AsStringAndSize(o, length); } else #endif #if (!CYTHON_COMPILING_IN_PYPY) || (defined(PyByteArray_AS_STRING) && defined(PyByteArray_GET_SIZE)) @@ -2949,6 +3397,26 @@ static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) { if (is_true | (x == Py_False) | (x == Py_None)) return is_true; else return PyObject_IsTrue(x); } +static PyObject* __Pyx_PyNumber_IntOrLongWrongResultType(PyObject* result, const char* type_name) { +#if PY_MAJOR_VERSION >= 3 + if (PyLong_Check(result)) { + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, + "__int__ returned non-int (type %.200s). " + "The ability to return an instance of a strict subclass of int " + "is deprecated, and may be removed in a future version of Python.", + Py_TYPE(result)->tp_name)) { + Py_DECREF(result); + return NULL; + } + return result; + } +#endif + PyErr_Format(PyExc_TypeError, + "__%.4s__ returned non-%.4s (type %.200s)", + type_name, type_name, Py_TYPE(result)->tp_name); + Py_DECREF(result); + return NULL; +} static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x) { #if CYTHON_USE_TYPE_SLOTS PyNumberMethods *m; @@ -2956,9 +3424,9 @@ static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x) { const char *name = NULL; PyObject *res = NULL; #if PY_MAJOR_VERSION < 3 - if (PyInt_Check(x) || PyLong_Check(x)) + if (likely(PyInt_Check(x) || PyLong_Check(x))) #else - if (PyLong_Check(x)) + if (likely(PyLong_Check(x))) #endif return __Pyx_NewRef(x); #if CYTHON_USE_TYPE_SLOTS @@ -2966,32 +3434,30 @@ static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x) { #if PY_MAJOR_VERSION < 3 if (m && m->nb_int) { name = "int"; - res = PyNumber_Int(x); + res = m->nb_int(x); } else if (m && m->nb_long) { name = "long"; - res = PyNumber_Long(x); + res = m->nb_long(x); } #else - if (m && m->nb_int) { + if (likely(m && m->nb_int)) { name = "int"; - res = PyNumber_Long(x); + res = m->nb_int(x); } #endif #else - res = PyNumber_Int(x); + if (!PyBytes_CheckExact(x) && !PyUnicode_CheckExact(x)) { + res = PyNumber_Int(x); + } #endif - if (res) { + if (likely(res)) { #if PY_MAJOR_VERSION < 3 - if (!PyInt_Check(res) && !PyLong_Check(res)) { + if (unlikely(!PyInt_Check(res) && !PyLong_Check(res))) { #else - if (!PyLong_Check(res)) { + if (unlikely(!PyLong_CheckExact(res))) { #endif - PyErr_Format(PyExc_TypeError, - "__%.4s__ returned non-%.4s (type %.200s)", - name, name, Py_TYPE(res)->tp_name); - Py_DECREF(res); - return NULL; + return __Pyx_PyNumber_IntOrLongWrongResultType(res, name); } } else if (!PyErr_Occurred()) { diff --git a/gensim/models/_utils_any2vec.pyx b/gensim/models/_utils_any2vec.pyx index 81727af0bf..00bff990f7 100644 --- a/gensim/models/_utils_any2vec.pyx +++ b/gensim/models/_utils_any2vec.pyx @@ -2,9 +2,27 @@ # cython: boundscheck=False # cython: wraparound=False # cython: cdivision=True +# cython: embedsignature=True # coding: utf-8 +"""General functions used for any2vec models.""" + cpdef ft_hash(unicode string): + """Calculate hash based on `string`. + Reproduce `hash method from Facebook fastText implementation + `_. + + Parameters + ---------- + string : unicode + The string whose hash needs to be calculated. + + Returns + ------- + unsigned int + The hash of the string. + + """ cdef unsigned int h = 2166136261 for c in string: h ^= ord(c) @@ -13,6 +31,23 @@ cpdef ft_hash(unicode string): cpdef compute_ngrams(word, unsigned int min_n, unsigned int max_n): + """Get the list of all possible ngrams for a given word. + + Parameters + ---------- + word : str + The word whose ngrams need to be computed. + min_n : unsigned int + Minimum character length of the ngrams. + max_n : unsigned int + Maximum character length of the ngrams. + + Returns + ------- + list of str + Sequence of character ngrams. + + """ cdef unicode extended_word = f'<{word}>' ngrams = [] for ngram_length in range(min_n, min(len(extended_word), max_n) + 1): diff --git a/gensim/models/utils_any2vec.py b/gensim/models/utils_any2vec.py index 73bbdbde94..5cf56fbcca 100644 --- a/gensim/models/utils_any2vec.py +++ b/gensim/models/utils_any2vec.py @@ -4,7 +4,7 @@ # Author: Shiva Manne # Copyright (C) 2018 RaRe Technologies s.r.o. -"""This module contains various general functions useful for any2vec models.""" +"""General functions used for any2vec models.""" import logging import numpy as np @@ -18,24 +18,25 @@ logger = logging.getLogger(__name__) try: - from gensim.models._utils_any2vec import ( - ft_hash as _ft_hash, - compute_ngrams as _compute_ngrams) + from gensim.models._utils_any2vec import ft_hash as _ft_hash, compute_ngrams as _compute_ngrams except ImportError: - # failed... fall back to plain python (~100x slower than the above) + FAST_VERSION = -1 + + # failed... fall back to plain python def _ft_hash(string): - """Reproduces [hash method](https://github.com/facebookresearch/fastText/blob/master/src/dictionary.cc) - used in [1]_. + """Calculate hash based on `string`. + Reproduce `hash method from Facebook fastText implementation + `_. - Parameter - --------- + Parameters + ---------- string : str - The string whose hash needs to be calculated + The string whose hash needs to be calculated. Returns ------- int - The hash of the string + The hash of the string. """ # Runtime warnings for integer overflow are raised, this is expected behaviour. These warnings are suppressed. @@ -48,21 +49,21 @@ def _ft_hash(string): return h def _compute_ngrams(word, min_n, max_n): - """Returns the list of all possible ngrams for a given word. + """Get the list of all possible ngrams for a given word. Parameters ---------- word : str - The word whose ngrams need to be computed + The word whose ngrams need to be computed. min_n : int - minimum character length of the ngrams + Minimum character length of the ngrams. max_n : int - maximum character length of the ngrams + Maximum character length of the ngrams. Returns ------- - :obj:`list` of :obj:`str` - List of character ngrams + list of str + Sequence of character ngrams. """ BOW, EOW = ('<', '>') # Used by FastText to attach to all words as prefix and suffix