From a17266aa4415c8fab6f6ce02264e71ab4f74eccf Mon Sep 17 00:00:00 2001 From: synodriver Date: Thu, 11 Aug 2022 23:06:28 +0800 Subject: [PATCH 01/18] use nogil, prange and typedview to speedup --- pyopenjtalk/htsengine.pyx | 86 ++++++++----- pyopenjtalk/htsengine/__init__.pxd | 2 +- pyopenjtalk/openjtalk.pyx | 162 ++++++++++++++----------- pyopenjtalk/openjtalk/jpcommon.pxd | 24 ++-- pyopenjtalk/openjtalk/mecab.pxd | 2 +- pyopenjtalk/openjtalk/mecab2njd.pxd | 2 +- pyopenjtalk/openjtalk/njd.pxd | 14 +-- pyopenjtalk/openjtalk/njd2jpcommon.pxd | 2 +- pyopenjtalk/openjtalk/text2mecab.pxd | 2 +- 9 files changed, 167 insertions(+), 129 deletions(-) diff --git a/pyopenjtalk/htsengine.pyx b/pyopenjtalk/htsengine.pyx index a69400c..5a2aa9e 100644 --- a/pyopenjtalk/htsengine.pyx +++ b/pyopenjtalk/htsengine.pyx @@ -8,7 +8,9 @@ cimport numpy as np np.import_array() cimport cython -from libc.stdlib cimport malloc, free +from cython.parallel cimport prange +from cpython.mem cimport PyMem_Malloc, PyMem_Free +from libc.stdint cimport uint8_t from htsengine cimport HTS_Engine from htsengine cimport ( @@ -19,7 +21,10 @@ from htsengine cimport ( HTS_Engine_get_generated_speech, HTS_Engine_get_nsamples ) -cdef class HTSEngine(object): +@cython.final +@cython.no_gc +@cython.freelist(4) +cdef class HTSEngine: """HTSEngine Args: @@ -36,38 +41,46 @@ cdef class HTSEngine(object): self.clear() raise RuntimeError("Failed to initalize HTS_Engine") - def load(self, bytes voice): - cdef char* voices = voice + cpdef inline char load(self, const uint8_t[::1] voice): cdef char ret - ret = HTS_Engine_load(self.engine, &voices, 1) + with nogil: + ret = HTS_Engine_load(self.engine, &(&voice[0]), 1) return ret - def get_sampling_frequency(self): + cpdef inline size_t get_sampling_frequency(self): """Get sampling frequency """ - return HTS_Engine_get_sampling_frequency(self.engine) + cdef size_t ret + with nogil: + ret = HTS_Engine_get_sampling_frequency(self.engine) + return ret - def get_fperiod(self): + cpdef inline size_t get_fperiod(self): """Get frame period""" - return HTS_Engine_get_fperiod(self.engine) + cdef size_t ret + with nogil: + ret = HTS_Engine_get_fperiod(self.engine) + return ret - def set_speed(self, speed=1.0): + cpdef inline void set_speed(self, double speed=1.0): """Set speed Args: speed (float): speed """ - HTS_Engine_set_speed(self.engine, speed) + with nogil: + HTS_Engine_set_speed(self.engine, speed) - def add_half_tone(self, half_tone=0.0): + cpdef inline void add_half_tone(self, double half_tone=0.0): """Additional half tone in log-f0 Args: half_tone (float): additional half tone """ - HTS_Engine_add_half_tone(self.engine, half_tone) + with nogil: + HTS_Engine_add_half_tone(self.engine, half_tone) - def synthesize(self, list labels): + cpdef inline np.ndarray[np.float64_t, ndim=1] synthesize(self, list labels): """Synthesize waveform from list of full-context labels Args: @@ -77,40 +90,49 @@ cdef class HTSEngine(object): np.ndarray: speech waveform """ self.synthesize_from_strings(labels) - x = self.get_generated_speech() + cdef np.ndarray[np.float64_t, ndim=1] x = self.get_generated_speech() self.refresh() return x - def synthesize_from_strings(self, list labels): + cpdef inline char synthesize_from_strings(self, list labels) except? 0: """Synthesize from strings""" cdef size_t num_lines = len(labels) - cdef char **lines = malloc((num_lines + 1) * sizeof(char*)) + cdef char **lines = PyMem_Malloc((num_lines + 1) * sizeof(char*)) + cdef int n for n in range(len(labels)): lines[n] = labels[n] - - cdef char ret = HTS_Engine_synthesize_from_strings(self.engine, lines, num_lines) - free(lines) + cdef char ret + with nogil: + ret = HTS_Engine_synthesize_from_strings(self.engine, lines, num_lines) + PyMem_Free(lines) # todo: use finally if ret != 1: raise RuntimeError("Failed to run synthesize_from_strings") + return ret - def get_generated_speech(self): + cpdef inline np.ndarray[np.float64_t, ndim=1] get_generated_speech(self): """Get generated speech""" cdef size_t nsamples = HTS_Engine_get_nsamples(self.engine) - cdef np.ndarray speech = np.zeros([nsamples], dtype=np.float64) + cdef np.ndarray[np.float64_t, ndim=1] speech = np.zeros([nsamples], dtype=np.float64) + cdef double[::1] speech_view = speech cdef size_t index - for index in range(nsamples): - speech[index] = HTS_Engine_get_generated_speech(self.engine, index) + for index in prange(nsamples, nogil=True): + speech_view[index] = HTS_Engine_get_generated_speech(self.engine, index) return speech - def get_fullcontext_label_format(self): + cpdef inline str get_fullcontext_label_format(self): """Get full-context label format""" - return (HTS_Engine_get_fullcontext_label_format(self.engine)).decode("utf-8") - - def refresh(self): - HTS_Engine_refresh(self.engine) - - def clear(self): - HTS_Engine_clear(self.engine) + cdef const char* f + with nogil: + f = HTS_Engine_get_fullcontext_label_format(self.engine) + return (f).decode("utf-8") + + cpdef inline void refresh(self): + with nogil: + HTS_Engine_refresh(self.engine) + + cpdef inline void clear(self): + with nogil: + HTS_Engine_clear(self.engine) def __dealloc__(self): self.clear() diff --git a/pyopenjtalk/htsengine/__init__.pxd b/pyopenjtalk/htsengine/__init__.pxd index c24d959..fc8f70c 100644 --- a/pyopenjtalk/htsengine/__init__.pxd +++ b/pyopenjtalk/htsengine/__init__.pxd @@ -1,7 +1,7 @@ # distutils: language = c++ -cdef extern from "HTS_engine.h": +cdef extern from "HTS_engine.h" nogil: cdef cppclass _HTS_Engine: pass ctypedef _HTS_Engine HTS_Engine diff --git a/pyopenjtalk/openjtalk.pyx b/pyopenjtalk/openjtalk.pyx index 46e6004..4dd3124 100644 --- a/pyopenjtalk/openjtalk.pyx +++ b/pyopenjtalk/openjtalk.pyx @@ -2,12 +2,14 @@ # cython: boundscheck=True, wraparound=True # cython: c_string_type=unicode, c_string_encoding=ascii +from libc.stdint cimport uint8_t import numpy as np cimport numpy as np np.import_array() cimport cython +from cpython.bytes cimport PyBytes_AS_STRING from openjtalk.mecab cimport Mecab, Mecab_initialize, Mecab_load, Mecab_analysis from openjtalk.mecab cimport Mecab_get_feature, Mecab_get_size, Mecab_refresh, Mecab_clear @@ -21,77 +23,80 @@ from openjtalk.text2mecab cimport text2mecab from openjtalk.mecab2njd cimport mecab2njd from openjtalk.njd2jpcommon cimport njd2jpcommon -cdef njd_node_get_string(_njd.NJDNode* node): +cdef inline str njd_node_get_string(_njd.NJDNode* node): return ((_njd.NJDNode_get_string(node))).decode("utf-8") -cdef njd_node_get_pos(_njd.NJDNode* node): +cdef inline str njd_node_get_pos(_njd.NJDNode* node): return ((_njd.NJDNode_get_pos(node))).decode("utf-8") -cdef njd_node_get_pos_group1(_njd.NJDNode* node): +cdef inline str njd_node_get_pos_group1(_njd.NJDNode* node): return ((_njd.NJDNode_get_pos_group1(node))).decode("utf-8") -cdef njd_node_get_pos_group2(_njd.NJDNode* node): +cdef inline str njd_node_get_pos_group2(_njd.NJDNode* node): return ((_njd.NJDNode_get_pos_group2(node))).decode("utf-8") -cdef njd_node_get_pos_group3(_njd.NJDNode* node): +cdef inline str njd_node_get_pos_group3(_njd.NJDNode* node): return ((_njd.NJDNode_get_pos_group3(node))).decode("utf-8") -cdef njd_node_get_ctype(_njd.NJDNode* node): +cdef inline str njd_node_get_ctype(_njd.NJDNode* node): return ((_njd.NJDNode_get_ctype(node))).decode("utf-8") -cdef njd_node_get_cform(_njd.NJDNode* node): +cdef inline str njd_node_get_cform(_njd.NJDNode* node): return ((_njd.NJDNode_get_cform(node))).decode("utf-8") -cdef njd_node_get_orig(_njd.NJDNode* node): +cdef inline str njd_node_get_orig(_njd.NJDNode* node): return ((_njd.NJDNode_get_orig(node))).decode("utf-8") -cdef njd_node_get_read(_njd.NJDNode* node): +cdef inline str njd_node_get_read(_njd.NJDNode* node): return ((_njd.NJDNode_get_read(node))).decode("utf-8") -cdef njd_node_get_pron(_njd.NJDNode* node): +cdef inline str njd_node_get_pron(_njd.NJDNode* node): return ((_njd.NJDNode_get_pron(node))).decode("utf-8") -cdef njd_node_get_acc(_njd.NJDNode* node): +cdef inline str njd_node_get_acc(_njd.NJDNode* node): return _njd.NJDNode_get_acc(node) -cdef njd_node_get_mora_size(_njd.NJDNode* node): +cdef inline str njd_node_get_mora_size(_njd.NJDNode* node): return _njd.NJDNode_get_mora_size(node) -cdef njd_node_get_chain_rule(_njd.NJDNode* node): +cdef inline str njd_node_get_chain_rule(_njd.NJDNode* node): return ((_njd.NJDNode_get_chain_rule(node))).decode("utf-8") -cdef njd_node_get_chain_flag(_njd.NJDNode* node): - return _njd.NJDNode_get_chain_flag(node) - - -cdef njd_node_print(_njd.NJDNode* node): - return "{},{},{},{},{},{},{},{},{},{},{}/{},{},{}".format( - njd_node_get_string(node), - njd_node_get_pos(node), - njd_node_get_pos_group1(node), - njd_node_get_pos_group2(node), - njd_node_get_pos_group3(node), - njd_node_get_ctype(node), - njd_node_get_cform(node), - njd_node_get_orig(node), - njd_node_get_read(node), - njd_node_get_pron(node), - njd_node_get_acc(node), - njd_node_get_mora_size(node), - njd_node_get_chain_rule(node), - njd_node_get_chain_flag(node) +cdef inline str njd_node_get_chain_flag(_njd.NJDNode* node): + return _njd.NJDNode_get_chain_flag(node) + + +cdef str njd_node_print(_njd.NJDNode* node): + return "{},{},{},{},{},{},{},{},{},{},{}/{},{},{}".format( + njd_node_get_string(node), + njd_node_get_pos(node), + njd_node_get_pos_group1(node), + njd_node_get_pos_group2(node), + njd_node_get_pos_group3(node), + njd_node_get_ctype(node), + njd_node_get_cform(node), + njd_node_get_orig(node), + njd_node_get_read(node), + njd_node_get_pron(node), + njd_node_get_acc(node), + njd_node_get_mora_size(node), + njd_node_get_chain_rule(node), + njd_node_get_chain_flag(node) ) -cdef njd_print(_njd.NJD* njd): +cdef list njd_print(_njd.NJD* njd): cdef _njd.NJDNode* node = njd.head njd_results = [] while node is not NULL: - njd_results.append(njd_node_print(node)) - node = node.next + njd_results.append(njd_node_print(node)) + node = node.next return njd_results -cdef class OpenJTalk(object): +@cython.no_gc +@cython.final +@cython.freelist(4) +cdef class OpenJTalk: """OpenJTalk Args: @@ -112,62 +117,73 @@ cdef class OpenJTalk(object): r = self._load(dn_mecab) if r != 1: - self._clear() - raise RuntimeError("Failed to initalize Mecab") + self._clear() + raise RuntimeError("Failed to initalize Mecab") + cpdef inline void _clear(self): + with nogil: + Mecab_clear(self.mecab) + NJD_clear(self.njd) + JPCommon_clear(self.jpcommon) - def _clear(self): - Mecab_clear(self.mecab) - NJD_clear(self.njd) - JPCommon_clear(self.jpcommon) + cpdef inline int _load(self, const uint8_t[::1] dn_mecab): + cdef int ret + with nogil: + ret = Mecab_load(self.mecab, &dn_mecab[0]) + return ret - def _load(self, bytes dn_mecab): - return Mecab_load(self.mecab, dn_mecab) - - def run_frontend(self, text, verbose=0): + cpdef inline tuple run_frontend(self, object text, int verbose=0): """Run OpenJTalk's text processing frontend """ if isinstance(text, str): - text = text.encode("utf-8") - cdef char buff[8192] - text2mecab(buff, text) - Mecab_analysis(self.mecab, buff) - mecab2njd(self.njd, Mecab_get_feature(self.mecab), Mecab_get_size(self.mecab)) - _njd.njd_set_pronunciation(self.njd) - _njd.njd_set_digit(self.njd) - _njd.njd_set_accent_phrase(self.njd) - _njd.njd_set_accent_type(self.njd) - _njd.njd_set_unvoiced_vowel(self.njd) - _njd.njd_set_long_vowel(self.njd) - njd2jpcommon(self.jpcommon, self.njd) - JPCommon_make_label(self.jpcommon) - - cdef int label_size = JPCommon_get_label_size(self.jpcommon) - cdef char** label_feature - label_feature = JPCommon_get_label_feature(self.jpcommon) + text = text.encode("utf-8") + cdef: + char buff[8192] + const char* text_ptr + int label_size + char** label_feature + text_ptr = PyBytes_AS_STRING(text) + with nogil: + text2mecab(buff, text_ptr) + Mecab_analysis(self.mecab, buff) + mecab2njd(self.njd, Mecab_get_feature(self.mecab), Mecab_get_size(self.mecab)) + _njd.njd_set_pronunciation(self.njd) + _njd.njd_set_digit(self.njd) + _njd.njd_set_accent_phrase(self.njd) + _njd.njd_set_accent_type(self.njd) + _njd.njd_set_unvoiced_vowel(self.njd) + _njd.njd_set_long_vowel(self.njd) + njd2jpcommon(self.jpcommon, self.njd) + JPCommon_make_label(self.jpcommon) + + label_size = JPCommon_get_label_size(self.jpcommon) + JPCommon_get_label_feature(self.jpcommon) labels = [] + cdef int i for i in range(label_size): - # This will create a copy of c string - # http://cython.readthedocs.io/en/latest/src/tutorial/strings.html - labels.append(label_feature[i]) + # This will create a copy of c string + # http://cython.readthedocs.io/en/latest/src/tutorial/strings.html + labels.append(label_feature[i]) - njd_results = njd_print(self.njd) + cdef list njd_results = njd_print(self.njd) if verbose > 0: - NJD_print(self.njd) + NJD_print(self.njd) # Note that this will release memory for label feature - JPCommon_refresh(self.jpcommon) - NJD_refresh(self.njd) - Mecab_refresh(self.mecab) + with nogil: + JPCommon_refresh(self.jpcommon) + NJD_refresh(self.njd) + Mecab_refresh(self.mecab) return njd_results, labels - def g2p(self, text, kana=False, join=True): + cpdef inline g2p(self, object text, bint kana=False, bint join=True): """Grapheme-to-phoeneme (G2P) conversion """ + cdef list njd_results, labels njd_results, labels = self.run_frontend(text) if not kana: prons = list(map(lambda s: s.split("-")[1].split("+")[0], labels[1:-1])) diff --git a/pyopenjtalk/openjtalk/jpcommon.pxd b/pyopenjtalk/openjtalk/jpcommon.pxd index 8e86bea..b120fc1 100644 --- a/pyopenjtalk/openjtalk/jpcommon.pxd +++ b/pyopenjtalk/openjtalk/jpcommon.pxd @@ -2,21 +2,21 @@ from libc.stdio cimport FILE -cdef extern from "jpcommon.h": +cdef extern from "jpcommon.h" nogil: cdef cppclass JPCommonNode: - char *pron - char *pos - char *ctype - char *cform - int acc - int chain_flag - void *prev - void *next + char *pron + char *pos + char *ctype + char *cform + int acc + int chain_flag + void *prev + void *next cdef cppclass JPCommon: - JPCommonNode *head - JPCommonNode *tail - void *label + JPCommonNode *head + JPCommonNode *tail + void *label void JPCommon_initialize(JPCommon * jpcommon) void JPCommon_push(JPCommon * jpcommon, JPCommonNode * node) diff --git a/pyopenjtalk/openjtalk/mecab.pxd b/pyopenjtalk/openjtalk/mecab.pxd index bd367c7..2449db0 100644 --- a/pyopenjtalk/openjtalk/mecab.pxd +++ b/pyopenjtalk/openjtalk/mecab.pxd @@ -1,6 +1,6 @@ # distutils: language = c++ -cdef extern from "mecab.h": +cdef extern from "mecab.h" nogil: cdef cppclass Mecab: char **feature int size diff --git a/pyopenjtalk/openjtalk/mecab2njd.pxd b/pyopenjtalk/openjtalk/mecab2njd.pxd index be57ccc..a86d790 100644 --- a/pyopenjtalk/openjtalk/mecab2njd.pxd +++ b/pyopenjtalk/openjtalk/mecab2njd.pxd @@ -2,5 +2,5 @@ from .njd cimport NJD -cdef extern from "mecab2njd.h": +cdef extern from "mecab2njd.h" nogil: void mecab2njd(NJD * njd, char **feature, int size); diff --git a/pyopenjtalk/openjtalk/njd.pxd b/pyopenjtalk/openjtalk/njd.pxd index 38d3887..2831324 100644 --- a/pyopenjtalk/openjtalk/njd.pxd +++ b/pyopenjtalk/openjtalk/njd.pxd @@ -2,7 +2,7 @@ from libc.stdio cimport FILE -cdef extern from "njd.h": +cdef extern from "njd.h" nogil: cdef cppclass NJDNode: char *string char *pos @@ -78,20 +78,20 @@ cdef extern from "njd.h": void NJD_refresh(NJD * njd) void NJD_clear(NJD * wl) -cdef extern from "njd_set_accent_phrase.h": +cdef extern from "njd_set_accent_phrase.h" nogil: void njd_set_accent_phrase(NJD * njd) -cdef extern from "njd_set_accent_type.h": +cdef extern from "njd_set_accent_type.h" nogil: void njd_set_accent_type(NJD * njd) -cdef extern from "njd_set_digit.h": +cdef extern from "njd_set_digit.h" nogil: void njd_set_digit(NJD * njd) -cdef extern from "njd_set_long_vowel.h": +cdef extern from "njd_set_long_vowel.h" nogil: void njd_set_long_vowel(NJD * njd) -cdef extern from "njd_set_pronunciation.h": +cdef extern from "njd_set_pronunciation.h" nogil: void njd_set_pronunciation(NJD * njd) -cdef extern from "njd_set_unvoiced_vowel.h": +cdef extern from "njd_set_unvoiced_vowel.h" nogil: void njd_set_unvoiced_vowel(NJD * njd) diff --git a/pyopenjtalk/openjtalk/njd2jpcommon.pxd b/pyopenjtalk/openjtalk/njd2jpcommon.pxd index 8309288..4d6fb62 100644 --- a/pyopenjtalk/openjtalk/njd2jpcommon.pxd +++ b/pyopenjtalk/openjtalk/njd2jpcommon.pxd @@ -3,5 +3,5 @@ from .jpcommon cimport JPCommon from .njd cimport NJD -cdef extern from "njd2jpcommon.h": +cdef extern from "njd2jpcommon.h" nogil: void njd2jpcommon(JPCommon * jpcommon, NJD * njd) diff --git a/pyopenjtalk/openjtalk/text2mecab.pxd b/pyopenjtalk/openjtalk/text2mecab.pxd index 6081757..718f7a5 100644 --- a/pyopenjtalk/openjtalk/text2mecab.pxd +++ b/pyopenjtalk/openjtalk/text2mecab.pxd @@ -1,4 +1,4 @@ # distutils: language = c++ -cdef extern from "text2mecab.h": +cdef extern from "text2mecab.h" nogil: void text2mecab(char *output, const char *input) From 8eed18ceb04b0ef8b6dd8c8d3d271f19fe0dc273 Mon Sep 17 00:00:00 2001 From: synodriver <624805065@qq.com> Date: Fri, 12 Aug 2022 03:35:59 +0000 Subject: [PATCH 02/18] speed up and fix omp --- pyopenjtalk/htsengine.pyx | 10 ++++++---- pyopenjtalk/openjtalk.pyx | 8 ++++---- setup.py | 8 ++++---- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/pyopenjtalk/htsengine.pyx b/pyopenjtalk/htsengine.pyx index 5a2aa9e..d2117f1 100644 --- a/pyopenjtalk/htsengine.pyx +++ b/pyopenjtalk/htsengine.pyx @@ -1,6 +1,6 @@ # coding: utf-8 -# cython: boundscheck=True, wraparound=True -# cython: c_string_type=unicode, c_string_encoding=ascii +# cython: boundscheck=False, wraparound=True +# cython: c_string_type=unicode, c_string_encoding=ascii, cdivision=True import numpy as np @@ -42,9 +42,11 @@ cdef class HTSEngine: raise RuntimeError("Failed to initalize HTS_Engine") cpdef inline char load(self, const uint8_t[::1] voice): - cdef char ret + cdef: + char ret + const uint8_t *voice_ptr = &voice[0] with nogil: - ret = HTS_Engine_load(self.engine, &(&voice[0]), 1) + ret = HTS_Engine_load(self.engine, (&voice_ptr), 1) return ret cpdef inline size_t get_sampling_frequency(self): diff --git a/pyopenjtalk/openjtalk.pyx b/pyopenjtalk/openjtalk.pyx index 4dd3124..9bb198f 100644 --- a/pyopenjtalk/openjtalk.pyx +++ b/pyopenjtalk/openjtalk.pyx @@ -1,6 +1,6 @@ # coding: utf-8 -# cython: boundscheck=True, wraparound=True -# cython: c_string_type=unicode, c_string_encoding=ascii +# cython: boundscheck=False, wraparound=True +# cython: c_string_type=unicode, c_string_encoding=ascii, cdivision=True from libc.stdint cimport uint8_t import numpy as np @@ -66,7 +66,7 @@ cdef inline str njd_node_get_chain_flag(_njd.NJDNode* node): return _njd.NJDNode_get_chain_flag(node) -cdef str njd_node_print(_njd.NJDNode* node): +cdef inline str njd_node_print(_njd.NJDNode* node): return "{},{},{},{},{},{},{},{},{},{},{}/{},{},{}".format( njd_node_get_string(node), njd_node_get_pos(node), @@ -180,7 +180,7 @@ cdef class OpenJTalk: return njd_results, labels - cpdef inline g2p(self, object text, bint kana=False, bint join=True): + def g2p(self, object text, bint kana=False, bint join=True): """Grapheme-to-phoeneme (G2P) conversion """ cdef list njd_results, labels diff --git a/setup.py b/setup.py index c68db9c..971a08b 100644 --- a/setup.py +++ b/setup.py @@ -180,8 +180,8 @@ def escape_macros(macros): name="pyopenjtalk.openjtalk", sources=[join("pyopenjtalk", "openjtalk" + ext)] + all_src, include_dirs=[np.get_include()] + include_dirs, - extra_compile_args=[], - extra_link_args=[], + extra_compile_args=['-fopenmp'], + extra_link_args=['-fopenmp'], language="c++", define_macros=custom_define_macros( [ @@ -204,8 +204,8 @@ def escape_macros(macros): name="pyopenjtalk.htsengine", sources=[join("pyopenjtalk", "htsengine" + ext)] + all_htsengine_src, include_dirs=[np.get_include(), join(htsengine_src_top, "include")], - extra_compile_args=[], - extra_link_args=[], + extra_compile_args=['-fopenmp'], + extra_link_args=['-fopenmp'], libraries=["winmm"] if platform_is_windows else [], language="c++", define_macros=custom_define_macros( From 5af6b9d859800f5efe55c468b1967a533bc64d7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Fri, 12 Aug 2022 11:41:04 +0800 Subject: [PATCH 03/18] feat: add dict in setup time --- setup.py | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c68db9c..b7f8feb 100644 --- a/setup.py +++ b/setup.py @@ -33,6 +33,10 @@ "/execution-charset:utf-8", ] +_dict_folder_name = "open_jtalk_dic_utf_8-1.11" +_dict_download_url = "https://github.com/r9y9/open_jtalk/releases/download/v1.11.1" +_DICT_URL = f"{_dict_download_url}/{_dict_folder_name}.tar.gz" + try: if not _CYTHON_INSTALLED: raise ImportError("No supported version of Cython installed.") @@ -138,6 +142,33 @@ def escape_macros(macros): # open_jtalk sources src_top = join("lib", "open_jtalk", "src") + +# https://github.com/tqdm/tqdm#hooks-and-callbacks +class _TqdmUpTo(tqdm): # type: ignore + def update_to(self, b=1, bsize=1, tsize=None): + if tsize is not None: + self.total = tsize + return self.update(b * bsize - self.n) + + +# extract dic +filename = "dic.tar.gz" +print('Downloading: "{}"'.format(_DICT_URL)) +with _TqdmUpTo( + unit="B", + unit_scale=True, + unit_divisor=1024, + miniters=1, + desc="dic.tar.gz", +) as t: # all optional kwargs + urlretrieve(_DICT_URL, filename, reporthook=t.update_to) + t.total = t.n +print("Extracting tar file {}".format(filename)) +with tarfile.open(filename, mode="r|gz") as f: + f.extractall(path="./") +os.remove(filename) + + # generate config.h for mecab # NOTE: need to run cmake to generate config.h # we could do it on python side but it would be very tricky, @@ -272,7 +303,7 @@ def run(self): url="https://github.com/r9y9/pyopenjtalk", license="MIT", packages=find_packages(), - package_data={"": ["htsvoice/*"]}, + package_data={"": ["htsvoice/*", f"{_dict_folder_name}/*"]}, ext_modules=ext_modules, cmdclass=cmdclass, install_requires=[ From 23641047b3321fc0dac2f8e21446291391555f1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Fri, 12 Aug 2022 11:45:38 +0800 Subject: [PATCH 04/18] add imports --- setup.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/setup.py b/setup.py index b7f8feb..ba0e767 100644 --- a/setup.py +++ b/setup.py @@ -14,6 +14,15 @@ import setuptools.command.develop from setuptools import Extension, find_packages, setup +import six +from tqdm.auto import tqdm +if six.PY2: + from urllib import urlretrieve +else: + from urllib.request import urlretrieve +import tarfile + + platform_is_windows = sys.platform == "win32" version = "0.3.0" From 94b8b10e09a16d7b91f7aaad95c764fa9aad92e5 Mon Sep 17 00:00:00 2001 From: synodriver <624805065@qq.com> Date: Fri, 12 Aug 2022 08:54:07 +0000 Subject: [PATCH 05/18] fix coredump --- pyopenjtalk/htsengine.pyx | 7 +++--- pyopenjtalk/htsengine/__init__.pxd | 2 ++ pyopenjtalk/openjtalk.pyx | 34 +++++++++++++------------- pyopenjtalk/openjtalk/__init__.pxd | 1 + pyopenjtalk/openjtalk/jpcommon.pxd | 1 + pyopenjtalk/openjtalk/mecab.pxd | 1 + pyopenjtalk/openjtalk/mecab2njd.pxd | 1 + pyopenjtalk/openjtalk/njd.pxd | 1 + pyopenjtalk/openjtalk/njd2jpcommon.pxd | 1 + pyopenjtalk/openjtalk/text2mecab.pxd | 1 + 10 files changed, 30 insertions(+), 20 deletions(-) diff --git a/pyopenjtalk/htsengine.pyx b/pyopenjtalk/htsengine.pyx index d2117f1..868c07a 100644 --- a/pyopenjtalk/htsengine.pyx +++ b/pyopenjtalk/htsengine.pyx @@ -1,5 +1,6 @@ # coding: utf-8 -# cython: boundscheck=False, wraparound=True +# cython: language_level=3 +# cython: boundscheck=False, wraparound=False # cython: c_string_type=unicode, c_string_encoding=ascii, cdivision=True import numpy as np @@ -12,8 +13,8 @@ from cython.parallel cimport prange from cpython.mem cimport PyMem_Malloc, PyMem_Free from libc.stdint cimport uint8_t -from htsengine cimport HTS_Engine -from htsengine cimport ( +from pyopenjtalk.htsengine cimport HTS_Engine +from pyopenjtalk.htsengine cimport ( HTS_Engine_initialize, HTS_Engine_load, HTS_Engine_clear, HTS_Engine_refresh, HTS_Engine_get_sampling_frequency, HTS_Engine_get_fperiod, HTS_Engine_set_speed, HTS_Engine_add_half_tone, diff --git a/pyopenjtalk/htsengine/__init__.pxd b/pyopenjtalk/htsengine/__init__.pxd index fc8f70c..e033ea5 100644 --- a/pyopenjtalk/htsengine/__init__.pxd +++ b/pyopenjtalk/htsengine/__init__.pxd @@ -1,6 +1,8 @@ # distutils: language = c++ +# cython: language_level=3 + cdef extern from "HTS_engine.h" nogil: cdef cppclass _HTS_Engine: pass diff --git a/pyopenjtalk/openjtalk.pyx b/pyopenjtalk/openjtalk.pyx index 9bb198f..1ef7923 100644 --- a/pyopenjtalk/openjtalk.pyx +++ b/pyopenjtalk/openjtalk.pyx @@ -1,5 +1,6 @@ # coding: utf-8 -# cython: boundscheck=False, wraparound=True +# cython: language_level=3 +# cython: boundscheck=False, wraparound=False # cython: c_string_type=unicode, c_string_encoding=ascii, cdivision=True from libc.stdint cimport uint8_t @@ -11,17 +12,17 @@ np.import_array() cimport cython from cpython.bytes cimport PyBytes_AS_STRING -from openjtalk.mecab cimport Mecab, Mecab_initialize, Mecab_load, Mecab_analysis -from openjtalk.mecab cimport Mecab_get_feature, Mecab_get_size, Mecab_refresh, Mecab_clear -from openjtalk.njd cimport NJD, NJD_initialize, NJD_refresh, NJD_print, NJD_clear -from openjtalk cimport njd as _njd -from openjtalk.jpcommon cimport JPCommon, JPCommon_initialize,JPCommon_make_label -from openjtalk.jpcommon cimport JPCommon_get_label_size, JPCommon_get_label_feature -from openjtalk.jpcommon cimport JPCommon_refresh, JPCommon_clear -from openjtalk cimport njd2jpcommon -from openjtalk.text2mecab cimport text2mecab -from openjtalk.mecab2njd cimport mecab2njd -from openjtalk.njd2jpcommon cimport njd2jpcommon +from pyopenjtalk.openjtalk.mecab cimport Mecab, Mecab_initialize, Mecab_load, Mecab_analysis +from pyopenjtalk.openjtalk.mecab cimport Mecab_get_feature, Mecab_get_size, Mecab_refresh, Mecab_clear +from pyopenjtalk.openjtalk.njd cimport NJD, NJD_initialize, NJD_refresh, NJD_print, NJD_clear +from pyopenjtalk.openjtalk cimport njd as _njd +from pyopenjtalk.openjtalk.jpcommon cimport JPCommon, JPCommon_initialize,JPCommon_make_label +from pyopenjtalk.openjtalk.jpcommon cimport JPCommon_get_label_size, JPCommon_get_label_feature +from pyopenjtalk.openjtalk.jpcommon cimport JPCommon_refresh, JPCommon_clear +from pyopenjtalk.openjtalk cimport njd2jpcommon +from pyopenjtalk.openjtalk.text2mecab cimport text2mecab +from pyopenjtalk.openjtalk.mecab2njd cimport mecab2njd +from pyopenjtalk.openjtalk.njd2jpcommon cimport njd2jpcommon cdef inline str njd_node_get_string(_njd.NJDNode* node): return ((_njd.NJDNode_get_string(node))).decode("utf-8") @@ -53,16 +54,16 @@ cdef inline str njd_node_get_read(_njd.NJDNode* node): cdef inline str njd_node_get_pron(_njd.NJDNode* node): return ((_njd.NJDNode_get_pron(node))).decode("utf-8") -cdef inline str njd_node_get_acc(_njd.NJDNode* node): +cdef inline int njd_node_get_acc(_njd.NJDNode* node): return _njd.NJDNode_get_acc(node) -cdef inline str njd_node_get_mora_size(_njd.NJDNode* node): +cdef inline int njd_node_get_mora_size(_njd.NJDNode* node): return _njd.NJDNode_get_mora_size(node) cdef inline str njd_node_get_chain_rule(_njd.NJDNode* node): return ((_njd.NJDNode_get_chain_rule(node))).decode("utf-8") -cdef inline str njd_node_get_chain_flag(_njd.NJDNode* node): +cdef inline int njd_node_get_chain_flag(_njd.NJDNode* node): return _njd.NJDNode_get_chain_flag(node) @@ -158,7 +159,7 @@ cdef class OpenJTalk: JPCommon_make_label(self.jpcommon) label_size = JPCommon_get_label_size(self.jpcommon) - JPCommon_get_label_feature(self.jpcommon) + label_feature = JPCommon_get_label_feature(self.jpcommon) labels = [] cdef int i @@ -177,7 +178,6 @@ cdef class OpenJTalk: JPCommon_refresh(self.jpcommon) NJD_refresh(self.njd) Mecab_refresh(self.mecab) - return njd_results, labels def g2p(self, object text, bint kana=False, bint join=True): diff --git a/pyopenjtalk/openjtalk/__init__.pxd b/pyopenjtalk/openjtalk/__init__.pxd index e69de29..019523c 100644 --- a/pyopenjtalk/openjtalk/__init__.pxd +++ b/pyopenjtalk/openjtalk/__init__.pxd @@ -0,0 +1 @@ +# cython: language_level=3 \ No newline at end of file diff --git a/pyopenjtalk/openjtalk/jpcommon.pxd b/pyopenjtalk/openjtalk/jpcommon.pxd index b120fc1..2dd450a 100644 --- a/pyopenjtalk/openjtalk/jpcommon.pxd +++ b/pyopenjtalk/openjtalk/jpcommon.pxd @@ -1,4 +1,5 @@ # distutils: language = c++ +# cython: language_level=3 from libc.stdio cimport FILE diff --git a/pyopenjtalk/openjtalk/mecab.pxd b/pyopenjtalk/openjtalk/mecab.pxd index 2449db0..2aa39e5 100644 --- a/pyopenjtalk/openjtalk/mecab.pxd +++ b/pyopenjtalk/openjtalk/mecab.pxd @@ -1,4 +1,5 @@ # distutils: language = c++ +# cython: language_level=3 cdef extern from "mecab.h" nogil: cdef cppclass Mecab: diff --git a/pyopenjtalk/openjtalk/mecab2njd.pxd b/pyopenjtalk/openjtalk/mecab2njd.pxd index a86d790..d8fb8c4 100644 --- a/pyopenjtalk/openjtalk/mecab2njd.pxd +++ b/pyopenjtalk/openjtalk/mecab2njd.pxd @@ -1,4 +1,5 @@ # distutils: language = c++ +# cython: language_level=3 from .njd cimport NJD diff --git a/pyopenjtalk/openjtalk/njd.pxd b/pyopenjtalk/openjtalk/njd.pxd index 2831324..abc1f2d 100644 --- a/pyopenjtalk/openjtalk/njd.pxd +++ b/pyopenjtalk/openjtalk/njd.pxd @@ -1,4 +1,5 @@ # distutils: language = c++ +# cython: language_level=3 from libc.stdio cimport FILE diff --git a/pyopenjtalk/openjtalk/njd2jpcommon.pxd b/pyopenjtalk/openjtalk/njd2jpcommon.pxd index 4d6fb62..680aadf 100644 --- a/pyopenjtalk/openjtalk/njd2jpcommon.pxd +++ b/pyopenjtalk/openjtalk/njd2jpcommon.pxd @@ -1,4 +1,5 @@ # distutils: language = c++ +# cython: language_level=3 from .jpcommon cimport JPCommon from .njd cimport NJD diff --git a/pyopenjtalk/openjtalk/text2mecab.pxd b/pyopenjtalk/openjtalk/text2mecab.pxd index 718f7a5..0190f1a 100644 --- a/pyopenjtalk/openjtalk/text2mecab.pxd +++ b/pyopenjtalk/openjtalk/text2mecab.pxd @@ -1,4 +1,5 @@ # distutils: language = c++ +# cython: language_level=3 cdef extern from "text2mecab.h" nogil: void text2mecab(char *output, const char *input) From 8bde56c0219805b2b0e606e816c7cdcfe9253dcd Mon Sep 17 00:00:00 2001 From: synodriver <624805065@qq.com> Date: Fri, 12 Aug 2022 09:06:55 +0000 Subject: [PATCH 06/18] add build ci --- .github/workflows/build_whl.yml | 37 ++++++++++++++++++++++++++++++++ .github/workflows/upload.yml | 38 +++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 .github/workflows/build_whl.yml create mode 100644 .github/workflows/upload.yml diff --git a/.github/workflows/build_whl.yml b/.github/workflows/build_whl.yml new file mode 100644 index 0000000..6799c9a --- /dev/null +++ b/.github/workflows/build_whl.yml @@ -0,0 +1,37 @@ +name: build wheel + +on: + workflow_dispatch: + +jobs: + build: + + runs-on: ${{ matrix.os }} + strategy: + matrix: + python-version: ["3.6", "3.7", "3.8", "3.9", "3.10"] + os: [ubuntu-latest, macos-latest, windows-latest] + fail-fast: false + + steps: + - uses: actions/checkout@v2 + - name: Check out recursively + run: git submodule update --init --recursive + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install --upgrade setuptools + python -m pip install --upgrade wheel + pip install flake8 pytest + pip install cython numpy tqdm six + - name: build_whl + run: | + python setup.py sdist bdist_wheel + - uses: actions/upload-artifact@v2 + with: + name: ${{ matrix.os }}-${{ matrix.python-version }} + path: dist/*.whl \ No newline at end of file diff --git a/.github/workflows/upload.yml b/.github/workflows/upload.yml new file mode 100644 index 0000000..748a021 --- /dev/null +++ b/.github/workflows/upload.yml @@ -0,0 +1,38 @@ +name: upload + +on: + workflow_dispatch: + +jobs: + build: + + runs-on: ${{ matrix.os }} + strategy: + matrix: + python-version: ["3.6", "3.7", "3.8", "3.9", "3.10"] + os: [ubuntu-latest, macos-latest, windows-latest] + fail-fast: false + + steps: + - uses: actions/checkout@v2 + - name: Check out recursively + run: git submodule update --init --recursive + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install --upgrade setuptools + python -m pip install --upgrade wheel + pip install flake8 pytest + pip install twine + pip install cython numpy tqdm six + - name: build_whl + run: | + python setup.py sdist bdist_wheel + python changename.py + - name: Publish package + run: | + twine upload dist/* -u ${{ secrets.PYPI_USER }} -p ${{ secrets.PYPI_PASSWORD }} \ No newline at end of file From bcfa63a2b722f6ccd08d1aa725b40669373bdf78 Mon Sep 17 00:00:00 2001 From: synodriver <624805065@qq.com> Date: Fri, 12 Aug 2022 09:19:29 +0000 Subject: [PATCH 07/18] fix macox build --- pyopenjtalk/htsengine.pyx | 4 ++-- pyopenjtalk/openjtalk.pyx | 2 +- setup.py | 27 +++++++++++++++++++++++---- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/pyopenjtalk/htsengine.pyx b/pyopenjtalk/htsengine.pyx index 868c07a..cdc6018 100644 --- a/pyopenjtalk/htsengine.pyx +++ b/pyopenjtalk/htsengine.pyx @@ -117,9 +117,9 @@ cdef class HTSEngine: cdef size_t nsamples = HTS_Engine_get_nsamples(self.engine) cdef np.ndarray[np.float64_t, ndim=1] speech = np.zeros([nsamples], dtype=np.float64) cdef double[::1] speech_view = speech - cdef size_t index + cdef int index for index in prange(nsamples, nogil=True): - speech_view[index] = HTS_Engine_get_generated_speech(self.engine, index) + speech_view[index] = HTS_Engine_get_generated_speech(self.engine, index) return speech cpdef inline str get_fullcontext_label_format(self): diff --git a/pyopenjtalk/openjtalk.pyx b/pyopenjtalk/openjtalk.pyx index 1ef7923..4f37112 100644 --- a/pyopenjtalk/openjtalk.pyx +++ b/pyopenjtalk/openjtalk.pyx @@ -1,6 +1,6 @@ # coding: utf-8 # cython: language_level=3 -# cython: boundscheck=False, wraparound=False +# cython: boundscheck=False, wraparound=True # cython: c_string_type=unicode, c_string_encoding=ascii, cdivision=True from libc.stdint cimport uint8_t diff --git a/setup.py b/setup.py index 971a08b..6308e10 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,7 @@ import os import subprocess import sys +import platform from distutils.errors import DistutilsExecError from distutils.spawn import spawn from distutils.version import LooseVersion @@ -70,6 +71,24 @@ def build_extensions(self): if not os.path.exists(join("pyopenjtalk", "openjtalk" + ext)): raise RuntimeError("Cython is required to generate C++ code") +# make openmp available +system = platform.system() +if system == "Windows": + extra_compile_args = [] + extra_link_args = ['/openmp'] +elif system == "Linux": + extra_compile_args = ['-fopenmp'] + extra_link_args = ['-fopenmp'] +elif system == "Darwin": + os.system("brew install llvm libomp") + os.system("brew install clang-omp") + # os.environ["CPP"] = "/usr/local/opt/llvm/bin/clang" + extra_compile_args = [] + extra_link_args = [] +else: + extra_compile_args = ['-fopenmp'] + extra_link_args = ['-fopenmp'] + # Workaround for `distutils.spawn` problem on Windows python < 3.9 # See details: [bpo-39763: distutils.spawn now uses subprocess (GH-18743)] @@ -180,8 +199,8 @@ def escape_macros(macros): name="pyopenjtalk.openjtalk", sources=[join("pyopenjtalk", "openjtalk" + ext)] + all_src, include_dirs=[np.get_include()] + include_dirs, - extra_compile_args=['-fopenmp'], - extra_link_args=['-fopenmp'], + extra_compile_args=extra_compile_args, + extra_link_args=extra_link_args, language="c++", define_macros=custom_define_macros( [ @@ -204,8 +223,8 @@ def escape_macros(macros): name="pyopenjtalk.htsengine", sources=[join("pyopenjtalk", "htsengine" + ext)] + all_htsengine_src, include_dirs=[np.get_include(), join(htsengine_src_top, "include")], - extra_compile_args=['-fopenmp'], - extra_link_args=['-fopenmp'], + extra_compile_args=extra_compile_args, + extra_link_args=extra_link_args, libraries=["winmm"] if platform_is_windows else [], language="c++", define_macros=custom_define_macros( From d351b356ebb5ed790099ecd53fccb49d76004008 Mon Sep 17 00:00:00 2001 From: synodriver <624805065@qq.com> Date: Fri, 12 Aug 2022 09:19:29 +0000 Subject: [PATCH 08/18] fix macox build --- pyopenjtalk/htsengine.pyx | 4 ++-- pyopenjtalk/openjtalk.pyx | 2 +- setup.py | 27 +++++++++++++++++++++++---- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/pyopenjtalk/htsengine.pyx b/pyopenjtalk/htsengine.pyx index 868c07a..cdc6018 100644 --- a/pyopenjtalk/htsengine.pyx +++ b/pyopenjtalk/htsengine.pyx @@ -117,9 +117,9 @@ cdef class HTSEngine: cdef size_t nsamples = HTS_Engine_get_nsamples(self.engine) cdef np.ndarray[np.float64_t, ndim=1] speech = np.zeros([nsamples], dtype=np.float64) cdef double[::1] speech_view = speech - cdef size_t index + cdef int index for index in prange(nsamples, nogil=True): - speech_view[index] = HTS_Engine_get_generated_speech(self.engine, index) + speech_view[index] = HTS_Engine_get_generated_speech(self.engine, index) return speech cpdef inline str get_fullcontext_label_format(self): diff --git a/pyopenjtalk/openjtalk.pyx b/pyopenjtalk/openjtalk.pyx index 1ef7923..4f37112 100644 --- a/pyopenjtalk/openjtalk.pyx +++ b/pyopenjtalk/openjtalk.pyx @@ -1,6 +1,6 @@ # coding: utf-8 # cython: language_level=3 -# cython: boundscheck=False, wraparound=False +# cython: boundscheck=False, wraparound=True # cython: c_string_type=unicode, c_string_encoding=ascii, cdivision=True from libc.stdint cimport uint8_t diff --git a/setup.py b/setup.py index 971a08b..8a6ddcc 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,7 @@ import os import subprocess import sys +import platform from distutils.errors import DistutilsExecError from distutils.spawn import spawn from distutils.version import LooseVersion @@ -70,6 +71,24 @@ def build_extensions(self): if not os.path.exists(join("pyopenjtalk", "openjtalk" + ext)): raise RuntimeError("Cython is required to generate C++ code") +# make openmp available +system = platform.system() +if system == "Windows": + extra_compile_args = [] + extra_link_args = ['/openmp'] +elif system == "Linux": + extra_compile_args = ['-fopenmp'] + extra_link_args = ['-fopenmp'] +elif system == "Darwin": + os.system("brew install llvm libomp") + os.system("brew install clang-omp") + # os.environ["CPP"] = "/usr/local/opt/llvm/bin/clang" + extra_compile_args = ["-Xpreprocessor", "-fopenmp"] + extra_link_args = ["-Xpreprocessor", "-fopenmp"] +else: + extra_compile_args = ['-fopenmp'] + extra_link_args = ['-fopenmp'] + # Workaround for `distutils.spawn` problem on Windows python < 3.9 # See details: [bpo-39763: distutils.spawn now uses subprocess (GH-18743)] @@ -180,8 +199,8 @@ def escape_macros(macros): name="pyopenjtalk.openjtalk", sources=[join("pyopenjtalk", "openjtalk" + ext)] + all_src, include_dirs=[np.get_include()] + include_dirs, - extra_compile_args=['-fopenmp'], - extra_link_args=['-fopenmp'], + extra_compile_args=extra_compile_args, + extra_link_args=extra_link_args, language="c++", define_macros=custom_define_macros( [ @@ -204,8 +223,8 @@ def escape_macros(macros): name="pyopenjtalk.htsengine", sources=[join("pyopenjtalk", "htsengine" + ext)] + all_htsengine_src, include_dirs=[np.get_include(), join(htsengine_src_top, "include")], - extra_compile_args=['-fopenmp'], - extra_link_args=['-fopenmp'], + extra_compile_args=extra_compile_args, + extra_link_args=extra_link_args, libraries=["winmm"] if platform_is_windows else [], language="c++", define_macros=custom_define_macros( From b3de8752bc13593b1a2c634c023c117cae6e6301 Mon Sep 17 00:00:00 2001 From: fumiama <41315874+fumiama@users.noreply.github.com> Date: Fri, 12 Aug 2022 21:02:58 +0800 Subject: [PATCH 09/18] fix args --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 3eaab0a..36fae85 100644 --- a/setup.py +++ b/setup.py @@ -93,8 +93,8 @@ def build_extensions(self): extra_compile_args = ['-fopenmp'] extra_link_args = ['-fopenmp'] elif system == "Darwin": - os.system("brew install libomp") - extra_compile_args = ['-Xpreprocessor -fopenmp'] + os.system('brew install libomp') + extra_compile_args = ['-Xpreprocessor', '-fopenmp'] extra_link_args = ['-lomp'] else: extra_compile_args = ['-fopenmp'] From acbebe5bb78107a0ded15a21a5933e7bf7974bf4 Mon Sep 17 00:00:00 2001 From: fumiama <41315874+fumiama@users.noreply.github.com> Date: Fri, 12 Aug 2022 21:03:48 +0800 Subject: [PATCH 10/18] add ignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 4d3b0cb..8b2bc54 100644 --- a/.gitignore +++ b/.gitignore @@ -194,3 +194,4 @@ Temporary Items # Linux trash folder which might appear on any partition or disk .Trash-* +dic.tar.gz From a342129e6e4c65718c0a210a04e479b05b012efb Mon Sep 17 00:00:00 2001 From: fumiama <41315874+fumiama@users.noreply.github.com> Date: Fri, 12 Aug 2022 21:07:22 +0800 Subject: [PATCH 11/18] add link arg --- setup.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index 36fae85..ecb4117 100644 --- a/setup.py +++ b/setup.py @@ -95,7 +95,7 @@ def build_extensions(self): elif system == "Darwin": os.system('brew install libomp') extra_compile_args = ['-Xpreprocessor', '-fopenmp'] - extra_link_args = ['-lomp'] + extra_link_args = ['-L/usr/local/lib', '-lomp'] else: extra_compile_args = ['-fopenmp'] extra_link_args = ['-fopenmp'] @@ -176,7 +176,7 @@ def update_to(self, b=1, bsize=1, tsize=None): self.total = tsize return self.update(b * bsize - self.n) - +""" # extract dic filename = "dic.tar.gz" print('Downloading: "{}"'.format(_DICT_URL)) @@ -193,7 +193,7 @@ def update_to(self, b=1, bsize=1, tsize=None): with tarfile.open(filename, mode="r|gz") as f: f.extractall(path="./") os.remove(filename) - +""" # generate config.h for mecab # NOTE: need to run cmake to generate config.h @@ -329,7 +329,7 @@ def run(self): url="https://github.com/r9y9/pyopenjtalk", license="MIT", packages=find_packages(), - package_data={"": ["htsvoice/*", f"{_dict_folder_name}/*"]}, + package_data={"": ["htsvoice/*"]}, ext_modules=ext_modules, cmdclass=cmdclass, install_requires=[ From 13d1d4bbb5f5a7e34606bcebdb47ac349e377a07 Mon Sep 17 00:00:00 2001 From: fumiama <41315874+fumiama@users.noreply.github.com> Date: Fri, 12 Aug 2022 21:25:14 +0800 Subject: [PATCH 12/18] r --- setup.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/setup.py b/setup.py index ecb4117..ec27e14 100644 --- a/setup.py +++ b/setup.py @@ -176,7 +176,6 @@ def update_to(self, b=1, bsize=1, tsize=None): self.total = tsize return self.update(b * bsize - self.n) -""" # extract dic filename = "dic.tar.gz" print('Downloading: "{}"'.format(_DICT_URL)) @@ -193,7 +192,6 @@ def update_to(self, b=1, bsize=1, tsize=None): with tarfile.open(filename, mode="r|gz") as f: f.extractall(path="./") os.remove(filename) -""" # generate config.h for mecab # NOTE: need to run cmake to generate config.h From 1fc6f63a40cb06ccf32ae6ee58ca5a43ad2f5fdd Mon Sep 17 00:00:00 2001 From: fumiama <41315874+fumiama@users.noreply.github.com> Date: Fri, 12 Aug 2022 21:26:08 +0800 Subject: [PATCH 13/18] eee --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ec27e14..3f26a8a 100644 --- a/setup.py +++ b/setup.py @@ -327,7 +327,7 @@ def run(self): url="https://github.com/r9y9/pyopenjtalk", license="MIT", packages=find_packages(), - package_data={"": ["htsvoice/*"]}, + package_data={"": ["htsvoice/*", f"{_dict_folder_name}/*"]}, ext_modules=ext_modules, cmdclass=cmdclass, install_requires=[ From 1c493f5ef4c56c7b93086f4a3153c72a143fdd91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Fri, 12 Aug 2022 21:27:26 +0800 Subject: [PATCH 14/18] Update setup.py --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index 3f26a8a..b7d109d 100644 --- a/setup.py +++ b/setup.py @@ -176,6 +176,7 @@ def update_to(self, b=1, bsize=1, tsize=None): self.total = tsize return self.update(b * bsize - self.n) + # extract dic filename = "dic.tar.gz" print('Downloading: "{}"'.format(_DICT_URL)) @@ -193,6 +194,7 @@ def update_to(self, b=1, bsize=1, tsize=None): f.extractall(path="./") os.remove(filename) + # generate config.h for mecab # NOTE: need to run cmake to generate config.h # we could do it on python side but it would be very tricky, From a9873b82e8a71bc11a2ee3f837198905aa3407f2 Mon Sep 17 00:00:00 2001 From: synodriver <624805065@qq.com> Date: Fri, 12 Aug 2022 13:16:48 +0000 Subject: [PATCH 15/18] add download flag --- setup.py | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/setup.py b/setup.py index 5093dd9..f6c51ff 100644 --- a/setup.py +++ b/setup.py @@ -2,6 +2,7 @@ import subprocess import sys import platform +import shutil from distutils.errors import DistutilsExecError from distutils.spawn import spawn from distutils.version import LooseVersion @@ -16,7 +17,6 @@ from setuptools import Extension, find_packages, setup import six -from tqdm.auto import tqdm if six.PY2: from urllib import urlretrieve else: @@ -171,31 +171,20 @@ def escape_macros(macros): src_top = join("lib", "open_jtalk", "src") -# https://github.com/tqdm/tqdm#hooks-and-callbacks -class _TqdmUpTo(tqdm): # type: ignore - def update_to(self, b=1, bsize=1, tsize=None): - if tsize is not None: - self.total = tsize - return self.update(b * bsize - self.n) - # extract dic filename = "dic.tar.gz" -print('Downloading: "{}"'.format(_DICT_URL)) -with _TqdmUpTo( - unit="B", - unit_scale=True, - unit_divisor=1024, - miniters=1, - desc="dic.tar.gz", -) as t: # all optional kwargs - urlretrieve(_DICT_URL, filename, reporthook=t.update_to) - t.total = t.n +print(f"Downloading: {_DICT_URL}") +urlretrieve(_DICT_URL, filename) +print("Download complete") + print("Extracting tar file {}".format(filename)) with tarfile.open(filename, mode="r|gz") as f: f.extractall(path="./") os.remove(filename) - +print("Extract complete") +shutil.copytree(f"./{_dict_folder_name}", f"./pyopenjtalk/{_dict_folder_name}") +sys.stdout.flush() # generate config.h for mecab # NOTE: need to run cmake to generate config.h From 89843b39754e368c79be118698b07ca8afc1d002 Mon Sep 17 00:00:00 2001 From: synodriver <624805065@qq.com> Date: Fri, 12 Aug 2022 14:35:58 +0000 Subject: [PATCH 16/18] reformat with isort and black --- pyopenjtalk/__init__.py | 6 +++--- pyopenjtalk/htsengine.pyx | 20 +++++++++++--------- pyopenjtalk/openjtalk.pyx | 23 ++++++++++++++++------- pyopenjtalk/openjtalk/jpcommon.pxd | 1 + pyopenjtalk/openjtalk/mecab2njd.pxd | 3 ++- pyopenjtalk/openjtalk/njd.pxd | 1 + pyopenjtalk/openjtalk/njd2jpcommon.pxd | 5 +++-- setup.py | 19 +++++++++---------- 8 files changed, 46 insertions(+), 32 deletions(-) diff --git a/pyopenjtalk/__init__.py b/pyopenjtalk/__init__.py index a266104..7b83da1 100644 --- a/pyopenjtalk/__init__.py +++ b/pyopenjtalk/__init__.py @@ -13,12 +13,12 @@ import tarfile try: - from .version import __version__ # NOQA + from pyopenjtalk.version import __version__ # NOQA except ImportError: raise ImportError("BUG: version.py doesn't exist. Please file a bug report.") -from .htsengine import HTSEngine -from .openjtalk import OpenJTalk +from pyopenjtalk.htsengine import HTSEngine +from pyopenjtalk.openjtalk import OpenJTalk # Dictionary directory # defaults to the package directory where the dictionary will be automatically downloaded diff --git a/pyopenjtalk/htsengine.pyx b/pyopenjtalk/htsengine.pyx index cdc6018..b30e813 100644 --- a/pyopenjtalk/htsengine.pyx +++ b/pyopenjtalk/htsengine.pyx @@ -6,21 +6,23 @@ import numpy as np cimport numpy as np + np.import_array() cimport cython +from cpython.mem cimport PyMem_Free, PyMem_Malloc from cython.parallel cimport prange -from cpython.mem cimport PyMem_Malloc, PyMem_Free from libc.stdint cimport uint8_t -from pyopenjtalk.htsengine cimport HTS_Engine -from pyopenjtalk.htsengine cimport ( - HTS_Engine_initialize, HTS_Engine_load, HTS_Engine_clear, HTS_Engine_refresh, - HTS_Engine_get_sampling_frequency, HTS_Engine_get_fperiod, - HTS_Engine_set_speed, HTS_Engine_add_half_tone, - HTS_Engine_synthesize_from_strings, - HTS_Engine_get_generated_speech, HTS_Engine_get_nsamples -) +from pyopenjtalk.htsengine cimport (HTS_Engine, HTS_Engine_add_half_tone, + HTS_Engine_clear, HTS_Engine_get_fperiod, + HTS_Engine_get_generated_speech, + HTS_Engine_get_nsamples, + HTS_Engine_get_sampling_frequency, + HTS_Engine_initialize, HTS_Engine_load, + HTS_Engine_refresh, HTS_Engine_set_speed, + HTS_Engine_synthesize_from_strings) + @cython.final @cython.no_gc diff --git a/pyopenjtalk/openjtalk.pyx b/pyopenjtalk/openjtalk.pyx index 4f37112..eda7ae1 100644 --- a/pyopenjtalk/openjtalk.pyx +++ b/pyopenjtalk/openjtalk.pyx @@ -4,25 +4,34 @@ # cython: c_string_type=unicode, c_string_encoding=ascii, cdivision=True from libc.stdint cimport uint8_t + import numpy as np cimport numpy as np + np.import_array() cimport cython from cpython.bytes cimport PyBytes_AS_STRING -from pyopenjtalk.openjtalk.mecab cimport Mecab, Mecab_initialize, Mecab_load, Mecab_analysis -from pyopenjtalk.openjtalk.mecab cimport Mecab_get_feature, Mecab_get_size, Mecab_refresh, Mecab_clear -from pyopenjtalk.openjtalk.njd cimport NJD, NJD_initialize, NJD_refresh, NJD_print, NJD_clear from pyopenjtalk.openjtalk cimport njd as _njd -from pyopenjtalk.openjtalk.jpcommon cimport JPCommon, JPCommon_initialize,JPCommon_make_label -from pyopenjtalk.openjtalk.jpcommon cimport JPCommon_get_label_size, JPCommon_get_label_feature -from pyopenjtalk.openjtalk.jpcommon cimport JPCommon_refresh, JPCommon_clear from pyopenjtalk.openjtalk cimport njd2jpcommon -from pyopenjtalk.openjtalk.text2mecab cimport text2mecab +from pyopenjtalk.openjtalk.jpcommon cimport (JPCommon, JPCommon_clear, + JPCommon_get_label_feature, + JPCommon_get_label_size, + JPCommon_initialize, + JPCommon_make_label, + JPCommon_refresh) +from pyopenjtalk.openjtalk.mecab cimport (Mecab, Mecab_analysis, Mecab_clear, + Mecab_get_feature, Mecab_get_size, + Mecab_initialize, Mecab_load, + Mecab_refresh) from pyopenjtalk.openjtalk.mecab2njd cimport mecab2njd +from pyopenjtalk.openjtalk.njd cimport (NJD, NJD_clear, NJD_initialize, + NJD_print, NJD_refresh) from pyopenjtalk.openjtalk.njd2jpcommon cimport njd2jpcommon +from pyopenjtalk.openjtalk.text2mecab cimport text2mecab + cdef inline str njd_node_get_string(_njd.NJDNode* node): return ((_njd.NJDNode_get_string(node))).decode("utf-8") diff --git a/pyopenjtalk/openjtalk/jpcommon.pxd b/pyopenjtalk/openjtalk/jpcommon.pxd index 2dd450a..3667d1e 100644 --- a/pyopenjtalk/openjtalk/jpcommon.pxd +++ b/pyopenjtalk/openjtalk/jpcommon.pxd @@ -3,6 +3,7 @@ from libc.stdio cimport FILE + cdef extern from "jpcommon.h" nogil: cdef cppclass JPCommonNode: char *pron diff --git a/pyopenjtalk/openjtalk/mecab2njd.pxd b/pyopenjtalk/openjtalk/mecab2njd.pxd index d8fb8c4..f42d3ec 100644 --- a/pyopenjtalk/openjtalk/mecab2njd.pxd +++ b/pyopenjtalk/openjtalk/mecab2njd.pxd @@ -1,7 +1,8 @@ # distutils: language = c++ # cython: language_level=3 -from .njd cimport NJD +from pyopenjtalk.openjtalk.njd cimport NJD + cdef extern from "mecab2njd.h" nogil: void mecab2njd(NJD * njd, char **feature, int size); diff --git a/pyopenjtalk/openjtalk/njd.pxd b/pyopenjtalk/openjtalk/njd.pxd index abc1f2d..ef2b8b3 100644 --- a/pyopenjtalk/openjtalk/njd.pxd +++ b/pyopenjtalk/openjtalk/njd.pxd @@ -3,6 +3,7 @@ from libc.stdio cimport FILE + cdef extern from "njd.h" nogil: cdef cppclass NJDNode: char *string diff --git a/pyopenjtalk/openjtalk/njd2jpcommon.pxd b/pyopenjtalk/openjtalk/njd2jpcommon.pxd index 680aadf..a803032 100644 --- a/pyopenjtalk/openjtalk/njd2jpcommon.pxd +++ b/pyopenjtalk/openjtalk/njd2jpcommon.pxd @@ -1,8 +1,9 @@ # distutils: language = c++ # cython: language_level=3 -from .jpcommon cimport JPCommon -from .njd cimport NJD +from pyopenjtalk.openjtalk.jpcommon cimport JPCommon +from pyopenjtalk.openjtalk.njd cimport NJD + cdef extern from "njd2jpcommon.h" nogil: void njd2jpcommon(JPCommon * jpcommon, NJD * njd) diff --git a/setup.py b/setup.py index f6c51ff..700e4b8 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,8 @@ import os -import subprocess -import sys import platform import shutil +import subprocess +import sys from distutils.errors import DistutilsExecError from distutils.spawn import spawn from distutils.version import LooseVersion @@ -14,15 +14,15 @@ import numpy as np import setuptools.command.build_py import setuptools.command.develop +import six from setuptools import Extension, find_packages, setup -import six if six.PY2: from urllib import urlretrieve else: from urllib.request import urlretrieve -import tarfile +import tarfile platform_is_windows = sys.platform == "win32" @@ -88,10 +88,10 @@ def build_extensions(self): system = platform.system() if system == "Windows": extra_compile_args = [] - extra_link_args = ['/openmp'] + extra_link_args = ["/openmp"] elif system == "Linux": - extra_compile_args = ['-fopenmp'] - extra_link_args = ['-fopenmp'] + extra_compile_args = ["-fopenmp"] + extra_link_args = ["-fopenmp"] elif system == "Darwin": os.system("brew install llvm libomp") os.system("brew install clang-omp") @@ -99,8 +99,8 @@ def build_extensions(self): extra_compile_args = ["-Xpreprocessor", "-fopenmp"] extra_link_args = ["-Xpreprocessor", "-fopenmp"] else: - extra_compile_args = ['-fopenmp'] - extra_link_args = ['-fopenmp'] + extra_compile_args = ["-fopenmp"] + extra_link_args = ["-fopenmp"] # Workaround for `distutils.spawn` problem on Windows python < 3.9 @@ -171,7 +171,6 @@ def escape_macros(macros): src_top = join("lib", "open_jtalk", "src") - # extract dic filename = "dic.tar.gz" print(f"Downloading: {_DICT_URL}") From 70f37d2ab8b6576ea421771b53500fa83c0bb824 Mon Sep 17 00:00:00 2001 From: fumiama <41315874+fumiama@users.noreply.github.com> Date: Mon, 15 Aug 2022 10:11:55 +0800 Subject: [PATCH 17/18] feat: remove six and lazy init and tqdm --- .github/workflows/build_whl.yml | 2 +- pyopenjtalk/__init__.py | 48 --------------------------------- setup.py | 8 +----- 3 files changed, 2 insertions(+), 56 deletions(-) diff --git a/.github/workflows/build_whl.yml b/.github/workflows/build_whl.yml index 6799c9a..14c450a 100644 --- a/.github/workflows/build_whl.yml +++ b/.github/workflows/build_whl.yml @@ -27,7 +27,7 @@ jobs: python -m pip install --upgrade setuptools python -m pip install --upgrade wheel pip install flake8 pytest - pip install cython numpy tqdm six + pip install cython numpy tqdm - name: build_whl run: | python setup.py sdist bdist_wheel diff --git a/pyopenjtalk/__init__.py b/pyopenjtalk/__init__.py index 7b83da1..3aa2d8e 100644 --- a/pyopenjtalk/__init__.py +++ b/pyopenjtalk/__init__.py @@ -1,16 +1,7 @@ import os -from os.path import exists import pkg_resources -import six -from tqdm.auto import tqdm -if six.PY2: - from urllib import urlretrieve -else: - from urllib.request import urlretrieve - -import tarfile try: from pyopenjtalk.version import __version__ # NOQA @@ -26,8 +17,6 @@ "OPEN_JTALK_DICT_DIR", pkg_resources.resource_filename(__name__, "open_jtalk_dic_utf_8-1.11"), ).encode("utf-8") -_dict_download_url = "https://github.com/r9y9/open_jtalk/releases/download/v1.11.1" -_DICT_URL = f"{_dict_download_url}/open_jtalk_dic_utf_8-1.11.tar.gz" # Default mei_normal.voice for HMM-based TTS DEFAULT_HTS_VOICE = pkg_resources.resource_filename( @@ -41,41 +30,6 @@ _global_htsengine = None -# https://github.com/tqdm/tqdm#hooks-and-callbacks -class _TqdmUpTo(tqdm): # type: ignore - def update_to(self, b=1, bsize=1, tsize=None): - if tsize is not None: - self.total = tsize - return self.update(b * bsize - self.n) - - -def _extract_dic(): - global OPEN_JTALK_DICT_DIR - filename = pkg_resources.resource_filename(__name__, "dic.tar.gz") - print('Downloading: "{}"'.format(_DICT_URL)) - with _TqdmUpTo( - unit="B", - unit_scale=True, - unit_divisor=1024, - miniters=1, - desc="dic.tar.gz", - ) as t: # all optional kwargs - urlretrieve(_DICT_URL, filename, reporthook=t.update_to) - t.total = t.n - print("Extracting tar file {}".format(filename)) - with tarfile.open(filename, mode="r|gz") as f: - f.extractall(path=pkg_resources.resource_filename(__name__, "")) - OPEN_JTALK_DICT_DIR = pkg_resources.resource_filename( - __name__, "open_jtalk_dic_utf_8-1.11" - ).encode("utf-8") - os.remove(filename) - - -def _lazy_init(): - if not exists(OPEN_JTALK_DICT_DIR): - _extract_dic() - - def g2p(*args, **kwargs): """Grapheme-to-phoeneme (G2P) conversion @@ -93,7 +47,6 @@ def g2p(*args, **kwargs): """ global _global_jtalk if _global_jtalk is None: - _lazy_init() _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR) return _global_jtalk.g2p(*args, **kwargs) @@ -164,6 +117,5 @@ def run_frontend(text, verbose=0): """ global _global_jtalk if _global_jtalk is None: - _lazy_init() _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR) return _global_jtalk.run_frontend(text, verbose) diff --git a/setup.py b/setup.py index 6115e32..a32205c 100644 --- a/setup.py +++ b/setup.py @@ -14,13 +14,9 @@ import numpy as np import setuptools.command.build_py import setuptools.command.develop -import six from setuptools import Extension, find_packages, setup -if six.PY2: - from urllib import urlretrieve -else: - from urllib.request import urlretrieve +from urllib.request import urlretrieve import tarfile @@ -323,8 +319,6 @@ def run(self): install_requires=[ "numpy >= 1.20.0", "cython >= " + min_cython_ver, - "six", - "tqdm", ], tests_require=["nose", "coverage"], extras_require={ From bc8699a5375036c4d6d63194eed57a6a7e35a1f0 Mon Sep 17 00:00:00 2001 From: synodriver <624805065@qq.com> Date: Mon, 15 Aug 2022 09:06:54 +0000 Subject: [PATCH 18/18] ignore exc when datafile exists --- pyopenjtalk/__init__.py | 1 - setup.py | 13 +++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pyopenjtalk/__init__.py b/pyopenjtalk/__init__.py index 3aa2d8e..2e0e3f5 100644 --- a/pyopenjtalk/__init__.py +++ b/pyopenjtalk/__init__.py @@ -2,7 +2,6 @@ import pkg_resources - try: from pyopenjtalk.version import __version__ # NOQA except ImportError: diff --git a/setup.py b/setup.py index a32205c..ce6e3fe 100644 --- a/setup.py +++ b/setup.py @@ -3,6 +3,7 @@ import shutil import subprocess import sys +import tarfile from distutils.errors import DistutilsExecError from distutils.spawn import spawn from distutils.version import LooseVersion @@ -10,16 +11,13 @@ from itertools import chain from os.path import exists, join from subprocess import run +from urllib.request import urlretrieve import numpy as np import setuptools.command.build_py import setuptools.command.develop from setuptools import Extension, find_packages, setup -from urllib.request import urlretrieve - -import tarfile - platform_is_windows = sys.platform == "win32" version = "0.3.0" @@ -176,8 +174,11 @@ def escape_macros(macros): f.extractall(path="./") os.remove(filename) print("Extract complete") -shutil.copytree(f"./{_dict_folder_name}", f"./pyopenjtalk/{_dict_folder_name}") -sys.stdout.flush() +try: + shutil.copytree(f"./{_dict_folder_name}", f"./pyopenjtalk/{_dict_folder_name}") + sys.stdout.flush() +except FileExistsError: + pass # generate config.h for mecab # NOTE: need to run cmake to generate config.h