diff --git a/.ci/pipeline/build-and-test-win.yml b/.ci/pipeline/build-and-test-win.yml index 0db4207670..c09beed84d 100644 --- a/.ci/pipeline/build-and-test-win.yml +++ b/.ci/pipeline/build-and-test-win.yml @@ -51,7 +51,7 @@ steps: - script: | call activate CB cd .. - call s\conda-recipe\run_test.bat s + call s\conda-recipe\run_test.bat s\ displayName: 'Sklearnex testing' - script: | call activate CB diff --git a/.ci/pipeline/ci.yml b/.ci/pipeline/ci.yml index b9c234366f..826389b0a7 100644 --- a/.ci/pipeline/ci.yml +++ b/.ci/pipeline/ci.yml @@ -62,6 +62,55 @@ jobs: pip install isort==${ISORT_VERSION} black[jupyter]==${BLACK_VERSION} isort --profile black --check . && black --check . displayName: 'Linting' +- job: LinuxCondaRecipe + dependsOn: Lint + timeoutInMinutes: 120 + strategy: + matrix: + Python3.12: + PYTHON_VERSION: '3.12' + NUMPY_VERSION: '2.1' + pool: + vmImage: 'ubuntu-22.04' + steps: + - script: | + conda config --add channels conda-forge + conda config --set channel_priority strict + conda update -y -q --all + displayName: "Conda update" + - script: | + conda create -y -q -n build-env conda-build conda-verify + displayName: "Conda create" + - script: | + . /usr/share/miniconda/etc/profile.d/conda.sh + conda activate build-env + conda build . --python $(PYTHON_VERSION) --numpy $(NUMPY_VERSION) + displayName: "Conda build and test" +- job: WindowsCondaRecipe + dependsOn: Lint + timeoutInMinutes: 120 + strategy: + matrix: + Python3.12: + PYTHON_VERSION: '3.12' + NUMPY_VERSION: '2.1' + pool: + vmImage: 'windows-2022' + steps: + - powershell: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts" + displayName: Add conda to PATH + - script: | + conda config --add channels conda-forge + conda config --set channel_priority strict + conda update -y -q --all + displayName: "Conda update" + - script: | + conda create -y -q -n build-env conda-build conda-verify + displayName: "Conda create" + - script: | + call activate build-env + conda build . 
--python $(PYTHON_VERSION) --numpy $(NUMPY_VERSION) + displayName: "Conda build and test" - job: LinuxCondaEnv dependsOn: Lint timeoutInMinutes: 120 @@ -113,6 +162,6 @@ jobs: PYTHON_VERSION: '3.12' SKLEARN_VERSION: '1.5' pool: - vmImage: 'windows-latest' + vmImage: 'windows-2022' steps: - template: build-and-test-win.yml diff --git a/.ci/pipeline/nightly.yml b/.ci/pipeline/nightly.yml index b6f32c0ff4..998ccd934e 100644 --- a/.ci/pipeline/nightly.yml +++ b/.ci/pipeline/nightly.yml @@ -34,7 +34,7 @@ variables: jobs: - job: Coverity pool: - vmImage: 'ubuntu-latest' + vmImage: 'ubuntu-22.04' steps: - script: | cd $(Agent.BuildDirectory) @@ -58,7 +58,7 @@ jobs: - job: Jupyter timeoutInMinutes: 0 pool: - vmImage: 'ubuntu-latest' + vmImage: 'ubuntu-22.04' steps: - script: | conda config --append channels conda-forge @@ -110,6 +110,6 @@ jobs: PYTHON_VERSION: '3.11' SKLEARN_VERSION: 'main' pool: - vmImage: 'windows-latest' + vmImage: 'windows-2022' steps: - template: build-and-test-win.yml diff --git a/.ci/scripts/gen_release_jobs.py b/.ci/scripts/gen_release_jobs.py index f8de27f4f8..6dc6293454 100644 --- a/.ci/scripts/gen_release_jobs.py +++ b/.ci/scripts/gen_release_jobs.py @@ -24,7 +24,10 @@ CHANNELS = args.channels PYTHON_VERSIONS = ["3.9", "3.10", "3.11"] -SYSTEMS = ["ubuntu-latest", "windows-latest"] +# image versions are pinned to exact number instead of "latest" +# to avoid unexpected failures when images are updated +SYSTEMS = ["ubuntu-22.04", "windows-2022"] +# keys of ACTIVATE must match the entries of SYSTEMS exactly ACTIVATE = { - "ubuntu-latest": "conda activate", - "windows-latest": "call activate", + "ubuntu-22.04": "conda activate", + "windows-2022": "call activate", diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0c3af1ec90..d00c366c27 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -245,7 +245,7 @@ jobs: call .\.github\scripts\activate_components.bat ${{ steps.set-env.outputs.DPCFLAG }} set PYTHON=python cd .. 
- call scikit-learn-intelex\conda-recipe\run_test.bat scikit-learn-intelex + call scikit-learn-intelex\conda-recipe\run_test.bat scikit-learn-intelex\ - name: Sklearn testing shell: cmd run: | diff --git a/INSTALL.md b/INSTALL.md index 0dfc77691f..2c4d22975c 100755 --- a/INSTALL.md +++ b/INSTALL.md @@ -30,6 +30,9 @@ To install Intel(R) Extension for Scikit-learn*, use one of the following scenar - [Prerequisites](#prerequisites) - [Configure the Build with Environment Variables](#configure-the-build-with-environment-variables) - [Build Intel(R) Extension for Scikit-learn](#build-intelr-extension-for-scikit-learn) +- [Build from Sources with `conda-build`](#build-from-sources-with-conda-build) + - [Prerequisites for `conda-build`](#prerequisites-for-conda-build) + - [Build Intel(R) Extension for Scikit-learn with `conda-build`](#build-intelr-extension-for-scikit-learn-with-conda-build) - [Next Steps](#next-steps) > **_NOTE:_** Intel(R) Extension for Scikit-learn* is also available as a part of [Intel® AI Tools](https://www.intel.com/content/www/us/en/developer/tools/oneapi/ai-analytics-toolkit.html). If you already have it installed, you do not need to separately install the extension. @@ -212,6 +215,30 @@ back to the project source-code directory. That way, you can edit the source cod without reinstalling the package after a small change. * `--single-version-externally-managed` is an option for Python packages instructing the setup tools module to create a package the host's package manager can easily manage. +## Build from Sources with `conda-build` + +Intel(R) Extension for Scikit-learn* can be built from sources with a single command using the `conda-build` utility. 
+ +### Prerequisites for `conda-build` + +* any `conda` distribution (`miniforge` is recommended) +* `conda-build` and `conda-verify` installed in a conda environment +* (Windows only) Microsoft Visual Studio* +* (optional) Intel(R) oneAPI DPC++/C++ Compiler + +The `conda-build` config requires the **2022** version of Microsoft Visual Studio* by default; you can specify another version in `conda-recipe/conda_build_config.yaml` if needed. + +To enable DPC++ interface support on Windows, set the `DPCPPROOT` environment variable to point to the DPC++/C++ Compiler distribution. +The conda-forge distribution of the DPC++ compiler is used by default on Linux, but you can still use your own distribution by setting the `DPCPPROOT` variable. + +### Build Intel(R) Extension for Scikit-learn with `conda-build` + +Create and verify the `scikit-learn-intelex` conda package with the following command, executed from the root of the sklearnex repo: + +```bash +conda build . +``` + ## Next Steps - [Learn what patching is and how to patch scikit-learn](https://intel.github.io/scikit-learn-intelex/latest/what-is-patching.html) diff --git a/conda-recipe/bld.bat b/conda-recipe/bld.bat index e50a167864..15321a39ec 100644 --- a/conda-recipe/bld.bat +++ b/conda-recipe/bld.bat @@ -15,21 +15,19 @@ rem See the License for the specific language governing permissions and rem limitations under the License. 
rem ============================================================================ -IF DEFINED PKG_VERSION (set DAAL4PY_VERSION=%PKG_VERSION%) - -set MPIROOT=%PREFIX%\Library - +IF NOT DEFINED PYTHON (set "PYTHON=python") +IF DEFINED PKG_VERSION (set SKLEARNEX_VERSION=%PKG_VERSION%) IF NOT DEFINED DALROOT (set DALROOT=%PREFIX%) +IF NOT DEFINED MPIROOT IF "%NO_DIST%"=="" (set MPIROOT=%PREFIX%\Library) -set "BUILD_ARGS=" +rem reset preferred compilers to avoid usage of icx/icpx by default in all cases +set CC=cl.exe +set CXX=cl.exe +rem source compiler if DPCPPROOT is set outside of conda-build IF DEFINED DPCPPROOT ( echo "Sourcing DPCPPROOT" call "%DPCPPROOT%\env\vars.bat" ) -set PATH=%PATH%;%PREFIX%\Library\bin\libfabric - -%PYTHON% setup.py build %BUILD_ARGS% -IF %ERRORLEVEL% neq 0 EXIT /b %ERRORLEVEL% %PYTHON% setup.py install --single-version-externally-managed --record record.txt diff --git a/conda-recipe/build.sh b/conda-recipe/build.sh index 5f8987deeb..b0b3136da7 100755 --- a/conda-recipe/build.sh +++ b/conda-recipe/build.sh @@ -15,28 +15,29 @@ # limitations under the License. #=============================================================================== -if [ "$PY3K" == "1" ]; then - ARGS=" --single-version-externally-managed --record=record.txt" -else - ARGS="--old-and-unmanageable" +if [ -z "${PYTHON}" ]; then + export PYTHON=python fi -# if dpc++ vars path is specified -if [ ! -z "${DPCPPROOT}" ]; then - source ${DPCPPROOT}/env/vars.sh +if [ ! -z "${PKG_VERSION}" ]; then + export SKLEARNEX_VERSION=$PKG_VERSION fi if [ -z "${DALROOT}" ]; then export DALROOT=${PREFIX} fi -if [ "$(uname)" == "Darwin" ]; then - export CC=gcc - export CXX=g++ +if [ -z "${MPIROOT}" ] && [ -z "${NO_DIST}" ]; then + export MPIROOT=${PREFIX} fi - -if [ ! -z "${PKG_VERSION}" ]; then - export DAAL4PY_VERSION=$PKG_VERSION +# reset preferred compilers to avoid usage of icx/icpx by default in all cases +if [ ! -z "${CC_FOR_BUILD}" ] && [ ! 
-z "${CXX_FOR_BUILD}" ]; then + export CC=$CC_FOR_BUILD + export CXX=$CXX_FOR_BUILD fi -export MPIROOT=${PREFIX} -${PYTHON} setup.py install $ARGS +# source compiler if DPCPPROOT is set outside of conda-build +if [ ! -z "${DPCPPROOT}" ]; then + source ${DPCPPROOT}/env/vars.sh +fi + +${PYTHON} setup.py install --single-version-externally-managed --record record.txt diff --git a/conda-recipe/conda_build_config.yaml b/conda-recipe/conda_build_config.yaml index f52d3395a2..31ae4df64e 100755 --- a/conda-recipe/conda_build_config.yaml +++ b/conda-recipe/conda_build_config.yaml @@ -14,5 +14,7 @@ # limitations under the License. #=============================================================================== -numpy: - - 1.19 +c_compiler: # [win] +- vs2022 # [win] +cxx_compiler: # [win] +- vs2022 # [win] diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index 98a6e537eb..cef1bbba49 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -14,96 +14,109 @@ # limitations under the License. #=============================================================================== -{% set version = '2024.1' %} +# NB: this recipe should be synced between sklearnex repo and feedstocks + +{% set name = "scikit-learn-intelex" %} {% set buildnumber = 0 %} +# version is set manually in feedstocks and through git tag in repo +# {% set version = "1.1.1" %} package: - name: daal4py - version: {{ version }} + name: {{ name|lower }} + version: {{ environ.get('GIT_DESCRIBE_TAG') }} + # version: {{ version }} +# Reminder: source should point to path for repo or github archive for feedstock +# source: +# url: https://github.com/intel/scikit-learn-intelex/archive/{{ version }}.tar.gz +# sha256: ... source: - path: .. + path: .. 
build: - number: {{ buildnumber }} - include_recipe: False - script_env: - - DPCPPROOT - - DALROOT - ignore_run_exports: - - python - - mpich + skip: true # [not (linux64 or win)] + number: {{ buildnumber }} + include_recipe: False + script_env: + - DPCPPROOT + - DALROOT + - NO_DIST=1 # [win] requirements: - build: - - {{ compiler('cxx') }} # [not osx] - - {{ compiler('c') }} # [not osx] - host: - - python - - setuptools - - numpy {{ numpy }} - - dal-devel ==2024.1.0 - - cython - - jinja2 - - mpich # [osx] - - impi-devel # [not osx] - - clang-tools - - pybind11 - - make - run: - - python - - dpcpp_cpp_rt ==2024.1.0 # [linux] - - dpcpp_cpp_rt ==2024.1.0 # [win or osx] - - dal ==2024.1.0 - ignore_run_exports: - - numpy + build: + - make # [linux] + - dpcpp_linux-64 # [linux64] + # - dpcpp_win-64 # [win] + - {{ compiler('cxx') }} # [linux64 or win] + # conda-forge feedstock specific + # - {{ stdlib("c") }} # [linux64 or win] + host: + - python + - setuptools + - cmake + - clang-format + - cython + - jinja2 + - pybind11 + - numpy {{ numpy }} + - impi-devel # [not win] + # dal-devel pinning depends on the recipe location (repo or feedstock) + - dal-devel + # - dal-devel =={{ version }} + run: + - python + - {{ pin_compatible('numpy') }} + - dpcpp-cpp-rt # [linux64] + # dal pinning depends on the recipe location (repo or feedstock) + - dal + # - dal =={{ version }} test: - requires: - - lightgbm # [not win] - - pandas - - scipy - - scikit-learn - - threadpoolctl - - xgboost # [not win] - - catboost - - pytest - - mpich # [osx] - - impi_rt # [not osx] - source_files: - - examples - - tests - - daal4py/sklearn - commands: - - python -c "import daal4py" - - mpirun -n 4 pytest --verbose -s tests/test*spmd*.py # [not win] - - mpiexec -localonly -n 4 pytest --verbose -s tests/test*spmd*.py # [win] - - pytest --pyargs --verbose -s tests - - pytest --pyargs daal4py/sklearn/ - - python tests/run_examples.py + requires: + - pyyaml + - impi_rt # [not win] + # TODO: enable data 
parallel frameworks when they are available on conda-forge + # - dpctl + # - dpnp + # next deps are synced with requirements-test.txt + - pytest + - scikit-learn + - pandas + - xgboost + - lightgbm + # TODO: re-enable shap and catboost when conda-forge packages has a numpy 2.* compatibility + # - shap + # - catboost + - array-api-compat + - array-api-strict + source_files: + - .ci + - examples + - tests about: - about: - home: https://intelpython.github.io/daal4py/ - license: Apache-2.0 - license_file: - - LICENSE - - doc/third-party-programs.txt - summary: A convenient Python API to Intel (R) oneAPI Data Analytics Library - description: | - LEGAL NOTICE: Use of this software package is subject to the - software license agreement (as set forth above, in the license section of - the installed Conda package and/or the README file) and all notices, - disclaimers or license terms for third party or open source software - included in or with the software. -

- EULA: Apache-2.0 -

- dev_url: https://github.com/IntelPython/daal4py - doc_url: https://intelpython.github.io/daal4py + home: https://intel.github.io/scikit-learn-intelex + license: Apache-2.0 + license_file: + - LICENSE + - doc/third-party-programs-sklearnex.txt + summary: Intel(R) Extension for Scikit-learn* is a seamless way to speed up your Scikit-learn application. + description: | + LEGAL NOTICE: Use of this software package is subject to the + software license agreement (as set forth above, in the license section of + the installed Conda package and/or the README file) and all notices, + disclaimers or license terms for third party or open source software + included in or with the software. +

+ EULA: Apache-2.0 +

+ dev_url: https://github.com/intel/scikit-learn-intelex + doc_url: https://intel.github.io/scikit-learn-intelex extra: - recipe-maintainers: - # GitHub IDs for maintainers of the recipe. - - napetrov - - Alexsandruss + recipe-maintainers: + # GitHub IDs for maintainers of the recipe. + - napetrov + - Alexsandruss + - maria-Petrova + - ethanglaser diff --git a/conda-recipe/run_test.bat b/conda-recipe/run_test.bat index a65365cc5d..a9e27b0c91 100644 --- a/conda-recipe/run_test.bat +++ b/conda-recipe/run_test.bat @@ -15,34 +15,18 @@ rem See the License for the specific language governing permissions and rem limitations under the License. rem ============================================================================ -rem %1 - scikit-learn-intelex repo root +rem %1 - scikit-learn-intelex repo root (should end with '\', leave empty if it's %cd% / $PWD) -set MPIROOT=%PREFIX%\Library set exitcode=0 -IF DEFINED DPCPPROOT ( - echo "Sourcing DPCPPROOT" - call "%DPCPPROOT%\env\vars.bat" || set exitcode=1 - set "CC=dpcpp" - set "CXX=dpcpp" - dpcpp --version -) +IF NOT DEFINED PYTHON (set "PYTHON=python") -IF DEFINED DALROOT ( - echo "Sourcing DALROOT" - call "%DALROOT%\env\vars.bat" || set exitcode=1 - echo "Finish sourcing DALROOT" -) +%PYTHON% -c "from sklearnex import patch_sklearn; patch_sklearn()" || set exitcode=1 -IF DEFINED TBBROOT ( - echo "Sourcing TBBROOT" - call "%TBBROOT%\env\vars.bat" || set exitcode=1 -) +%PYTHON% -m pytest --verbose -s %1tests || set exitcode=1 -%PYTHON% -m pytest --verbose -s %1\tests || set exitcode=1 - -pytest --verbose --pyargs %1\daal4py\sklearn || set exitcode=1 +pytest --verbose --pyargs daal4py || set exitcode=1 pytest --verbose --pyargs sklearnex || set exitcode=1 -pytest --verbose --pyargs %1\onedal --deselect="onedal/common/tests/test_policy.py" || set exitcode=1 -pytest --verbose %1\.ci\scripts\test_global_patch.py || set exitcode=1 +pytest --verbose --pyargs onedal || set exitcode=1 +pytest --verbose 
%1.ci\scripts\test_global_patch.py || set exitcode=1 EXIT /B %exitcode% diff --git a/conda-recipe/run_test.sh b/conda-recipe/run_test.sh index bc8e557b9b..af8df1a887 100755 --- a/conda-recipe/run_test.sh +++ b/conda-recipe/run_test.sh @@ -15,55 +15,50 @@ # limitations under the License. #=============================================================================== -daal4py_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +sklex_root="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" count=3 while [[ count -ne 0 ]]; do - if [[ -d $daal4py_dir/daal4py/ && -d $daal4py_dir/tests/ && -d $daal4py_dir/examples/daal4py ]]; then + if [[ -d $sklex_root/.ci/ && -d $sklex_root/examples/ && -d $sklex_root/tests/ ]]; then break fi - daal4py_dir="$( dirname "${daal4py_dir}" )" + sklex_root="$( dirname "${sklex_root}" )" count=$(($count - 1)) done -echo "daal4py_dir=$daal4py_dir" if [[ count -eq 0 ]]; then - echo "run_test.sh must be in daal4py repository" + echo "run_test.sh did not find the required testing directories" exit 1 fi -echo "Start testing ..." return_code=0 -python -c "import daal4py" -return_code=$(($return_code + $?)) +if [ -z "${PYTHON}" ]; then + export PYTHON=python +fi -echo "Pytest run of legacy unittest ..." -echo ${daal4py_dir} -pytest --verbose -s ${daal4py_dir}/tests +${PYTHON} -c "from sklearnex import patch_sklearn; patch_sklearn()" return_code=$(($return_code + $?)) -echo "NO_DIST=$NO_DIST" -if [[ ! $NO_DIST ]]; then - echo "MPI pytest run of legacy unittest ..." - mpirun --version - mpirun -n 4 pytest --verbose -s ${daal4py_dir}/tests/test*spmd*.py - return_code=$(($return_code + $?)) -fi +pytest --verbose -s ${sklex_root}/tests +return_code=$(($return_code + $?)) -echo "Pytest of daal4py running ..." -pytest --verbose --pyargs ${daal4py_dir}/daal4py/sklearn +pytest --verbose --pyargs daal4py return_code=$(($return_code + $?)) -echo "Pytest of sklearnex running ..." 
pytest --verbose --pyargs sklearnex return_code=$(($return_code + $?)) -echo "Pytest of onedal running ..." -pytest --verbose --pyargs ${daal4py_dir}/onedal +pytest --verbose --pyargs onedal return_code=$(($return_code + $?)) -echo "Global patching test running ..." -pytest --verbose -s ${daal4py_dir}/.ci/scripts/test_global_patch.py +pytest --verbose -s ${sklex_root}/.ci/scripts/test_global_patch.py return_code=$(($return_code + $?)) +echo "NO_DIST=$NO_DIST" +if [[ ! $NO_DIST ]]; then + mpirun --version + mpirun -n 4 pytest --verbose -s ${sklex_root}/tests/test*spmd*.py + return_code=$(($return_code + $?)) +fi + exit $return_code diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt index 2ba4b7eb64..0b26468768 100644 --- a/scripts/CMakeLists.txt +++ b/scripts/CMakeLists.txt @@ -27,6 +27,10 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) if(WIN32) + # hint CMake to get python from PYTHON env. variable if defined + if(DEFINED ENV{PYTHON}) + set(PYTHON_EXECUTABLE $ENV{PYTHON}) + endif() set(SDL_FLAGS "-GS -DynamicBase") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /MD") elseif(UNIX)