From b040aecbb38a5f60022f4e8e7416d677a8a04f3f Mon Sep 17 00:00:00 2001 From: Julien Cretin Date: Fri, 25 Oct 2024 14:18:36 +0200 Subject: [PATCH 1/5] Add maturin workflow to generate wheels (#747) * Add maturin workflow to generate wheels * Update workflow trigger condition * Fix version before building * fix * fix * disable maturin in pull_request --- .github/workflows/maturin.yml | 85 +++++++++++++++++++ python/pyproject.toml | 2 - python/scripts/fix_package_version.py | 115 ++++++++++++++++++++++++++ rust/onnx/build.sh | 53 ++++++++++++ rust/onnx/maturin.sh | 31 +++++++ 5 files changed, 284 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/maturin.yml create mode 100755 python/scripts/fix_package_version.py create mode 100755 rust/onnx/build.sh create mode 100755 rust/onnx/maturin.sh diff --git a/.github/workflows/maturin.yml b/.github/workflows/maturin.yml new file mode 100644 index 00000000..3ef2abc9 --- /dev/null +++ b/.github/workflows/maturin.yml @@ -0,0 +1,85 @@ +# This file is autogenerated by maturin v1.7.4 and manually modified by ia0. +name: Maturin + +on: + push: + branches: + - 'main' + tags: + - 'python-v*' + schedule: + - cron: '12 3 * * 4' # Refresh the cache weekly. + workflow_dispatch: + # Uncomment to test the workflow in a PR. + # pull_request: + +permissions: + contents: read + +jobs: + build: + runs-on: ${{ matrix.platform.runner }} + strategy: + matrix: + platform: + - runner: ubuntu-latest + target: x86_64 + - runner: windows-latest + target: x64 + - runner: macos-14 + target: aarch64 + steps: + - uses: actions/checkout@v4 + - if: matrix.platform.runner == 'ubuntu-latest' + uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2 + with: + path: rust/onnx/runtime/build/Linux + key: maturin-${{ matrix.platform.target }}-${{ hashFiles('rust/onnx/build.sh') }} + # TODO(reyammer): Factorize this script into the one that also builds. + - run: python3 ./python/scripts/fix_package_version.py + - if: matrix.platform.runner == 'ubuntu-latest' + name: Build wheels + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.platform.target }} + args: --release --out=../dist + before-script-linux: "${{ github.workspace }}/rust/onnx/maturin.sh" + manylinux: 2_28 + working-directory: python + - if: matrix.platform.runner != 'ubuntu-latest' + name: Build wheels + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.platform.target }} + args: --release --out=../dist + working-directory: python + - name: Install wheels + run: python3 -m pip install $(python -c "import glob; print(glob.glob('dist/*.whl')[0])") + - run: magika --version + - run: python3 -c 'import magika; print(magika.__version__)' + - run: magika -r tests_data/basic + - run: python3 ./python/scripts/run_quick_test_magika_cli.py + - run: python3 ./python/scripts/run_quick_test_magika_module.py + - name: Upload wheels + uses: actions/upload-artifact@v4 + with: + name: wheels-${{ matrix.platform.runner }}-${{ matrix.platform.target }} + path: dist + + sdist: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + # TODO(https://github.com/PyO3/maturin/issues/2244): Remove when released. + - run: rm rust/cli/README.md + - name: Build sdist + uses: PyO3/maturin-action@v1 + with: + command: sdist + args: --out=../dist + working-directory: python + - name: Upload sdist + uses: actions/upload-artifact@v4 + with: + name: wheels-sdist + path: dist diff --git a/python/pyproject.toml b/python/pyproject.toml index 8271663b..cfa4c67a 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -74,8 +74,6 @@ module-name = "magika" bindings = "bin" manifest-path = "../rust/cli/Cargo.toml" locked = true -compatibility = "linux" -skip-auditwheel = true [tool.ruff.lint] # Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. diff --git a/python/scripts/fix_package_version.py b/python/scripts/fix_package_version.py new file mode 100755 index 00000000..e6a2560d --- /dev/null +++ b/python/scripts/fix_package_version.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python +# Copyright 2023-2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import re +import subprocess +from pathlib import Path + + +def main() -> None: + repo_root_dir = Path(__file__).parent.parent.parent + python_root_dir = repo_root_dir / "python" + rust_root_dir = repo_root_dir / "rust" + # Compute paths to files we'll need to restore at the end of the build + rust_main_rs_path = rust_root_dir / "cli" / "src" / "main.rs" + rust_cli_cargo_toml_path = rust_root_dir / "cli" / "Cargo.toml" + + # get the rust version from Cargo.toml and patch main.rs + rust_version = get_rust_version(rust_root_dir) + patch_main_rs_with_version(rust_main_rs_path, rust_version) + + # get the python version from magika.__version__ and patch Cargo.toml + python_version = get_python_version(python_root_dir) + patch_cargo_toml_with_version(rust_cli_cargo_toml_path, python_version) + + # update Cargo.lock + subprocess.run(["cargo", "check"], cwd=rust_root_dir / "cli", check=True) + + +def get_rust_version(rust_root_dir: Path) -> str: + cargo_path = rust_root_dir / "cli" / "Cargo.toml" + version = extract_with_regex(cargo_path, 'version = "([A-Za-z0-9.-]+)".*') + print(f"Extracted rust version: {version}") + return version + + +def get_python_version(python_root_dir: Path) -> str: + init_path = python_root_dir / "src" / "magika" / "__init__.py" + version = extract_with_regex(init_path, '__version__ = "([A-Za-z0-9.-]+)"') + print(f"Extracted python version: {version}") + return version + + +def patch_main_rs_with_version(rust_main_rs_path: Path, version: str) -> None: + print(f'Patching {rust_main_rs_path} with rust version "{version}"') + patch_line_matching_prefix( + rust_main_rs_path, + " let binary = clap::crate_version!();", + f' let binary = "{version}";', + ) + + +def patch_cargo_toml_with_version(cargo_toml_path: Path, version: str) -> None: + print(f'Patching {cargo_toml_path} with python version "{version}"') + patch_line_matching_prefix(cargo_toml_path, "version = ", f'version = "{version}"') + + +def extract_with_regex(file_path: Path, regex: str) -> str: + """Extract a string via regex. This raises an exception if no or more than + one matches are found.""" + + lines = file_path.read_text().split("\n") + output = None + for line in lines: + m = re.fullmatch(regex, line) + if m: + if output is not None: + raise Exception( + f'ERROR: Found more than one match for "{regex}" in {file_path}' + ) + output = m.group(1) + if output is None: + raise Exception(f'No hits for "{regex}" in {file_path}') + return output + + +def patch_line_matching_prefix(file_path: Path, prefix: str, new_line: str) -> None: + """Patch a line starting with a given prefix with a new line. This raises an + exception if no such line or more than one lines with a given prefix are + found.""" + + lines = file_path.read_text().split("\n") + already_found = False + for line_idx in range(len(lines)): + line = lines[line_idx] + if line.startswith(prefix): + if already_found: + raise Exception( + f'ERROR: Found more than one line with prefix "{prefix}" in "{file_path}"' + ) + already_found = True + lines[line_idx] = new_line + + if not already_found: + raise Exception( + f'ERROR: Did not find any line with prefix "{prefix}" in "{file_path}"' + ) + file_path.write_text("\n".join(lines)) + + +if __name__ == "__main__": + main() diff --git a/rust/onnx/build.sh b/rust/onnx/build.sh new file mode 100755 index 00000000..dee2f92d --- /dev/null +++ b/rust/onnx/build.sh @@ -0,0 +1,53 @@ +#!/bin/bash +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -e +. ../color.sh + +# This script builds ONNX Runtime as a static library to be linked in the Magika CLI. +# +# This is needed when building for manylinux since the prebuilt binaries provided by the ort crate +# have too recent dependency requirements. + +if [ -e runtime ]; then + info "Using cached static libraries." +else + info "Make sure we have Python 3.x and cmake-3.27 or higher." + python3 -m venv venv + source venv/bin/activate + python3 -m pip install cmake + + info "Clone ONNX Runtime repository (recursively)." + git clone --recursive https://github.com/Microsoft/onnxruntime.git runtime + cd runtime + + info "Checkout v1.19.2 because that's what ort v2.0.0-rc.8 supports." + git checkout v1.19.2 + + info "Build the static libraries." + x ./build.sh --config=Release --parallel $ONNX_RUNTIME_BUILD_FLAGS + + info "Only keep the static libraries to save cache space." + find build/Linux -not -name '*.a' \( -not -type d -or -empty \) -delete + cd .. +fi + +info "Point the ort crate to the locally built static library." +cd ../.. +cat >> .cargo/config.toml < Date: Sat, 26 Oct 2024 10:23:13 +0530 Subject: [PATCH 2/5] added 2 test cases for rust manually --- tests_data/basic/rust/test_case1.rs | 12 ++++++++++++ tests_data/basic/rust/test_case2.rs | 28 ++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 tests_data/basic/rust/test_case1.rs create mode 100644 tests_data/basic/rust/test_case2.rs diff --git a/tests_data/basic/rust/test_case1.rs b/tests_data/basic/rust/test_case1.rs new file mode 100644 index 00000000..8c709d9b --- /dev/null +++ b/tests_data/basic/rust/test_case1.rs @@ -0,0 +1,12 @@ +/// Sample function to load a file +fn load_model() { + println!("Magika model ⏳"); + // Simulating a delay for loading + std::thread::sleep(std::time::Duration::from_millis(200)); + println!("Model loaded successfully!"); +} + +fn main() { + // Load the model + load_model(); +} \ No newline at end of file diff --git a/tests_data/basic/rust/test_case2.rs b/tests_data/basic/rust/test_case2.rs new file mode 100644 index 00000000..b26a121b --- /dev/null +++ b/tests_data/basic/rust/test_case2.rs @@ -0,0 +1,28 @@ +use std::fs::File; +use std::io::{Write, BufWriter}; + +// Function to create a CSV file +fn create_csv() -> std::io::Result<()> { + let mut file = File::create("sample.csv")?; + writeln!(file, "Name,Age,City")?; + writeln!(file, "Alice,30,New York")?; + writeln!(file, "Bob,25,Los Angeles")?; + writeln!(file, "Charlie,35,Chicago")?; + Ok(()) +} + +// Function to create a JSON file +fn create_json() -> std::io::Result<()> { + let mut file = File::create("sample.json")?; + writeln!(file, "{{\"name\": \"Alice\", \"age\": 30, \"city\": \"New York\"}}")?; + Ok(()) +} + +// Main function +fn main() -> std::io::Result<()> { + // Function calls + create_csv()?; + create_json()?; + println!("Sample files created successfully."); + Ok(()) +} \ No newline at end of file From 5ca35984faafe566770d6661b077ef2ef53416d0 Mon Sep 17 00:00:00 2001 From: ia0 Date: Sat, 26 Oct 2024 11:24:02 +0200 Subject: [PATCH 3/5] Also fix version in maturin sdist --- .github/workflows/maturin.yml | 2 +- python/scripts/fix_package_version.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/maturin.yml b/.github/workflows/maturin.yml index 3ef2abc9..fe2a3910 100644 --- a/.github/workflows/maturin.yml +++ b/.github/workflows/maturin.yml @@ -35,7 +35,6 @@ jobs: with: path: rust/onnx/runtime/build/Linux key: maturin-${{ matrix.platform.target }}-${{ hashFiles('rust/onnx/build.sh') }} - # TODO(reyammer): Factorize this script into the one that also builds. - run: python3 ./python/scripts/fix_package_version.py - if: matrix.platform.runner == 'ubuntu-latest' name: Build wheels @@ -72,6 +71,7 @@ jobs: - uses: actions/checkout@v4 # TODO(https://github.com/PyO3/maturin/issues/2244): Remove when released. - run: rm rust/cli/README.md + - run: python3 ./python/scripts/fix_package_version.py - name: Build sdist uses: PyO3/maturin-action@v1 with: diff --git a/python/scripts/fix_package_version.py b/python/scripts/fix_package_version.py index e6a2560d..53ea9410 100755 --- a/python/scripts/fix_package_version.py +++ b/python/scripts/fix_package_version.py @@ -20,6 +20,8 @@ from pathlib import Path +# TODO(reyammer): Factorize this script into build_python_package.py. +# TODO(https://github.com/PyO3/maturin/issues/2163): Remove this file when fixed. def main() -> None: repo_root_dir = Path(__file__).parent.parent.parent python_root_dir = repo_root_dir / "python" From 86e9b8d76a50ee2803673741b4af7657fb4029ae Mon Sep 17 00:00:00 2001 From: ia0 Date: Sat, 26 Oct 2024 11:24:37 +0200 Subject: [PATCH 4/5] test --- .github/workflows/maturin.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/maturin.yml b/.github/workflows/maturin.yml index fe2a3910..a4317152 100644 --- a/.github/workflows/maturin.yml +++ b/.github/workflows/maturin.yml @@ -11,7 +11,7 @@ on: - cron: '12 3 * * 4' # Refresh the cache weekly. workflow_dispatch: # Uncomment to test the workflow in a PR. - # pull_request: + pull_request: # DO NOT MERGE: testing only permissions: contents: read From 8cb2d8a9b13198ddef4ae9ba03d949908ea87e9c Mon Sep 17 00:00:00 2001 From: ia0 Date: Sat, 26 Oct 2024 11:30:12 +0200 Subject: [PATCH 5/5] Revert "test" This reverts commit 86e9b8d76a50ee2803673741b4af7657fb4029ae. --- .github/workflows/maturin.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/maturin.yml b/.github/workflows/maturin.yml index a4317152..fe2a3910 100644 --- a/.github/workflows/maturin.yml +++ b/.github/workflows/maturin.yml @@ -11,7 +11,7 @@ on: - cron: '12 3 * * 4' # Refresh the cache weekly. workflow_dispatch: # Uncomment to test the workflow in a PR. - pull_request: # DO NOT MERGE: testing only + # pull_request: permissions: contents: read